gallium: add lima driver
v2:
- use renamed util_dynarray_grow_cap
- use DEBUG_GET_ONCE_FLAGS_OPTION for debug flags
- remove DRM_FORMAT_MOD_ARM_AGTB_MODE0 usage
- compute min/max index in driver
v3:
- fix plbu framebuffer state calculation
- fix color_16pc assemble
- use nir_lower_all_source_mods for lowering neg/abs/sat
- use float array for static GPU data
- add disassemble comment for static shader code
- use drm_find_modifier
v4:
- use lima_nir_lower_uniform_to_scalar
v5:
- remove nir_opt_global_to_local when rebase
Cc: Rob Clark <robdclark@gmail.com>
Cc: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
Signed-off-by: Arno Messiaen <arnomessiaen@gmail.com>
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: marmeladema <xademax@gmail.com>
Signed-off-by: Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rohan Garg <rohan@garg.io>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
parent
64eaf60ca7
commit
92d7ca4b1c
|
@ -132,7 +132,7 @@ if _drivers.contains('auto')
|
|||
elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
|
||||
_drivers = [
|
||||
'kmsro', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'nouveau',
|
||||
'tegra', 'virgl', 'swrast'
|
||||
'tegra', 'virgl', 'lima', 'swrast'
|
||||
]
|
||||
else
|
||||
error('Unknown architecture @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format(
|
||||
|
@ -162,6 +162,7 @@ with_gallium_i915 = _drivers.contains('i915')
|
|||
with_gallium_svga = _drivers.contains('svga')
|
||||
with_gallium_virgl = _drivers.contains('virgl')
|
||||
with_gallium_swr = _drivers.contains('swr')
|
||||
with_gallium_lima = _drivers.contains('lima')
|
||||
|
||||
if cc.get_id() == 'intel'
|
||||
if meson.version().version_compare('< 0.49.0')
|
||||
|
|
|
@ -60,7 +60,7 @@ option(
|
|||
choices : [
|
||||
'', 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
|
||||
'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
|
||||
'swr', 'panfrost', 'iris'
|
||||
'swr', 'panfrost', 'iris', 'lima'
|
||||
],
|
||||
description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
|
||||
)
|
||||
|
|
|
@ -141,6 +141,11 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
|
|||
.create_screen = pipe_tegra_create_screen,
|
||||
.configuration = pipe_default_configuration_query,
|
||||
},
|
||||
{
|
||||
.driver_name = "lima",
|
||||
.create_screen = pipe_lima_create_screen,
|
||||
.configuration = pipe_default_configuration_query,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct drm_driver_descriptor default_driver_descriptor = {
|
||||
|
|
|
@ -425,4 +425,27 @@ pipe_tegra_create_screen(int fd, const struct pipe_screen_config *config)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_LIMA
|
||||
#include "lima/drm/lima_drm_public.h"
|
||||
|
||||
/*
 * Build a lima screen on top of the DRM file descriptor @fd and hand it
 * to debug_screen_wrap().  Returns NULL when lima_drm_screen_create()
 * fails.  @config is not consulted.
 */
struct pipe_screen *
pipe_lima_create_screen(int fd, const struct pipe_screen_config *config)
{
   struct pipe_screen *lima_screen = lima_drm_screen_create(fd);

   if (!lima_screen)
      return NULL;

   return debug_screen_wrap(lima_screen);
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Stand-in used when GALLIUM_LIMA is not defined: reports on stderr that
 * the driver is missing and yields no screen.  Both arguments are ignored.
 */
struct pipe_screen *
pipe_lima_create_screen(int fd, const struct pipe_screen_config *config)
{
   (void)fd;
   (void)config;

   fprintf(stderr, "lima: driver missing\n");

   return NULL;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* DRM_HELPER_H */
|
||||
|
|
|
@ -57,6 +57,9 @@ pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config);
|
|||
/* Screen constructor for the tegra driver (defined elsewhere). */
struct pipe_screen *
pipe_tegra_create_screen(int fd, const struct pipe_screen_config *config);

/*
 * Screen constructor for the lima driver.  Returns NULL when the driver
 * is not built in or when screen creation fails.
 */
struct pipe_screen *
pipe_lima_create_screen(int fd, const struct pipe_screen_config *config);

/* Configuration query shared by the driver descriptors (defined elsewhere). */
const struct drm_conf_ret *
pipe_default_configuration_query(enum drm_conf conf);
|
||||
|
||||
|
|
|
@ -0,0 +1,619 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "gpir.h"
|
||||
#include "codegen.h"
|
||||
#include "lima_context.h"
|
||||
|
||||
/*
 * Map the place where @child was scheduled to the source selector that
 * @parent has to encode in order to read the child's result.
 *
 * The selector depends on two things: the slot the child occupies
 * (child->sched.pos) and how many instructions separate child and parent
 * (child->sched.instr - parent->sched.instr, which must be 0, 1 or 2).
 * Combinations that cannot be encoded map to gpir_codegen_src_unused.
 */
static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child)
{
   /* Indexed by [slot of the child][instruction distance]. */
   static const int slot_to_src[GPIR_INSTR_SLOT_NUM][3] = {
      [GPIR_INSTR_SLOT_MUL0] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_mul_0, gpir_codegen_src_p2_mul_0 },
      [GPIR_INSTR_SLOT_MUL1] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_mul_1, gpir_codegen_src_p2_mul_1 },

      [GPIR_INSTR_SLOT_ADD0] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_acc_0, gpir_codegen_src_p2_acc_0 },
      [GPIR_INSTR_SLOT_ADD1] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_acc_1, gpir_codegen_src_p2_acc_1 },

      [GPIR_INSTR_SLOT_COMPLEX] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_complex, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_PASS] = {
         gpir_codegen_src_unused, gpir_codegen_src_p1_pass, gpir_codegen_src_p2_pass },
      [GPIR_INSTR_SLOT_BRANCH] = {
         gpir_codegen_src_unused, gpir_codegen_src_unused, gpir_codegen_src_unused },

      [GPIR_INSTR_SLOT_REG0_LOAD0] = {
         gpir_codegen_src_attrib_x, gpir_codegen_src_p1_attrib_x, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG0_LOAD1] = {
         gpir_codegen_src_attrib_y, gpir_codegen_src_p1_attrib_y, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG0_LOAD2] = {
         gpir_codegen_src_attrib_z, gpir_codegen_src_p1_attrib_z, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG0_LOAD3] = {
         gpir_codegen_src_attrib_w, gpir_codegen_src_p1_attrib_w, gpir_codegen_src_unused },

      [GPIR_INSTR_SLOT_REG1_LOAD0] = {
         gpir_codegen_src_register_x, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG1_LOAD1] = {
         gpir_codegen_src_register_y, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG1_LOAD2] = {
         gpir_codegen_src_register_z, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_REG1_LOAD3] = {
         gpir_codegen_src_register_w, gpir_codegen_src_unused, gpir_codegen_src_unused },

      [GPIR_INSTR_SLOT_MEM_LOAD0] = {
         gpir_codegen_src_load_x, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_MEM_LOAD1] = {
         gpir_codegen_src_load_y, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_MEM_LOAD2] = {
         gpir_codegen_src_load_z, gpir_codegen_src_unused, gpir_codegen_src_unused },
      [GPIR_INSTR_SLOT_MEM_LOAD3] = {
         gpir_codegen_src_load_w, gpir_codegen_src_unused, gpir_codegen_src_unused },
   };

   int distance = child->sched.instr - parent->sched.instr;

   /* Both table indices must be in range; a negative distance would
    * read in front of the row just as a large one reads behind it.
    */
   assert(distance >= 0 && distance < 3);
   assert(child->sched.pos < GPIR_INSTR_SLOT_NUM);

   return slot_to_src[child->sched.pos][distance];
}
|
||||
|
||||
/*
 * Encode whatever node sits in the MUL0 slot of @instr into the mul0_*
 * fields (and, for complex1/complex2/select, mul_op) of @code.
 * An empty slot marks both operands as unused.
 */
static void gpir_codegen_mul0_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL0];
   gpir_alu_node *alu;

   if (node == NULL) {
      code->mul0_src0 = gpir_codegen_src_unused;
      code->mul0_src1 = gpir_codegen_src_unused;
      return;
   }

   alu = gpir_node_to_alu(node);

   switch (node->op) {
   case gpir_op_mul: {
      gpir_codegen_src lhs = gpir_get_alu_input(node, alu->children[0]);
      gpir_codegen_src rhs = gpir_get_alu_input(node, alu->children[1]);
      bool negate = alu->dest_negate;

      if (rhs == gpir_codegen_src_p1_complex) {
         /* p1_complex shares its encoding with gpir_codegen_src_ident,
          * so it may only appear as the first operand: swap them.
          */
         rhs = lhs;
         lhs = gpir_codegen_src_p1_complex;
      }
      code->mul0_src0 = lhs;
      code->mul0_src1 = rhs;

      /* Each negated operand flips the sign of the product. */
      if (alu->children_negate[0])
         negate = !negate;
      if (alu->children_negate[1])
         negate = !negate;
      code->mul0_neg = negate;
      break;
   }

   case gpir_op_neg:
   case gpir_op_mov:
      /* neg is a mov with the negate bit set; both multiply by ident. */
      if (node->op == gpir_op_neg)
         code->mul0_neg = true;
      code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->mul0_src1 = gpir_codegen_src_ident;
      break;

   case gpir_op_complex1:
      code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->mul0_src1 = gpir_get_alu_input(node, alu->children[1]);
      code->mul_op = gpir_codegen_mul_op_complex1;
      break;

   case gpir_op_complex2: {
      /* Both operands read the same child. */
      gpir_codegen_src operand = gpir_get_alu_input(node, alu->children[0]);

      code->mul0_src0 = operand;
      code->mul0_src1 = operand;
      code->mul_op = gpir_codegen_mul_op_complex2;
      break;
   }

   case gpir_op_select:
      code->mul0_src0 = gpir_get_alu_input(node, alu->children[2]);
      code->mul0_src1 = gpir_get_alu_input(node, alu->children[0]);
      code->mul_op = gpir_codegen_mul_op_select;
      break;

   default:
      assert(0);
   }
}
|
||||
|
||||
/*
 * Encode whatever node sits in the MUL1 slot of @instr into the mul1_*
 * fields of @code.  Unlike the MUL0 encoder this one never writes mul_op.
 * An empty slot marks both operands as unused.
 */
static void gpir_codegen_mul1_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL1];
   gpir_alu_node *alu;

   if (node == NULL) {
      code->mul1_src0 = gpir_codegen_src_unused;
      code->mul1_src1 = gpir_codegen_src_unused;
      return;
   }

   alu = gpir_node_to_alu(node);

   switch (node->op) {
   case gpir_op_mul: {
      gpir_codegen_src lhs = gpir_get_alu_input(node, alu->children[0]);
      gpir_codegen_src rhs = gpir_get_alu_input(node, alu->children[1]);
      bool negate = alu->dest_negate;

      if (rhs == gpir_codegen_src_p1_complex) {
         /* p1_complex shares its encoding with gpir_codegen_src_ident,
          * so it may only appear as the first operand: swap them.
          */
         rhs = lhs;
         lhs = gpir_codegen_src_p1_complex;
      }
      code->mul1_src0 = lhs;
      code->mul1_src1 = rhs;

      /* Each negated operand flips the sign of the product. */
      if (alu->children_negate[0])
         negate = !negate;
      if (alu->children_negate[1])
         negate = !negate;
      code->mul1_neg = negate;
      break;
   }

   case gpir_op_neg:
   case gpir_op_mov:
      /* neg is a mov with the negate bit set; both multiply by ident. */
      if (node->op == gpir_op_neg)
         code->mul1_neg = true;
      code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->mul1_src1 = gpir_codegen_src_ident;
      break;

   case gpir_op_complex1:
      /* Reads children 0 and 2 (the MUL0 encoder reads 0 and 1). */
      code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->mul1_src1 = gpir_get_alu_input(node, alu->children[2]);
      break;

   case gpir_op_select:
      code->mul1_src0 = gpir_get_alu_input(node, alu->children[1]);
      code->mul1_src1 = gpir_codegen_src_unused;
      break;

   default:
      assert(0);
   }
}
|
||||
|
||||
/*
 * Encode whatever node sits in the ADD0 slot of @instr into the acc0_*
 * fields and the acc_op field of @code.
 * An empty slot marks both operands as unused.
 */
static void gpir_codegen_add0_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD0];
   gpir_alu_node *alu;

   if (node == NULL) {
      code->acc0_src0 = gpir_codegen_src_unused;
      code->acc0_src1 = gpir_codegen_src_unused;
      return;
   }

   alu = gpir_node_to_alu(node);

   switch (node->op) {
   /* Two-operand operations. */
   case gpir_op_add:
   case gpir_op_min:
   case gpir_op_max:
   case gpir_op_lt:
   case gpir_op_ge: {
      gpir_codegen_src lhs = gpir_get_alu_input(node, alu->children[0]);
      gpir_codegen_src rhs = gpir_get_alu_input(node, alu->children[1]);
      bool lhs_neg = alu->children_negate[0];
      bool rhs_neg = alu->children_negate[1];

      if (node->op == gpir_op_add) {
         code->acc_op = gpir_codegen_acc_op_add;
         if (rhs == gpir_codegen_src_p1_complex) {
            /* p1_complex has to be the first operand; swap the operands
             * together with their negate flags.
             */
            bool neg_tmp = lhs_neg;

            rhs = lhs;
            lhs = gpir_codegen_src_p1_complex;
            lhs_neg = rhs_neg;
            rhs_neg = neg_tmp;
         }
      } else if (node->op == gpir_op_min) {
         code->acc_op = gpir_codegen_acc_op_min;
      } else if (node->op == gpir_op_max) {
         code->acc_op = gpir_codegen_acc_op_max;
      } else if (node->op == gpir_op_lt) {
         code->acc_op = gpir_codegen_acc_op_lt;
      } else {
         /* Only gpir_op_ge is left among the case labels above. */
         code->acc_op = gpir_codegen_acc_op_ge;
      }

      code->acc0_src0 = lhs;
      code->acc0_src1 = rhs;
      code->acc0_src0_neg = lhs_neg;
      code->acc0_src1_neg = rhs_neg;
      break;
   }

   /* Single-operand operations. */
   case gpir_op_floor:
   case gpir_op_sign:
      code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->acc0_src0_neg = alu->children_negate[0];
      code->acc_op = node->op == gpir_op_floor ?
         gpir_codegen_acc_op_floor : gpir_codegen_acc_op_sign;
      break;

   case gpir_op_neg:
   case gpir_op_mov:
      /* Both are encoded as an add with a negated ident as the second
       * operand; neg additionally negates the first operand.
       */
      if (node->op == gpir_op_neg)
         code->acc0_src0_neg = true;
      code->acc_op = gpir_codegen_acc_op_add;
      code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->acc0_src1 = gpir_codegen_src_ident;
      code->acc0_src1_neg = true;
      break;

   default:
      assert(0);
   }
}
|
||||
|
||||
/*
 * Encode whatever node sits in the ADD1 slot of @instr into the acc1_*
 * fields and the acc_op field of @code (acc_op is the same field the
 * ADD0 encoder writes).
 * An empty slot marks both operands as unused.
 */
static void gpir_codegen_add1_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD1];
   gpir_alu_node *alu;

   if (node == NULL) {
      code->acc1_src0 = gpir_codegen_src_unused;
      code->acc1_src1 = gpir_codegen_src_unused;
      return;
   }

   alu = gpir_node_to_alu(node);

   switch (node->op) {
   /* Two-operand operations. */
   case gpir_op_add:
   case gpir_op_min:
   case gpir_op_max:
   case gpir_op_lt:
   case gpir_op_ge: {
      gpir_codegen_src lhs = gpir_get_alu_input(node, alu->children[0]);
      gpir_codegen_src rhs = gpir_get_alu_input(node, alu->children[1]);
      bool lhs_neg = alu->children_negate[0];
      bool rhs_neg = alu->children_negate[1];

      if (node->op == gpir_op_add) {
         code->acc_op = gpir_codegen_acc_op_add;
         if (rhs == gpir_codegen_src_p1_complex) {
            /* p1_complex has to be the first operand; swap the operands
             * together with their negate flags.
             */
            bool neg_tmp = lhs_neg;

            rhs = lhs;
            lhs = gpir_codegen_src_p1_complex;
            lhs_neg = rhs_neg;
            rhs_neg = neg_tmp;
         }
      } else if (node->op == gpir_op_min) {
         code->acc_op = gpir_codegen_acc_op_min;
      } else if (node->op == gpir_op_max) {
         code->acc_op = gpir_codegen_acc_op_max;
      } else if (node->op == gpir_op_lt) {
         code->acc_op = gpir_codegen_acc_op_lt;
      } else {
         /* Only gpir_op_ge is left among the case labels above. */
         code->acc_op = gpir_codegen_acc_op_ge;
      }

      code->acc1_src0 = lhs;
      code->acc1_src1 = rhs;
      code->acc1_src0_neg = lhs_neg;
      code->acc1_src1_neg = rhs_neg;
      break;
   }

   /* Single-operand operations. */
   case gpir_op_floor:
   case gpir_op_sign:
      code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->acc1_src0_neg = alu->children_negate[0];
      code->acc_op = node->op == gpir_op_floor ?
         gpir_codegen_acc_op_floor : gpir_codegen_acc_op_sign;
      break;

   case gpir_op_neg:
   case gpir_op_mov:
      /* Both are encoded as an add with a negated ident as the second
       * operand; neg additionally negates the first operand.
       */
      if (node->op == gpir_op_neg)
         code->acc1_src0_neg = true;
      code->acc_op = gpir_codegen_acc_op_add;
      code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]);
      code->acc1_src1 = gpir_codegen_src_ident;
      code->acc1_src1_neg = true;
      break;

   default:
      assert(0);
   }
}
|
||||
|
||||
/*
 * Encode the node in the COMPLEX slot of @instr: complex_src selects its
 * only operand and complex_op the operation (pass, rcp or rsqrt).
 * An empty slot just marks the source as unused.
 */
static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_COMPLEX];
   gpir_codegen_complex_op complex_op;
   gpir_alu_node *alu;

   if (node == NULL) {
      code->complex_src = gpir_codegen_src_unused;
      return;
   }

   switch (node->op) {
   case gpir_op_mov:
      complex_op = gpir_codegen_complex_op_pass;
      break;
   case gpir_op_rcp_impl:
      complex_op = gpir_codegen_complex_op_rcp;
      break;
   case gpir_op_rsqrt_impl:
      complex_op = gpir_codegen_complex_op_rsqrt;
      break;
   default:
      /* No other operation is expected in this slot. */
      assert(0);
      return;
   }

   alu = gpir_node_to_alu(node);
   code->complex_src = gpir_get_alu_input(node, alu->children[0]);
   code->complex_op = complex_op;
}
|
||||
|
||||
/*
 * Encode the node in the PASS slot of @instr.  Only gpir_op_mov is
 * handled; an empty slot is encoded as a pass operation with an unused
 * source.
 */
static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   gpir_node *node = instr->slots[GPIR_INSTR_SLOT_PASS];
   gpir_alu_node *alu;

   if (node == NULL) {
      code->pass_op = gpir_codegen_pass_op_pass;
      code->pass_src = gpir_codegen_src_unused;
      return;
   }

   if (node->op != gpir_op_mov) {
      /* No other operation is expected in this slot. */
      assert(0);
      return;
   }

   alu = gpir_node_to_alu(node);
   code->pass_src = gpir_get_alu_input(node, alu->children[0]);
   code->pass_op = gpir_codegen_pass_op_pass;
}
|
||||
|
||||
/*
 * Branch slot encoder.  Nothing is written to @code; a node in the
 * BRANCH slot trips the assertion.
 */
static void gpir_codegen_branch_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   if (instr->slots[GPIR_INSTR_SLOT_BRANCH] != NULL)
      assert(0);
}
|
||||
|
||||
/*
 * Copy the register-0 load description of @instr (attribute flag and
 * index) into @code.  Left untouched when nothing uses register 0.
 */
static void gpir_codegen_reg0_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   if (instr->reg0_use_count) {
      code->register0_attribute = instr->reg0_is_attr;
      code->register0_addr = instr->reg0_index;
   }
}
|
||||
|
||||
/*
 * Copy the register-1 load index of @instr into @code.  Left untouched
 * when nothing uses register 1.
 */
static void gpir_codegen_reg1_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   if (instr->reg1_use_count)
      code->register1_addr = instr->reg1_index;
}
|
||||
|
||||
/*
 * Encode the memory load of @instr.  The load offset is always
 * gpir_codegen_load_off_none; the load address is only written when the
 * instruction actually uses the memory load slots.
 */
static void gpir_codegen_mem_slot(gpir_codegen_instr *code, gpir_instr *instr)
{
   code->load_offset = gpir_codegen_load_off_none;

   if (instr->mem_use_count)
      code->load_addr = instr->mem_index;
}
|
||||
|
||||
/*
 * Return the store source selector matching the slot in which the child
 * of store node @node was scheduled.  Children in the MUL, ADD, COMPLEX
 * and PASS slots have a selector of their own; every slot from BRANCH up
 * to STORE3 maps to gpir_codegen_store_src_none.
 */
static gpir_codegen_store_src gpir_get_store_input(gpir_node *node)
{
   /* Read-only lookup table indexed by the slot of the child. */
   static const int slot_to_src[GPIR_INSTR_SLOT_NUM] = {
      [GPIR_INSTR_SLOT_MUL0] = gpir_codegen_store_src_mul_0,
      [GPIR_INSTR_SLOT_MUL1] = gpir_codegen_store_src_mul_1,
      [GPIR_INSTR_SLOT_ADD0] = gpir_codegen_store_src_acc_0,
      [GPIR_INSTR_SLOT_ADD1] = gpir_codegen_store_src_acc_1,
      [GPIR_INSTR_SLOT_COMPLEX] = gpir_codegen_store_src_complex,
      [GPIR_INSTR_SLOT_PASS] = gpir_codegen_store_src_pass,
      [GPIR_INSTR_SLOT_BRANCH ... GPIR_INSTR_SLOT_STORE3] = gpir_codegen_store_src_none,
   };

   gpir_store_node *store = gpir_node_to_store(node);

   /* Guard the table lookup against an out-of-range slot. */
   assert(store->child->sched.pos < GPIR_INSTR_SLOT_NUM);

   return slot_to_src[store->child->sched.pos];
}
|
||||
|
||||
static void gpir_codegen_store_slot(gpir_codegen_instr *code, gpir_instr *instr)
|
||||
{
|
||||
|
||||
gpir_node *node = instr->slots[GPIR_INSTR_SLOT_STORE0];
|
||||
if (node)
|
||||
code->store0_src_x = gpir_get_store_input(node);
|
||||
else
|
||||
code->store0_src_x = gpir_codegen_store_src_none;
|
||||
|
||||
node = instr->slots[GPIR_INSTR_SLOT_STORE1];
|
||||
if (node)
|
||||
code->store0_src_y = gpir_get_store_input(node);
|
||||
else
|
||||
code->store0_src_y = gpir_codegen_store_src_none;
|
||||
|
||||
node = instr->slots[GPIR_INSTR_SLOT_STORE2];
|
||||
if (node)
|
||||
code->store1_src_z = gpir_get_store_input(node);
|
||||
else
|
||||
code->store1_src_z = gpir_codegen_store_src_none;
|
||||
|
||||
node = instr->slots[GPIR_INSTR_SLOT_STORE3];
|
||||
if (node)
|
||||
code->store1_src_w = gpir_get_store_input(node);
|
||||
else
|
||||
code->store1_src_w = gpir_codegen_store_src_none;
|
||||
|
||||
if (instr->store_content[0] == GPIR_INSTR_STORE_TEMP) {
|
||||
code->store0_temporary = true;
|
||||
code->unknown_1 = 12;
|
||||
}
|
||||
else {
|
||||
code->store0_varying = instr->store_content[0] == GPIR_INSTR_STORE_VARYING;
|
||||
code->store0_addr = instr->store_index[0];
|
||||
}
|
||||
|
||||
if (instr->store_content[1] == GPIR_INSTR_STORE_TEMP) {
|
||||
code->store1_temporary = true;
|
||||
code->unknown_1 = 12;
|
||||
}
|
||||
else {
|
||||
code->store1_varying = instr->store_content[1] == GPIR_INSTR_STORE_VARYING;
|
||||
code->store1_addr = instr->store_index[1];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encode one scheduled instruction: run every slot encoder on @instr and
 * let each fill in its part of @code.
 */
static void gpir_codegen(gpir_codegen_instr *code, gpir_instr *instr)
{
   typedef void (*slot_encoder)(gpir_codegen_instr *, gpir_instr *);

   /* Kept in the original call order: the ADD0 and ADD1 encoders both
    * write acc_op, so the later one wins.
    */
   static const slot_encoder encoders[] = {
      gpir_codegen_mul0_slot,
      gpir_codegen_mul1_slot,

      gpir_codegen_add0_slot,
      gpir_codegen_add1_slot,

      gpir_codegen_complex_slot,
      gpir_codegen_pass_slot,
      gpir_codegen_branch_slot,

      gpir_codegen_reg0_slot,
      gpir_codegen_reg1_slot,
      gpir_codegen_mem_slot,

      gpir_codegen_store_slot,
   };

   for (unsigned i = 0; i < sizeof(encoders) / sizeof(encoders[0]); i++)
      encoders[i](code, instr);
}
|
||||
|
||||
/*
 * Dump the encoded program of @comp to stdout: one line per instruction,
 * made of a three-digit index followed by the instruction's 32-bit words
 * in hexadecimal.
 */
static void gpir_codegen_print_prog(gpir_compiler *comp)
{
   const uint32_t *word = comp->prog->shader;
   int size = comp->prog->shader_size;
   int instr_count = size / sizeof(gpir_codegen_instr);
   int words_per_instr = sizeof(gpir_codegen_instr) / sizeof(uint32_t);

   for (int instr = 0; instr < instr_count; instr++) {
      printf("%03d: ", instr);

      /* 'word' walks linearly through the whole program. */
      for (int w = 0; w < words_per_instr; w++, word++)
         printf("%08x ", *word);

      printf("\n");
   }
}
|
||||
|
||||
/*
 * Generate the binary for every scheduled instruction of @comp.
 *
 * The instructions of all blocks are encoded back to back into one array
 * allocated with comp->prog as ralloc parent; the array and its size in
 * bytes are stored in comp->prog->shader / shader_size.  prog->prefetch
 * ends up as the index of the last instruction whose register0_attribute
 * bit is set (it is not written if there is none).
 *
 * Returns false if the allocation fails, true otherwise.
 */
bool gpir_codegen_prog(gpir_compiler *comp)
{
   int total_instrs = 0;

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      total_instrs += list_length(&block->instr_list);
   }

   gpir_codegen_instr *binary = rzalloc_array(comp->prog, gpir_codegen_instr, total_instrs);
   if (binary == NULL)
      return false;

   /* Encode block after block, instruction after instruction. */
   gpir_codegen_instr *out = binary;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_instr, instr, &block->instr_list, list) {
         gpir_codegen(out, instr);
         out++;
      }
   }

   /* The last match wins. */
   for (int idx = 0; idx < total_instrs; idx++) {
      if (binary[idx].register0_attribute)
         comp->prog->prefetch = idx;
   }

   comp->prog->shader = binary;
   comp->prog->shader_size = total_instrs * sizeof(gpir_codegen_instr);

   if (lima_debug & LIMA_DEBUG_GP) {
      gpir_codegen_print_prog(comp);
      gpir_disassemble_program(binary, total_instrs);
   }

   return true;
}
|
||||
|
||||
/*
 * Translate a gpir operation into the acc_op value used to encode it.
 * add, neg and mov all map to the add operation.  Any operation not
 * listed here trips the assertion and yields -1.
 */
static gpir_codegen_acc_op gpir_codegen_get_acc_op(gpir_op op)
{
   gpir_codegen_acc_op acc_op = -1;

   switch (op) {
   case gpir_op_add:
   case gpir_op_neg:
   case gpir_op_mov:
      acc_op = gpir_codegen_acc_op_add;
      break;
   case gpir_op_min:
      acc_op = gpir_codegen_acc_op_min;
      break;
   case gpir_op_max:
      acc_op = gpir_codegen_acc_op_max;
      break;
   case gpir_op_lt:
      acc_op = gpir_codegen_acc_op_lt;
      break;
   case gpir_op_ge:
      acc_op = gpir_codegen_acc_op_ge;
      break;
   case gpir_op_floor:
      acc_op = gpir_codegen_acc_op_floor;
      break;
   case gpir_op_sign:
      acc_op = gpir_codegen_acc_op_sign;
      break;
   default:
      assert(0);
      break;
   }

   return acc_op;
}
|
||||
|
||||
/*
 * Tell whether @op1 and @op2 are encoded with the same acc_op value
 * (for instance add, neg and mov all are).
 */
bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2)
{
   gpir_codegen_acc_op first = gpir_codegen_get_acc_op(op1);
   gpir_codegen_acc_op second = gpir_codegen_get_acc_op(op2);

   return first == second;
}
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
* Copyright (c) 2013 Ben Brewer (ben.brewer@codethink.co.uk)
|
||||
* Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIMA_IR_GP_CODEGEN_H
|
||||
#define LIMA_IR_GP_CODEGEN_H
|
||||
|
||||
/*
 * Source selector values, stored in the 5-bit *_src fields of
 * gpir_codegen_instr.
 */
typedef enum {
   /* attribute / register loads */
   gpir_codegen_src_attrib_x     = 0,
   gpir_codegen_src_attrib_y     = 1,
   gpir_codegen_src_attrib_z     = 2,
   gpir_codegen_src_attrib_w     = 3,
   gpir_codegen_src_register_x   = 4,
   gpir_codegen_src_register_y   = 5,
   gpir_codegen_src_register_z   = 6,
   gpir_codegen_src_register_w   = 7,

   gpir_codegen_src_unknown_0    = 8,
   gpir_codegen_src_unknown_1    = 9,
   gpir_codegen_src_unknown_2    = 10,
   gpir_codegen_src_unknown_3    = 11,

   /* memory loads */
   gpir_codegen_src_load_x       = 12,
   gpir_codegen_src_load_y       = 13,
   gpir_codegen_src_load_z       = 14,
   gpir_codegen_src_load_w       = 15,

   /* "p1" selectors */
   gpir_codegen_src_p1_acc_0     = 16,
   gpir_codegen_src_p1_acc_1     = 17,
   gpir_codegen_src_p1_mul_0     = 18,
   gpir_codegen_src_p1_mul_1     = 19,
   gpir_codegen_src_p1_pass      = 20,

   gpir_codegen_src_unused       = 21,

   /* ident and p1_complex deliberately share one encoding (22) */
   gpir_codegen_src_ident        = 22,
   gpir_codegen_src_p1_complex   = gpir_codegen_src_ident,

   /* "p2" selectors */
   gpir_codegen_src_p2_pass      = 23,
   gpir_codegen_src_p2_acc_0     = 24,
   gpir_codegen_src_p2_acc_1     = 25,
   gpir_codegen_src_p2_mul_0     = 26,
   gpir_codegen_src_p2_mul_1     = 27,

   gpir_codegen_src_p1_attrib_x  = 28,
   gpir_codegen_src_p1_attrib_y  = 29,
   gpir_codegen_src_p1_attrib_z  = 30,
   gpir_codegen_src_p1_attrib_w  = 31,
} gpir_codegen_src;
|
||||
|
||||
/* Values of the 3-bit load_offset field of gpir_codegen_instr. */
typedef enum {
   gpir_codegen_load_off_ld_addr_0 = 1,
   gpir_codegen_load_off_ld_addr_1 = 2,
   gpir_codegen_load_off_ld_addr_2 = 3,
   gpir_codegen_load_off_none      = 7,
} gpir_codegen_load_off;
|
||||
|
||||
/* Values of the 3-bit store*_src_* fields of gpir_codegen_instr. */
typedef enum {
   gpir_codegen_store_src_acc_0   = 0,
   gpir_codegen_store_src_acc_1   = 1,
   gpir_codegen_store_src_mul_0   = 2,
   gpir_codegen_store_src_mul_1   = 3,
   gpir_codegen_store_src_pass    = 4,
   gpir_codegen_store_src_unknown = 5,
   gpir_codegen_store_src_complex = 6,
   gpir_codegen_store_src_none    = 7,
} gpir_codegen_store_src;
|
||||
|
||||
/* Values of the 3-bit acc_op field of gpir_codegen_instr (3 is not named). */
typedef enum {
   gpir_codegen_acc_op_add   = 0,
   gpir_codegen_acc_op_floor = 1,
   gpir_codegen_acc_op_sign  = 2,
   gpir_codegen_acc_op_ge    = 4,
   gpir_codegen_acc_op_lt    = 5,
   gpir_codegen_acc_op_min   = 6,
   gpir_codegen_acc_op_max   = 7,
} gpir_codegen_acc_op;
|
||||
|
||||
/* Values of the 4-bit complex_op field of gpir_codegen_instr. */
typedef enum {
   gpir_codegen_complex_op_nop              = 0,
   gpir_codegen_complex_op_exp2             = 2,
   gpir_codegen_complex_op_log2             = 3,
   gpir_codegen_complex_op_rsqrt            = 4,
   gpir_codegen_complex_op_rcp              = 5,
   gpir_codegen_complex_op_pass             = 9,
   gpir_codegen_complex_op_temp_store_addr  = 12,
   gpir_codegen_complex_op_temp_load_addr_0 = 13,
   gpir_codegen_complex_op_temp_load_addr_1 = 14,
   gpir_codegen_complex_op_temp_load_addr_2 = 15,
} gpir_codegen_complex_op;
|
||||
|
||||
/* Values of the 3-bit mul_op field of gpir_codegen_instr (2 is not named). */
typedef enum {
   gpir_codegen_mul_op_mul      = 0,
   gpir_codegen_mul_op_complex1 = 1,
   gpir_codegen_mul_op_complex2 = 3,
   gpir_codegen_mul_op_select   = 4,
} gpir_codegen_mul_op;
|
||||
|
||||
/* Values of the 3-bit pass_op field of gpir_codegen_instr. */
typedef enum {
   gpir_codegen_pass_op_pass     = 2,
   gpir_codegen_pass_op_preexp2  = 4,
   gpir_codegen_pass_op_postlog2 = 5,
   gpir_codegen_pass_op_clamp    = 6,
} gpir_codegen_pass_op;
|
||||
|
||||
|
||||
typedef struct __attribute__((__packed__)) {
|
||||
gpir_codegen_src mul0_src0 : 5;
|
||||
gpir_codegen_src mul0_src1 : 5;
|
||||
gpir_codegen_src mul1_src0 : 5;
|
||||
gpir_codegen_src mul1_src1 : 5;
|
||||
bool mul0_neg : 1;
|
||||
bool mul1_neg : 1;
|
||||
gpir_codegen_src acc0_src0 : 5;
|
||||
gpir_codegen_src acc0_src1 : 5;
|
||||
gpir_codegen_src acc1_src0 : 5;
|
||||
gpir_codegen_src acc1_src1 : 5;
|
||||
bool acc0_src0_neg : 1;
|
||||
bool acc0_src1_neg : 1;
|
||||
bool acc1_src0_neg : 1;
|
||||
bool acc1_src1_neg : 1;
|
||||
unsigned load_addr : 9;
|
||||
gpir_codegen_load_off load_offset : 3;
|
||||
unsigned register0_addr : 4;
|
||||
bool register0_attribute : 1;
|
||||
unsigned register1_addr : 4;
|
||||
bool store0_temporary : 1;
|
||||
bool store1_temporary : 1;
|
||||
bool branch : 1;
|
||||
bool branch_target_lo : 1;
|
||||
gpir_codegen_store_src store0_src_x : 3;
|
||||
gpir_codegen_store_src store0_src_y : 3;
|
||||
gpir_codegen_store_src store1_src_z : 3;
|
||||
gpir_codegen_store_src store1_src_w : 3;
|
||||
gpir_codegen_acc_op acc_op : 3;
|
||||
gpir_codegen_complex_op complex_op : 4;
|
||||
unsigned store0_addr : 4;
|
||||
bool store0_varying : 1;
|
||||
unsigned store1_addr : 4;
|
||||
bool store1_varying : 1;
|
||||
gpir_codegen_mul_op mul_op : 3;
|
||||
gpir_codegen_pass_op pass_op : 3;
|
||||
gpir_codegen_src complex_src : 5;
|
||||
gpir_codegen_src pass_src : 5;
|
||||
unsigned unknown_1 : 4; /* 12: tmp_st, 13: branch */
|
||||
unsigned branch_target : 8;
|
||||
} gpir_codegen_instr;
|
||||
|
||||
/*
 * Disassembler entry point: takes an array of @num_instr encoded
 * instructions starting at @code.  Presumably prints a human-readable
 * listing; the implementation is not in this header.
 */
void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,568 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Lima Project
|
||||
*
|
||||
* Copyright (c) 2013 Codethink (http://www.codethink.co.uk)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gpir.h"
|
||||
#include "codegen.h"
|
||||
|
||||
/* Execution units whose results the disassembler numbers.
 *
 * The enumerator value is the unit's offset inside one instruction's group of
 * "^N" result indices; num_units is the size of that group.
 */
typedef enum {
   unit_acc_0,
   unit_acc_1,
   unit_mul_0,
   unit_mul_1,
   unit_pass,
   unit_complex,
   num_units
} gp_unit;
|
||||
|
||||
static const gpir_codegen_store_src gp_unit_to_store_src[num_units] = {
|
||||
[unit_acc_0] = gpir_codegen_store_src_acc_0,
|
||||
[unit_acc_1] = gpir_codegen_store_src_acc_1,
|
||||
[unit_mul_0] = gpir_codegen_store_src_mul_0,
|
||||
[unit_mul_1] = gpir_codegen_store_src_mul_1,
|
||||
[unit_pass] = gpir_codegen_store_src_pass,
|
||||
[unit_complex] = gpir_codegen_store_src_complex,
|
||||
};
|
||||
|
||||
static void
|
||||
print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index)
|
||||
{
|
||||
printf("^%u", cur_dest_index + unit);
|
||||
|
||||
gpir_codegen_store_src src = gp_unit_to_store_src[unit];
|
||||
|
||||
if (instr->store0_src_x == src ||
|
||||
instr->store0_src_y == src) {
|
||||
if (instr->store0_temporary) {
|
||||
/* Temporary stores ignore the address, and always use whatever's
|
||||
* stored in address register 0.
|
||||
*/
|
||||
printf("/t[addr0]");
|
||||
} else {
|
||||
if (instr->store0_varying)
|
||||
printf("/v");
|
||||
else
|
||||
printf("/$");
|
||||
printf("%u", instr->store0_addr);
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if (instr->store0_src_x == src)
|
||||
printf("x");
|
||||
if (instr->store0_src_y == src)
|
||||
printf("y");
|
||||
}
|
||||
|
||||
if (instr->store1_src_z == src ||
|
||||
instr->store1_src_w == src) {
|
||||
if (instr->store1_temporary) {
|
||||
printf("/t[addr0]");
|
||||
} else {
|
||||
if (instr->store1_varying)
|
||||
printf("/v");
|
||||
else
|
||||
printf("/$");
|
||||
printf("%u", instr->store1_addr);
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if (instr->store1_src_z == src)
|
||||
printf("z");
|
||||
if (instr->store1_src_w == src)
|
||||
printf("w");
|
||||
}
|
||||
|
||||
if (unit == unit_complex) {
|
||||
switch (instr->complex_op) {
|
||||
case gpir_codegen_complex_op_temp_store_addr:
|
||||
printf("/addr0");
|
||||
break;
|
||||
case gpir_codegen_complex_op_temp_load_addr_0:
|
||||
printf("/addr1");
|
||||
break;
|
||||
case gpir_codegen_complex_op_temp_load_addr_1:
|
||||
printf("/addr2");
|
||||
break;
|
||||
case gpir_codegen_complex_op_temp_load_addr_2:
|
||||
printf("/addr3");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
|
||||
gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned cur_dest_index)
|
||||
{
|
||||
switch (src) {
|
||||
case gpir_codegen_src_attrib_x:
|
||||
case gpir_codegen_src_attrib_y:
|
||||
case gpir_codegen_src_attrib_z:
|
||||
case gpir_codegen_src_attrib_w:
|
||||
printf("%c%d.%c", instr->register0_attribute ? 'a' : '$',
|
||||
instr->register0_addr, "xyzw"[src - gpir_codegen_src_attrib_x]);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_register_x:
|
||||
case gpir_codegen_src_register_y:
|
||||
case gpir_codegen_src_register_z:
|
||||
case gpir_codegen_src_register_w:
|
||||
printf("$%d.%c", instr->register1_addr,
|
||||
"xyzw"[src - gpir_codegen_src_register_x]);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_unknown_0:
|
||||
case gpir_codegen_src_unknown_1:
|
||||
case gpir_codegen_src_unknown_2:
|
||||
case gpir_codegen_src_unknown_3:
|
||||
printf("unknown%d", src - gpir_codegen_src_unknown_0);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_load_x:
|
||||
case gpir_codegen_src_load_y:
|
||||
case gpir_codegen_src_load_z:
|
||||
case gpir_codegen_src_load_w:
|
||||
printf("t[%d", instr->load_addr);
|
||||
switch (instr->load_offset) {
|
||||
case gpir_codegen_load_off_ld_addr_0:
|
||||
printf("+addr1");
|
||||
break;
|
||||
case gpir_codegen_load_off_ld_addr_1:
|
||||
printf("+addr2");
|
||||
break;
|
||||
case gpir_codegen_load_off_ld_addr_2:
|
||||
printf("+addr3");
|
||||
break;
|
||||
case gpir_codegen_load_off_none:
|
||||
break;
|
||||
default:
|
||||
printf("+unk%d", instr->load_offset);
|
||||
}
|
||||
printf("].%c", "xyzw"[src - gpir_codegen_src_load_x]);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_acc_0:
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_acc_0);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_acc_1:
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_acc_1);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_mul_0:
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_mul_0);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_mul_1:
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_mul_1);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_pass:
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_pass);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_unused:
|
||||
printf("unused");
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_complex: /* Also ident */
|
||||
switch (unit) {
|
||||
case unit_acc_0:
|
||||
case unit_acc_1:
|
||||
if (unit_src_num == 1) {
|
||||
printf("0");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case unit_mul_0:
|
||||
case unit_mul_1:
|
||||
if (unit_src_num == 1) {
|
||||
printf("1");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
printf("^%d", cur_dest_index - 1 * num_units + unit_complex);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p2_pass:
|
||||
printf("^%d", cur_dest_index - 2 * num_units + unit_pass);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p2_acc_0:
|
||||
printf("^%d", cur_dest_index - 2 * num_units + unit_acc_0);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p2_acc_1:
|
||||
printf("^%d", cur_dest_index - 2 * num_units + unit_acc_1);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p2_mul_0:
|
||||
printf("^%d", cur_dest_index - 2 * num_units + unit_mul_0);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p2_mul_1:
|
||||
printf("^%d", cur_dest_index - 2 * num_units + unit_mul_1);
|
||||
break;
|
||||
|
||||
case gpir_codegen_src_p1_attrib_x:
|
||||
case gpir_codegen_src_p1_attrib_y:
|
||||
case gpir_codegen_src_p1_attrib_z:
|
||||
case gpir_codegen_src_p1_attrib_w:
|
||||
printf("%c%d.%c", prev_instr->register0_attribute ? 'a' : '$',
|
||||
prev_instr->register0_addr,
|
||||
"xyzw"[src - gpir_codegen_src_attrib_x]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned cur_dest_index)
|
||||
{
|
||||
switch (instr->mul_op) {
|
||||
case gpir_codegen_mul_op_mul:
|
||||
case gpir_codegen_mul_op_complex2:
|
||||
if (instr->mul0_src0 != gpir_codegen_src_unused &&
|
||||
instr->mul0_src1 != gpir_codegen_src_unused) {
|
||||
if (instr->mul0_src1 == gpir_codegen_src_ident &&
|
||||
!instr->mul0_neg) {
|
||||
printf("mov ");
|
||||
print_dest(instr, unit_mul_0, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
} else {
|
||||
if (instr->mul_op == gpir_codegen_mul_op_complex2)
|
||||
printf("complex2 ");
|
||||
else
|
||||
printf("mul ");
|
||||
|
||||
print_dest(instr, unit_mul_0, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
if (instr->mul0_neg)
|
||||
printf("-");
|
||||
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
}
|
||||
|
||||
printf(", ");
|
||||
}
|
||||
|
||||
if (instr->mul1_src0 != gpir_codegen_src_unused &&
|
||||
instr->mul1_src1 != gpir_codegen_src_unused) {
|
||||
if (instr->mul1_src1 == gpir_codegen_src_ident &&
|
||||
!instr->mul1_neg) {
|
||||
printf("mov ");
|
||||
print_dest(instr, unit_mul_1, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
} else {
|
||||
printf("mul ");
|
||||
print_dest(instr, unit_mul_1, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
if (instr->mul1_neg)
|
||||
printf("-");
|
||||
print_src(instr->mul1_src1, unit_mul_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
case gpir_codegen_mul_op_complex1:
|
||||
printf("complex1 ");
|
||||
print_dest(instr, unit_mul_0, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
break;
|
||||
|
||||
case gpir_codegen_mul_op_select:
|
||||
printf("sel ");
|
||||
print_dest(instr, unit_mul_0, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("unknown%u ", instr->mul_op);
|
||||
print_dest(instr, unit_mul_0, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
break;
|
||||
}
|
||||
|
||||
printf(", ");
|
||||
}
|
||||
|
||||
/* Disassembly description of one accumulator opcode. */
typedef struct {
   const char *name; /* mnemonic; NULL when the opcode has no table entry */
   unsigned srcs;    /* number of source operands to print */
} acc_op_info;
|
||||
|
||||
#define CASE(_name, _srcs) \
|
||||
[gpir_codegen_acc_op_##_name] = { \
|
||||
.name = #_name, \
|
||||
.srcs = _srcs \
|
||||
}
|
||||
|
||||
static const acc_op_info acc_op_infos[8] = {
|
||||
CASE(add, 2),
|
||||
CASE(floor, 1),
|
||||
CASE(sign, 1),
|
||||
CASE(ge, 2),
|
||||
CASE(lt, 2),
|
||||
CASE(min, 2),
|
||||
CASE(max, 2),
|
||||
};
|
||||
|
||||
#undef CASE
|
||||
|
||||
static void
|
||||
print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned cur_dest_index)
|
||||
{
|
||||
const acc_op_info op = acc_op_infos[instr->acc_op];
|
||||
|
||||
if (instr->acc0_src0 != gpir_codegen_src_unused &&
|
||||
instr->acc0_src1 != gpir_codegen_src_unused) {
|
||||
acc_op_info acc0_op = op;
|
||||
if (instr->acc0_src1 == gpir_codegen_src_ident &&
|
||||
instr->acc0_src1_neg) {
|
||||
/* add x, -0 -> mov x */
|
||||
acc0_op.name = "mov";
|
||||
acc0_op.srcs = 1;
|
||||
}
|
||||
|
||||
if (acc0_op.name)
|
||||
printf("%s ", acc0_op.name);
|
||||
else
|
||||
printf("op%u ", instr->acc_op);
|
||||
|
||||
print_dest(instr, unit_acc_0, cur_dest_index);
|
||||
printf(" ");
|
||||
if (instr->acc0_src0_neg)
|
||||
printf("-");
|
||||
print_src(instr->acc0_src0, unit_acc_0, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
if (acc0_op.srcs > 1) {
|
||||
printf(" ");
|
||||
if (instr->acc0_src1_neg)
|
||||
printf("-");
|
||||
print_src(instr->acc0_src1, unit_acc_0, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
}
|
||||
|
||||
printf(", ");
|
||||
}
|
||||
|
||||
if (instr->acc1_src0 != gpir_codegen_src_unused &&
|
||||
instr->acc1_src1 != gpir_codegen_src_unused) {
|
||||
acc_op_info acc1_op = op;
|
||||
if (instr->acc1_src1 == gpir_codegen_src_ident &&
|
||||
instr->acc1_src1_neg) {
|
||||
/* add x, -0 -> mov x */
|
||||
acc1_op.name = "mov";
|
||||
acc1_op.srcs = 1;
|
||||
}
|
||||
|
||||
if (acc1_op.name)
|
||||
printf("%s ", acc1_op.name);
|
||||
else
|
||||
printf("op%u ", instr->acc_op);
|
||||
|
||||
print_dest(instr, unit_acc_1, cur_dest_index);
|
||||
printf(" ");
|
||||
if (instr->acc1_src0_neg)
|
||||
printf("-");
|
||||
print_src(instr->acc1_src0, unit_acc_1, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
if (acc1_op.srcs > 1) {
|
||||
printf(" ");
|
||||
if (instr->acc1_src1_neg)
|
||||
printf("-");
|
||||
print_src(instr->acc1_src1, unit_acc_1, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
}
|
||||
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_pass(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned cur_dest_index)
|
||||
{
|
||||
if (instr->pass_src == gpir_codegen_src_unused)
|
||||
return;
|
||||
|
||||
switch (instr->pass_op) {
|
||||
case gpir_codegen_pass_op_pass:
|
||||
printf("mov ");
|
||||
break;
|
||||
case gpir_codegen_pass_op_preexp2:
|
||||
printf("preexp2 ");
|
||||
break;
|
||||
case gpir_codegen_pass_op_postlog2:
|
||||
printf("postlog2 ");
|
||||
break;
|
||||
case gpir_codegen_pass_op_clamp:
|
||||
printf("clamp ");
|
||||
break;
|
||||
default:
|
||||
printf("unk%u ", instr->pass_op);
|
||||
}
|
||||
|
||||
print_dest(instr, unit_pass, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->pass_src, unit_pass, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
|
||||
if (instr->pass_op == gpir_codegen_pass_op_clamp) {
|
||||
printf(" ");
|
||||
print_src(gpir_codegen_src_load_x, unit_pass, 1, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(gpir_codegen_src_load_y, unit_pass, 2, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
}
|
||||
|
||||
printf(", ");
|
||||
}
|
||||
|
||||
static void
|
||||
print_complex(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned cur_dest_index)
|
||||
{
|
||||
if (instr->complex_src == gpir_codegen_src_unused)
|
||||
return;
|
||||
|
||||
switch (instr->complex_op) {
|
||||
case gpir_codegen_complex_op_nop:
|
||||
return;
|
||||
|
||||
case gpir_codegen_complex_op_exp2:
|
||||
printf("exp2 ");
|
||||
break;
|
||||
case gpir_codegen_complex_op_log2:
|
||||
printf("log2 ");
|
||||
break;
|
||||
case gpir_codegen_complex_op_rsqrt:
|
||||
printf("rsqrt ");
|
||||
break;
|
||||
case gpir_codegen_complex_op_rcp:
|
||||
printf("rcp ");
|
||||
break;
|
||||
case gpir_codegen_complex_op_pass:
|
||||
case gpir_codegen_complex_op_temp_store_addr:
|
||||
case gpir_codegen_complex_op_temp_load_addr_0:
|
||||
case gpir_codegen_complex_op_temp_load_addr_1:
|
||||
case gpir_codegen_complex_op_temp_load_addr_2:
|
||||
printf("mov ");
|
||||
break;
|
||||
default:
|
||||
printf("unk%u ", instr->complex_op);
|
||||
}
|
||||
|
||||
print_dest(instr, unit_complex, cur_dest_index);
|
||||
printf(" ");
|
||||
print_src(instr->complex_src, unit_complex, 0, instr, prev_instr,
|
||||
cur_dest_index);
|
||||
printf(", ");
|
||||
}
|
||||
|
||||
static void
|
||||
print_instr(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
|
||||
unsigned instr_number, unsigned cur_dest_index)
|
||||
{
|
||||
printf("%03d: ", instr_number);
|
||||
print_mul(instr, prev_instr, cur_dest_index);
|
||||
print_acc(instr, prev_instr, cur_dest_index);
|
||||
print_complex(instr, prev_instr, cur_dest_index);
|
||||
print_pass(instr, prev_instr, cur_dest_index);
|
||||
|
||||
if (instr->branch) {
|
||||
/* The branch condition is taken from the current pass unit result */
|
||||
printf("branch ^%d %03d, ", cur_dest_index + unit_pass,
|
||||
instr->branch_target + (instr->branch_target_lo ? 0 : 0x100));
|
||||
}
|
||||
|
||||
if (instr->unknown_1 != 0)
|
||||
printf("unknown_1 %u", instr->unknown_1);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void
|
||||
gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr)
|
||||
{
|
||||
printf("=======disassembly:=======\n");
|
||||
|
||||
unsigned cur_dest_index = 0;
|
||||
unsigned cur_instr = 0;
|
||||
for (gpir_codegen_instr *instr = code; cur_instr < num_instr;
|
||||
instr++, cur_instr++, cur_dest_index += num_units) {
|
||||
print_instr(instr, instr - 1, cur_instr, cur_dest_index);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,392 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
* Copyright (c) 2013 Connor Abbott
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIMA_IR_GP_GPIR_H
|
||||
#define LIMA_IR_GP_GPIR_H
|
||||
|
||||
#include "util/list.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ir/lima_ir.h"
|
||||
|
||||
/* list of operations that a node can do. */
|
||||
/* Operations a gpir node can perform. The value is used as an index into
 * gpir_op_infos[]; gpir_op_num is the number of operations.
 */
typedef enum {
   gpir_op_mov,

   /* mul ops */
   gpir_op_mul,
   gpir_op_select,
   gpir_op_complex1,
   gpir_op_complex2,

   /* add ops */
   gpir_op_add,
   gpir_op_floor,
   gpir_op_sign,
   gpir_op_ge,
   gpir_op_lt,
   gpir_op_min,
   gpir_op_max,
   gpir_op_abs,
   gpir_op_not,

   /* mul/add ops */
   gpir_op_neg,

   /* passthrough ops */
   gpir_op_clamp_const,
   gpir_op_preexp2,
   gpir_op_postlog2,

   /* complex ops */
   gpir_op_exp2_impl,
   gpir_op_log2_impl,
   gpir_op_rcp_impl,
   gpir_op_rsqrt_impl,

   /* load/store ops */
   gpir_op_load_uniform,
   gpir_op_load_temp,
   gpir_op_load_attribute,
   gpir_op_load_reg,
   gpir_op_store_temp,
   gpir_op_store_reg,
   gpir_op_store_varying,
   gpir_op_store_temp_load_off0,
   gpir_op_store_temp_load_off1,
   gpir_op_store_temp_load_off2,

   /* branch */
   gpir_op_branch_cond,

   /* const (emulated) */
   gpir_op_const,

   /* emulated ops */
   gpir_op_exp2,
   gpir_op_log2,
   gpir_op_rcp,
   gpir_op_rsqrt,
   gpir_op_ceil,
   gpir_op_exp,
   gpir_op_log,
   gpir_op_sin,
   gpir_op_cos,
   gpir_op_tan,
   gpir_op_branch_uncond,
   gpir_op_eq,
   gpir_op_ne,

   /* auxiliary ops */
   gpir_op_dummy_f,
   gpir_op_dummy_m,

   gpir_op_num,
} gpir_op;
|
||||
|
||||
/* Kind of a gpir node; each kind has its own node struct that embeds
 * gpir_node as its first member.
 */
typedef enum {
   gpir_node_type_alu,
   gpir_node_type_const,
   gpir_node_type_load,
   gpir_node_type_store,
   gpir_node_type_branch,
} gpir_node_type;
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
bool dest_neg;
|
||||
bool src_neg[4];
|
||||
int *slots;
|
||||
gpir_node_type type;
|
||||
bool spillless;
|
||||
bool may_consume_two_slots;
|
||||
} gpir_op_info;
|
||||
|
||||
extern const gpir_op_info gpir_op_infos[];
|
||||
|
||||
typedef struct {
|
||||
enum {
|
||||
GPIR_DEP_INPUT, /* def is the input of use */
|
||||
GPIR_DEP_OFFSET, /* def is the offset of use (i.e. temp store) */
|
||||
GPIR_DEP_READ_AFTER_WRITE,
|
||||
GPIR_DEP_WRITE_AFTER_READ,
|
||||
GPIR_DEP_VREG_READ_AFTER_WRITE,
|
||||
GPIR_DEP_VREG_WRITE_AFTER_READ,
|
||||
} type;
|
||||
|
||||
/* node execute before succ */
|
||||
struct gpir_node *pred;
|
||||
/* node execute after pred */
|
||||
struct gpir_node *succ;
|
||||
|
||||
/* for node pred_list */
|
||||
struct list_head pred_link;
|
||||
/* for ndoe succ_list */
|
||||
struct list_head succ_link;
|
||||
} gpir_dep;
|
||||
|
||||
typedef struct gpir_node {
|
||||
struct list_head list;
|
||||
gpir_op op;
|
||||
gpir_node_type type;
|
||||
int index;
|
||||
char name[16];
|
||||
bool printed;
|
||||
struct gpir_block *block;
|
||||
|
||||
/* for nodes relationship */
|
||||
/* for node who uses this node (successor) */
|
||||
struct list_head succ_list;
|
||||
/* for node this node uses (predecessor) */
|
||||
struct list_head pred_list;
|
||||
|
||||
/* for scheduler and regalloc */
|
||||
int value_reg;
|
||||
union {
|
||||
struct {
|
||||
int instr;
|
||||
int pos;
|
||||
int dist;
|
||||
int index;
|
||||
bool ready;
|
||||
bool inserted;
|
||||
} sched;
|
||||
struct {
|
||||
int parent_index;
|
||||
float reg_pressure;
|
||||
int est;
|
||||
bool scheduled;
|
||||
} rsched;
|
||||
struct {
|
||||
float index;
|
||||
struct gpir_node *last;
|
||||
} vreg;
|
||||
struct {
|
||||
int index;
|
||||
} preg;
|
||||
};
|
||||
} gpir_node;
|
||||
|
||||
typedef struct {
|
||||
gpir_node node;
|
||||
|
||||
gpir_node *children[3];
|
||||
bool children_negate[3];
|
||||
int num_child;
|
||||
|
||||
bool dest_negate;
|
||||
} gpir_alu_node;
|
||||
|
||||
typedef struct {
|
||||
gpir_node node;
|
||||
union fi value;
|
||||
} gpir_const_node;
|
||||
|
||||
typedef struct {
|
||||
int index;
|
||||
struct list_head list;
|
||||
|
||||
struct list_head defs_list;
|
||||
struct list_head uses_list;
|
||||
|
||||
int start, end;
|
||||
} gpir_reg;
|
||||
|
||||
typedef struct {
|
||||
gpir_node node;
|
||||
|
||||
unsigned index;
|
||||
unsigned component;
|
||||
|
||||
gpir_reg *reg;
|
||||
struct list_head reg_link;
|
||||
} gpir_load_node;
|
||||
|
||||
typedef struct {
|
||||
gpir_node node;
|
||||
|
||||
unsigned index;
|
||||
unsigned component;
|
||||
gpir_node *child;
|
||||
|
||||
gpir_reg *reg;
|
||||
struct list_head reg_link;
|
||||
} gpir_store_node;
|
||||
|
||||
/* Slots of one scheduled instruction. GPIR_INSTR_SLOT_NUM sizes the
 * gpir_instr.slots[] array; the trailing aliases mark inclusive sub-ranges.
 */
enum gpir_instr_slot {
   /* ALU units */
   GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_MUL1,
   GPIR_INSTR_SLOT_ADD0,
   GPIR_INSTR_SLOT_ADD1,
   GPIR_INSTR_SLOT_PASS,
   GPIR_INSTR_SLOT_COMPLEX,

   GPIR_INSTR_SLOT_BRANCH,

   /* four load slots each for register port 0, register port 1 and memory */
   GPIR_INSTR_SLOT_REG0_LOAD0,
   GPIR_INSTR_SLOT_REG0_LOAD1,
   GPIR_INSTR_SLOT_REG0_LOAD2,
   GPIR_INSTR_SLOT_REG0_LOAD3,
   GPIR_INSTR_SLOT_REG1_LOAD0,
   GPIR_INSTR_SLOT_REG1_LOAD1,
   GPIR_INSTR_SLOT_REG1_LOAD2,
   GPIR_INSTR_SLOT_REG1_LOAD3,
   GPIR_INSTR_SLOT_MEM_LOAD0,
   GPIR_INSTR_SLOT_MEM_LOAD1,
   GPIR_INSTR_SLOT_MEM_LOAD2,
   GPIR_INSTR_SLOT_MEM_LOAD3,

   /* four store slots */
   GPIR_INSTR_SLOT_STORE0,
   GPIR_INSTR_SLOT_STORE1,
   GPIR_INSTR_SLOT_STORE2,
   GPIR_INSTR_SLOT_STORE3,

   GPIR_INSTR_SLOT_NUM,
   GPIR_INSTR_SLOT_END,

   GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
};
|
||||
|
||||
typedef struct {
|
||||
int index;
|
||||
struct list_head list;
|
||||
|
||||
gpir_node *slots[GPIR_INSTR_SLOT_NUM];
|
||||
|
||||
int alu_num_slot_free;
|
||||
int alu_num_slot_needed_by_store;
|
||||
|
||||
int reg0_use_count;
|
||||
bool reg0_is_attr;
|
||||
int reg0_index;
|
||||
|
||||
int reg1_use_count;
|
||||
int reg1_index;
|
||||
|
||||
int mem_use_count;
|
||||
bool mem_is_temp;
|
||||
int mem_index;
|
||||
|
||||
enum {
|
||||
GPIR_INSTR_STORE_NONE,
|
||||
GPIR_INSTR_STORE_VARYING,
|
||||
GPIR_INSTR_STORE_REG,
|
||||
GPIR_INSTR_STORE_TEMP,
|
||||
} store_content[2];
|
||||
int store_index[2];
|
||||
} gpir_instr;
|
||||
|
||||
typedef struct gpir_block {
|
||||
struct list_head list;
|
||||
struct list_head node_list;
|
||||
struct list_head instr_list;
|
||||
struct gpir_compiler *comp;
|
||||
|
||||
/* for scheduler */
|
||||
union {
|
||||
struct {
|
||||
int instr_index;
|
||||
} sched;
|
||||
struct {
|
||||
int node_index;
|
||||
} rsched;
|
||||
};
|
||||
} gpir_block;
|
||||
|
||||
typedef struct {
|
||||
gpir_node node;
|
||||
gpir_block *dest;
|
||||
} gpir_branch_node;
|
||||
|
||||
struct lima_vs_shader_state;
|
||||
|
||||
typedef struct gpir_compiler {
|
||||
struct list_head block_list;
|
||||
int cur_index;
|
||||
|
||||
/* array for searching ssa node */
|
||||
gpir_node **var_nodes;
|
||||
|
||||
/* for physical reg */
|
||||
struct list_head reg_list;
|
||||
int cur_reg;
|
||||
|
||||
struct lima_vs_shader_state *prog;
|
||||
int constant_base;
|
||||
} gpir_compiler;
|
||||
|
||||
/* Register counts used by the allocators.
 * NOTE(review): presumably the sizes of the value-register and
 * physical-register files; confirm against the regalloc passes.
 */
#define GPIR_VALUE_REG_NUM    11
#define GPIR_PHYSICAL_REG_NUM 64
|
||||
|
||||
void *gpir_node_create(gpir_block *block, gpir_op op);
|
||||
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
|
||||
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
|
||||
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
|
||||
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
|
||||
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
|
||||
void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
|
||||
void gpir_node_delete(gpir_node *node);
|
||||
void gpir_node_print_prog_dep(gpir_compiler *comp);
|
||||
void gpir_node_print_prog_seq(gpir_compiler *comp);
|
||||
|
||||
/* Iterate over the dependency edges of a node.
 *
 * `dep` names the gpir_dep * loop variable declared by the underlying list
 * macro. The _succ variants walk node->succ_list (edges to the users of the
 * node), the _pred variants walk node->pred_list (edges to the nodes it
 * uses). The _safe variants use list_for_each_entry_safe.
 *
 * `node` is parenthesized so that any pointer expression, not only a plain
 * identifier, can be passed.
 */
#define gpir_node_foreach_succ(node, dep) \
   list_for_each_entry(gpir_dep, dep, &(node)->succ_list, succ_link)
#define gpir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &(node)->succ_list, succ_link)
#define gpir_node_foreach_pred(node, dep) \
   list_for_each_entry(gpir_dep, dep, &(node)->pred_list, pred_link)
#define gpir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &(node)->pred_list, pred_link)
|
||||
|
||||
/* A root is a node with no successors, i.e. its succ_list is empty. */
static inline bool gpir_node_is_root(gpir_node *node)
{
   const bool has_no_succ = list_empty(&node->succ_list);

   return has_no_succ;
}
|
||||
|
||||
/* A leaf is a node with no predecessors, i.e. its pred_list is empty. */
static inline bool gpir_node_is_leaf(gpir_node *node)
{
   const bool has_no_pred = list_empty(&node->pred_list);

   return has_no_pred;
}
|
||||
|
||||
/* Unchecked casts from a node pointer to a specialised node type. They do
 * not inspect node->type, so the caller must know the node's kind.
 */
#define gpir_node_to_alu(node)   ((gpir_alu_node *)(node))
#define gpir_node_to_const(node) ((gpir_const_node *)(node))
#define gpir_node_to_load(node)  ((gpir_load_node *)(node))
#define gpir_node_to_store(node) ((gpir_store_node *)(node))
|
||||
|
||||
gpir_instr *gpir_instr_create(gpir_block *block);
|
||||
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
|
||||
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
|
||||
void gpir_instr_print_prog(gpir_compiler *comp);
|
||||
|
||||
bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
|
||||
|
||||
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
|
||||
bool gpir_post_rsched_lower_prog(gpir_compiler *comp);
|
||||
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
|
||||
bool gpir_value_regalloc_prog(gpir_compiler *comp);
|
||||
bool gpir_physical_regalloc_prog(gpir_compiler *comp);
|
||||
bool gpir_schedule_prog(gpir_compiler *comp);
|
||||
bool gpir_codegen_prog(gpir_compiler *comp);
|
||||
|
||||
gpir_reg *gpir_create_reg(gpir_compiler *comp);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,488 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Allocate a zeroed instruction with `block` as its ralloc parent, give it
 * the block's next instruction index, mark all six ALU slots free and link
 * it into block->instr_list.
 *
 * Returns the new instruction, or NULL when the allocation fails.
 */
gpir_instr *gpir_instr_create(gpir_block *block)
{
   gpir_instr *created = rzalloc(block, gpir_instr);

   if (unlikely(!created))
      return NULL;

   created->alu_num_slot_free = 6;
   created->index = block->sched.instr_index++;
   list_add(&created->list, &block->instr_list);

   return created;
}
|
||||
|
||||
/* For one of the two ADD slots, return the node currently placed in the
 * other ADD slot (possibly NULL). Returns NULL for any non-ADD slot.
 */
static gpir_node *gpir_instr_get_the_other_acc_node(gpir_instr *instr, int slot)
{
   switch (slot) {
   case GPIR_INSTR_SLOT_ADD0:
      return instr->slots[GPIR_INSTR_SLOT_ADD1];
   case GPIR_INSTR_SLOT_ADD1:
      return instr->slots[GPIR_INSTR_SLOT_ADD0];
   default:
      return NULL;
   }
}
|
||||
|
||||
/* Check that placing `node` in `slot` keeps both ADD slots on the same op
 * code. Returns true when the other ADD slot is empty, already holds `node`
 * itself, or holds a node whose op is compatible according to
 * gpir_codegen_acc_same_op().
 */
static bool gpir_instr_check_acc_same_op(gpir_instr *instr, gpir_node *node, int slot)
{
   /* two ACC slots must share the same op code */
   gpir_node *other = gpir_instr_get_the_other_acc_node(instr, slot);

   /* spill move case may get other == node */
   if (!other || other == node)
      return true;

   return gpir_codegen_acc_same_op(node->op, other->op);
}
|
||||
|
||||
/* Number of ALU slots that placing (or removing) node at node->sched.pos
 * accounts for: 1 for ordinary ops; for ops flagged may_consume_two_slots
 * it is 2 when the paired ADD slot is empty and 0 when it is occupied.
 */
static int gpir_instr_get_consume_slot(gpir_instr *instr, gpir_node *node)
{
   if (!gpir_op_infos[node->op].may_consume_two_slots)
      return 1;

   /* at this point node must have the same acc op as the node in the paired
    * slot, so it just consumes the extra slot that node already consumed */
   if (gpir_instr_get_the_other_acc_node(instr, node->sched.pos))
      return 0;

   return 2;
}
|
||||
|
||||
/* Do the ALU slot accounting for placing node at node->sched.pos in instr.
 *
 * Returns false (leaving the counters untouched) when the paired ADD slot
 * holds an incompatible op, or when taking the slot(s) would leave fewer
 * free ALU slots than this instr's store nodes still need for their
 * children.  On success alu_num_slot_free is reduced, and if node is the
 * child of a store node in this instr, alu_num_slot_needed_by_store too.
 */
static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
{
   if (!gpir_instr_check_acc_same_op(instr, node, node->sched.pos))
      return false;

   int consume_slot = gpir_instr_get_consume_slot(instr, node);

   /* check if this node is child of one store node.
    * complex1 won't be any of this instr's store node's child,
    * because it has two instr latency before store can use it.
    *
    * The range is inclusive: STORE3 is a store slot like the others.
    */
   for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
      gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
      if (s && s->child == node) {
         /* acc node may consume 2 slots, so even it's the child of a
          * store node, it may not be inserted successfully, in which
          * case we need a move node for it */
         if (instr->alu_num_slot_free - consume_slot <
             instr->alu_num_slot_needed_by_store - 1)
            return false;

         instr->alu_num_slot_needed_by_store--;
         instr->alu_num_slot_free -= consume_slot;
         return true;
      }
   }

   /* not a child of any store node, so must reserve alu slot for store node */
   if (instr->alu_num_slot_free - consume_slot <
       instr->alu_num_slot_needed_by_store)
      return false;

   instr->alu_num_slot_free -= consume_slot;
   return true;
}

/* Undo the accounting done by gpir_instr_insert_alu_check() for node.
 *
 * The store slot scan must cover exactly the same (inclusive) range as the
 * insert side, otherwise the two counters drift apart.
 */
static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
{
   int consume_slot = gpir_instr_get_consume_slot(instr, node);

   for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
      gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
      if (s && s->child == node) {
         /* the store needs an ALU slot for its child again */
         instr->alu_num_slot_needed_by_store++;
         instr->alu_num_slot_free += consume_slot;
         return;
      }
   }

   instr->alu_num_slot_free += consume_slot;
}
|
||||
|
||||
/* Check whether load node can use the reg0 load slot at node->sched.pos.
 *
 * The slot offset must equal the load's component, an instr whose reg0 is
 * marked as attribute only accepts load_attribute nodes, and all loads
 * sharing reg0 must use the same index.  The first user records the index
 * and whether it is an attribute load.  On success the use count is bumped.
 */
static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
{
   gpir_load_node *load = gpir_node_to_load(node);
   int slot_component = node->sched.pos - GPIR_INSTR_SLOT_REG0_LOAD0;

   if (load->component != slot_component)
      return false;

   if (instr->reg0_is_attr && node->op != gpir_op_load_attribute)
      return false;

   if (!instr->reg0_use_count) {
      /* first user decides what reg0 reads in this instr */
      instr->reg0_is_attr = node->op == gpir_op_load_attribute;
      instr->reg0_index = load->index;
   }
   else if (instr->reg0_index != load->index)
      return false;

   instr->reg0_use_count++;
   return true;
}
|
||||
|
||||
/* Drop one reg0 user; once no user is left the attribute marker is reset. */
static void gpir_instr_remove_reg0(gpir_instr *instr, gpir_node *node)
{
   instr->reg0_use_count -= 1;

   if (instr->reg0_use_count == 0)
      instr->reg0_is_attr = false;
}
|
||||
|
||||
/* Check whether load node can use the reg1 load slot at node->sched.pos.
 *
 * The slot offset must equal the load's component and all loads sharing
 * reg1 must use the same index; the first user records the index.  On
 * success the use count is bumped.
 */
static bool gpir_instr_insert_reg1_check(gpir_instr *instr, gpir_node *node)
{
   gpir_load_node *load = gpir_node_to_load(node);
   int slot_component = node->sched.pos - GPIR_INSTR_SLOT_REG1_LOAD0;

   if (load->component != slot_component)
      return false;

   if (!instr->reg1_use_count)
      instr->reg1_index = load->index;
   else if (instr->reg1_index != load->index)
      return false;

   instr->reg1_use_count++;
   return true;
}
|
||||
|
||||
/* Drop one reg1 user. */
static void gpir_instr_remove_reg1(gpir_instr *instr, gpir_node *node)
{
   instr->reg1_use_count -= 1;
}
|
||||
|
||||
/* Check whether load node can use the mem load slot at node->sched.pos.
 *
 * The slot offset must equal the load's component, an instr whose mem unit
 * is marked as temp only accepts load_temp nodes, and all loads sharing the
 * unit must use the same index.  The first user records the index and
 * whether it is a temp load.  On success the use count is bumped.
 */
static bool gpir_instr_insert_mem_check(gpir_instr *instr, gpir_node *node)
{
   gpir_load_node *load = gpir_node_to_load(node);
   int slot_component = node->sched.pos - GPIR_INSTR_SLOT_MEM_LOAD0;

   if (load->component != slot_component)
      return false;

   if (instr->mem_is_temp && node->op != gpir_op_load_temp)
      return false;

   if (!instr->mem_use_count) {
      /* first user decides what the mem unit reads in this instr */
      instr->mem_is_temp = node->op == gpir_op_load_temp;
      instr->mem_index = load->index;
   }
   else if (instr->mem_index != load->index)
      return false;

   instr->mem_use_count++;
   return true;
}
|
||||
|
||||
/* Drop one mem load user; once no user is left the temp marker is reset. */
static void gpir_instr_remove_mem(gpir_instr *instr, gpir_node *node)
{
   instr->mem_use_count -= 1;

   if (instr->mem_use_count == 0)
      instr->mem_is_temp = false;
}
|
||||
|
||||
/* Check whether store node can be placed at node->sched.pos and, if so,
 * update the store unit bookkeeping of instr.
 *
 * Two neighbouring store slots form one unit (slot offset >> 1) that has a
 * single content kind and index.  Unless the store's child is already
 * accounted for (another store in this instr uses it, or it already sits in
 * an ALU slot), one free ALU slot is reserved for the child.
 */
static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
{
   gpir_store_node *store = gpir_node_to_store(node);
   int unit = node->sched.pos - GPIR_INSTR_SLOT_STORE0;

   if (store->component != unit)
      return false;

   unit >>= 1;
   switch (instr->store_content[unit]) {
   case GPIR_INSTR_STORE_NONE:
      /* store temp has only one address reg for two store unit */
      if (node->op == gpir_op_store_temp &&
          instr->store_content[!unit] == GPIR_INSTR_STORE_TEMP &&
          instr->store_index[!unit] != store->index)
         return false;
      break;

   case GPIR_INSTR_STORE_VARYING:
      if (!(node->op == gpir_op_store_varying &&
            instr->store_index[unit] == store->index))
         return false;
      break;

   case GPIR_INSTR_STORE_REG:
      if (!(node->op == gpir_op_store_reg &&
            instr->store_index[unit] == store->index))
         return false;
      break;

   case GPIR_INSTR_STORE_TEMP:
      if (!(node->op == gpir_op_store_temp &&
            instr->store_index[unit] == store->index))
         return false;
      break;
   }

   bool child_accounted = false;

   /* does any store node already in this instr have the same child? */
   for (int j = GPIR_INSTR_SLOT_STORE0;
        !child_accounted && j <= GPIR_INSTR_SLOT_STORE3; j++) {
      gpir_store_node *other = gpir_node_to_store(instr->slots[j]);
      if (other && other->child == store->child)
         child_accounted = true;
   }

   /* is the child already in one of this instr's alu slots?  this may
    * happen when storing an already scheduled alu node to reg
    */
   for (int j = GPIR_INSTR_SLOT_ALU_BEGIN;
        !child_accounted && j <= GPIR_INSTR_SLOT_ALU_END; j++) {
      if (store->child == instr->slots[j])
         child_accounted = true;
   }

   if (!child_accounted) {
      /* nobody accounts for the child yet, so instr must keep a free alu
       * slot for inserting it later
       */
      if (instr->alu_num_slot_free <= instr->alu_num_slot_needed_by_store)
         return false;

      instr->alu_num_slot_needed_by_store++;
   }

   /* first store of this unit fixes the unit's content kind and index */
   if (instr->store_content[unit] == GPIR_INSTR_STORE_NONE) {
      switch (node->op) {
      case gpir_op_store_varying:
         instr->store_content[unit] = GPIR_INSTR_STORE_VARYING;
         break;
      case gpir_op_store_reg:
         instr->store_content[unit] = GPIR_INSTR_STORE_REG;
         break;
      default:
         instr->store_content[unit] = GPIR_INSTR_STORE_TEMP;
         break;
      }

      instr->store_index[unit] = store->index;
   }
   return true;
}
|
||||
|
||||
/* Undo the bookkeeping done by gpir_instr_insert_store_check() for node.
 *
 * The ALU slot reserved for the store's child is released unless another
 * store of this instr still uses the same child or the child already sits
 * in an ALU slot.  The store unit's content kind is reset once the
 * neighbouring slot of the same unit is empty as well.
 */
static void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node)
{
   gpir_store_node *store = gpir_node_to_store(node);
   int component = node->sched.pos - GPIR_INSTR_SLOT_STORE0;
   int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1);

   for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {
      /* gpir_instr_remove_node() clears the slot only after calling us, so
       * node is still present here; without skipping it the store would
       * always match itself and the reservation would never be released */
      if (j == node->sched.pos)
         continue;

      gpir_store_node *s = gpir_node_to_store(instr->slots[j]);
      if (s && s->child == store->child)
         goto out;
   }

   /* child already placed in an alu slot: no reservation is held for it */
   for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) {
      if (store->child == instr->slots[j])
         goto out;
   }

   instr->alu_num_slot_needed_by_store--;

out:
   if (!instr->slots[other_slot])
      instr->store_content[component >> 1] = GPIR_INSTR_STORE_NONE;
}
|
||||
|
||||
/* Make slot available by relocating the move node that occupies it.
 *
 * Returns true if slot is already empty or its move could be shifted to
 * another empty slot in [spill_to_start, GPIR_INSTR_SLOT_DIST_TWO_END] that
 * passes the ACC same-op check; false if slot holds something other than a
 * move or no suitable slot exists.
 */
static bool gpir_instr_spill_move(gpir_instr *instr, int slot, int spill_to_start)
{
   gpir_node *mov = instr->slots[slot];

   /* nothing to spill */
   if (mov == NULL)
      return true;

   /* only moves can be relocated */
   if (mov->op != gpir_op_mov)
      return false;

   for (int dst = spill_to_start; dst <= GPIR_INSTR_SLOT_DIST_TWO_END; dst++) {
      if (dst == slot || instr->slots[dst])
         continue;

      if (!gpir_instr_check_acc_same_op(instr, mov, dst))
         continue;

      instr->slots[dst] = mov;
      instr->slots[slot] = NULL;
      mov->sched.pos = dst;

      gpir_debug("instr %d spill move %d from slot %d to %d\n",
                 instr->index, mov->index, slot, dst);
      return true;
   }

   return false;
}
|
||||
|
||||
/* Check that the slot(s) node wants in instr are, or can be made, empty.
 *
 * Moves, and nodes placed beyond GPIR_INSTR_SLOT_DIST_TWO_END, need their
 * slot to be empty as is.  For other nodes a move occupying the slot may be
 * spilled to another dist two slot; complex1 and select additionally need
 * MUL1 cleared and only spill towards ADD0 and later slots.
 */
static bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node)
{
   if (node->op == gpir_op_mov ||
       node->sched.pos > GPIR_INSTR_SLOT_DIST_TWO_END)
      return !instr->slots[node->sched.pos];

   bool needs_mul1 =
      node->op == gpir_op_complex1 || node->op == gpir_op_select;

   /* for node needs dist two slot, if the slot has a move, we can
    * spill it to other dist two slot without any side effect */
   int spill_to_start =
      needs_mul1 ? GPIR_INSTR_SLOT_ADD0 : GPIR_INSTR_SLOT_MUL0;

   if (!gpir_instr_spill_move(instr, node->sched.pos, spill_to_start))
      return false;

   if (needs_mul1 &&
       !gpir_instr_spill_move(instr, GPIR_INSTR_SLOT_MUL1, spill_to_start))
      return false;

   return true;
}
|
||||
|
||||
/* Try to place node into instr at the slot given by node->sched.pos.
 *
 * The slot must be free (possibly after spilling a move) and the check for
 * the slot's class (ALU, reg0 load, reg1 load, mem load, store) must pass.
 * On success the node is recorded in its slot; complex1 and select are
 * recorded in MUL1 as well.  Returns false without placing node otherwise.
 */
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node)
{
   if (!gpir_instr_slot_free(instr, node))
      return false;

   const int pos = node->sched.pos;
   bool accepted = true;

   if (pos >= GPIR_INSTR_SLOT_ALU_BEGIN && pos <= GPIR_INSTR_SLOT_ALU_END)
      accepted = gpir_instr_insert_alu_check(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_REG0_LOAD3)
      accepted = gpir_instr_insert_reg0_check(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_REG1_LOAD3)
      accepted = gpir_instr_insert_reg1_check(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_MEM_LOAD3)
      accepted = gpir_instr_insert_mem_check(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_STORE0 && pos <= GPIR_INSTR_SLOT_STORE3)
      accepted = gpir_instr_insert_store_check(instr, node);

   if (!accepted)
      return false;

   instr->slots[pos] = node;

   /* these two ops are recorded in MUL1 in addition to their own slot */
   if (node->op == gpir_op_complex1 || node->op == gpir_op_select)
      instr->slots[GPIR_INSTR_SLOT_MUL1] = node;

   return true;
}
|
||||
|
||||
/* Take node out of instr: run the remove hook for the slot class that
 * node->sched.pos belongs to, then clear the slot (and MUL1 for complex1
 * and select, which are recorded there too).
 */
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
{
   const int pos = node->sched.pos;

   if (pos >= GPIR_INSTR_SLOT_ALU_BEGIN && pos <= GPIR_INSTR_SLOT_ALU_END)
      gpir_instr_remove_alu(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_REG0_LOAD3)
      gpir_instr_remove_reg0(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_REG1_LOAD3)
      gpir_instr_remove_reg1(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&
            pos <= GPIR_INSTR_SLOT_MEM_LOAD3)
      gpir_instr_remove_mem(instr, node);
   else if (pos >= GPIR_INSTR_SLOT_STORE0 && pos <= GPIR_INSTR_SLOT_STORE3)
      gpir_instr_remove_store(instr, node);

   /* the hooks above still see node in its slot; clear it only now */
   instr->slots[pos] = NULL;

   if (node->op == gpir_op_complex1 || node->op == gpir_op_select)
      instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL;
}
|
||||
|
||||
/* Print a table of all instructions of comp to stdout, one row per
 * instruction and one column per entry of fields[].
 *
 * Slots without an entry in fields[] are accumulated into the next column
 * that has one, separated by '|'.  Empty slots print as "null" (or as an
 * empty position inside an accumulated column).
 */
void gpir_instr_print_prog(gpir_compiler *comp)
{
   struct {
      int len;
      char *name;
   } fields[] = {
      [GPIR_INSTR_SLOT_MUL0] = { 4, "mul0" },
      [GPIR_INSTR_SLOT_MUL1] = { 4, "mul1" },
      [GPIR_INSTR_SLOT_ADD0] = { 4, "add0" },
      [GPIR_INSTR_SLOT_ADD1] = { 4, "add1" },
      [GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" },
      [GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" },
      [GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" },
      [GPIR_INSTR_SLOT_BRANCH] = { 4, "bnch" },
      [GPIR_INSTR_SLOT_STORE3] = { 15, "store" },
      [GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" },
      [GPIR_INSTR_SLOT_PASS] = { 4, "pass" },
   };

   printf("========prog instr========\n");
   printf("     ");
   for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) {
      if (fields[i].len)
         printf("%-*s ", fields[i].len, fields[i].name);
   }
   printf("\n");

   int index = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_instr, instr, &block->instr_list, list) {
         printf("%03d: ", index++);

         char buff[16] = "null";
         /* largest offset at which snprintf may still be pointed */
         const int last = (int)sizeof(buff) - 1;
         int start = 0;
         for (int j = 0; j < GPIR_INSTR_SLOT_NUM; j++) {
            gpir_node *node = instr->slots[j];
            if (fields[j].len) {
               if (node)
                  snprintf(buff + start, sizeof(buff) - start, "%d", node->index);
               printf("%-*s ", fields[j].len, buff);

               strcpy(buff, "null");
               start = 0;
            }
            else {
               /* snprintf returns the length it would have written, so the
                * offset has to be clamped after every append; otherwise
                * buff + start could leave the buffer and the remaining
                * size would wrap around */
               if (node)
                  start += snprintf(buff + start, sizeof(buff) - start, "%d", node->index);
               if (start > last)
                  start = last;

               start += snprintf(buff + start, sizeof(buff) - start, "|");
               if (start > last)
                  start = last;
            }
         }
         printf("\n");
      }
      printf("-----------------------\n");
   }
   printf("==========================\n");
}
|
|
@ -0,0 +1,529 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "gpir.h"
|
||||
#include "lima_context.h"
|
||||
|
||||
static gpir_node *
|
||||
gpir_lower_create_insert_node(gpir_node *parent, gpir_node *child,
|
||||
gpir_node *child2, gpir_op op)
|
||||
{
|
||||
gpir_node *node = gpir_node_create(parent->block, op);
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
gpir_alu_node *alu = gpir_node_to_alu(node);
|
||||
alu->children[0] = child;
|
||||
alu->children[1] = child2;
|
||||
alu->num_child = 2;
|
||||
gpir_node_insert_child(parent, child, node);
|
||||
gpir_node_add_dep(node, child2, GPIR_DEP_INPUT);
|
||||
list_addtail(&node->list, &parent->list);
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Rewrite the stores to varying 0 so that the viewport transform is applied:
 *
 *   component 3    : w   -> rcp(w)
 *   component 0..2 : xyz -> xyz * rcp(w) * scale + translate
 *
 * scale and translate are loaded as uniforms at comp->constant_base and
 * comp->constant_base + 1, after which constant_base is advanced by two.
 * Returns false if a node could not be created.
 */
static bool gpir_lower_viewport_transform(gpir_compiler *comp)
{
   gpir_node *rcpw = NULL;

   /* rcpw = 1 / w */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_store_varying)
            continue;

         gpir_store_node *w_store = gpir_node_to_store(node);
         if (w_store->index != 0 || w_store->component != 3)
            continue;

         gpir_node *w = w_store->child;

         rcpw = gpir_node_create(block, gpir_op_rcp);
         if (!rcpw)
            return false;
         list_addtail(&rcpw->list, &node->list);

         gpir_alu_node *rcp_alu = gpir_node_to_alu(rcpw);
         rcp_alu->children[0] = w;
         rcp_alu->num_child = 1;
         w_store->child = rcpw;

         gpir_node_insert_child(node, w, rcpw);
         goto found;
      }
   }

found:
   assert(rcpw);

   /* xyz = xyz * rcpw * scale + transition */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_store_varying)
            continue;

         gpir_store_node *store = gpir_node_to_store(node);
         if (store->index != 0 || store->component >= 3)
            continue;

         gpir_node *xyz = store->child;

         /* xyz * rcpw */
         gpir_node *persp =
            gpir_lower_create_insert_node(node, xyz, rcpw, gpir_op_mul);
         if (!persp)
            return false;

         gpir_load_node *scale = gpir_node_create(block, gpir_op_load_uniform);
         if (!scale)
            return false;
         scale->index = comp->constant_base;
         scale->component = store->component;
         list_addtail(&scale->node.list, &node->list);

         /* ... * scale */
         gpir_node *scaled =
            gpir_lower_create_insert_node(node, persp, &scale->node, gpir_op_mul);
         if (!scaled)
            return false;

         gpir_load_node *translate = gpir_node_create(block, gpir_op_load_uniform);
         if (!translate)
            return false;
         translate->index = comp->constant_base + 1;
         translate->component = store->component;
         list_addtail(&translate->node.list, &node->list);

         /* ... + translate */
         gpir_node *result =
            gpir_lower_create_insert_node(node, scaled, &translate->node, gpir_op_add);
         if (!result)
            return false;

         store->child = result;
      }
   }

   comp->constant_base += 2;
   return true;
}
|
||||
|
||||
/* Replace every used const node by a uniform load.
 *
 * Const nodes that are roots (per gpir_node_is_root()) are simply deleted.
 * The values of the remaining ones are collected into an array allocated on
 * comp->prog (prog->constant / prog->constant_size); each such const is
 * replaced by a load_uniform addressing its value relative to
 * comp->constant_base, four values per index.  Returns false on allocation
 * failure.
 */
static bool gpir_lower_const(gpir_compiler *comp)
{
   int used_consts = 0;

   /* pass 1: drop unused constants, count the rest */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_const)
            continue;

         if (!gpir_node_is_root(node))
            used_consts++;
         else
            gpir_node_delete(node);
      }
   }

   if (!used_consts)
      return true;

   union fi *values = ralloc_array(comp->prog, union fi, used_consts);
   if (!values)
      return false;

   comp->prog->constant = values;
   comp->prog->constant_size = used_consts * sizeof(union fi);

   /* pass 2: swap each const for a uniform load of its value */
   int next = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_const)
            continue;

         gpir_const_node *c = gpir_node_to_const(node);

         if (!gpir_node_is_root(node)) {
            gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
            if (unlikely(!load))
               return false;

            load->index = comp->constant_base + (next >> 2);
            load->component = next % 4;
            values[next] = c->value;
            next++;

            gpir_node_replace_succ(&load->node, node);

            list_addtail(&load->node.list, &node->list);

            gpir_debug("lower const create uniform %d for const %d\n",
                       load->node.index, node->index);
         }

         gpir_node_delete(node);
      }
   }

   return true;
}
|
||||
|
||||
/* duplicate load to all its successors */
|
||||
static bool gpir_lower_load(gpir_compiler *comp)
|
||||
{
|
||||
list_for_each_entry(gpir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
|
||||
if (node->type == gpir_node_type_load) {
|
||||
gpir_load_node *load = gpir_node_to_load(node);
|
||||
|
||||
bool first = true;
|
||||
gpir_node_foreach_succ_safe(node, dep) {
|
||||
gpir_node *succ = dep->succ;
|
||||
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
gpir_node *new = gpir_node_create(succ->block, node->op);
|
||||
if (unlikely(!new))
|
||||
return false;
|
||||
list_addtail(&new->list, &succ->list);
|
||||
|
||||
gpir_debug("lower load create %d from %d for succ %d\n",
|
||||
new->index, node->index, succ->index);
|
||||
|
||||
gpir_load_node *nload = gpir_node_to_load(new);
|
||||
nload->index = load->index;
|
||||
nload->component = load->component;
|
||||
if (load->reg) {
|
||||
nload->reg = load->reg;
|
||||
list_addtail(&nload->reg_link, &load->reg->uses_list);
|
||||
}
|
||||
|
||||
gpir_node_replace_pred(dep, new);
|
||||
gpir_node_replace_child(succ, node, new);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Fold a neg node into its neighbours where the ops allow it.
 *
 * If the operand is an ALU node with exactly one successor and its op
 * supports dest negate, its dest_negate flag is toggled and the neg node is
 * removed.  Otherwise each ALU successor whose op supports source negate on
 * the operand slots in question gets its children_negate flag toggled and
 * reads the operand directly.  The neg node is deleted once it is a root.
 * Always returns true.
 */
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *operand = neg->children[0];

   /* try dest negate on the operand; negate must be its only successor */
   if (operand->type == gpir_node_type_alu &&
       list_is_singular(&operand->succ_list) &&
       gpir_op_infos[operand->op].dest_neg) {
      gpir_alu_node *operand_alu = gpir_node_to_alu(operand);
      operand_alu->dest_negate = !operand_alu->dest_negate;

      gpir_node_replace_succ(operand, node);
      gpir_node_delete(node);
      return true;
   }

   /* otherwise try src negate on every alu successor */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *user = dep->succ;
      if (user->type != gpir_node_type_alu)
         continue;

      bool all_folded = true;
      gpir_alu_node *user_alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < user_alu->num_child; i++) {
         if (user_alu->children[i] != node)
            continue;

         if (!gpir_op_infos[user->op].src_neg[i]) {
            all_folded = false;
            continue;
         }

         user_alu->children_negate[i] = !user_alu->children_negate[i];
         user_alu->children[i] = operand;
      }

      /* the dep is only redirected when no use of neg remains in user */
      if (all_folded)
         gpir_node_replace_pred(dep, operand);
   }

   if (gpir_node_is_root(node))
      gpir_node_delete(node);

   return true;
}
|
||||
|
||||
/* Expand an rcp or rsqrt node into the three-node form
 * complex1(impl(x), complex2(x), x), where impl is rcp_impl or rsqrt_impl.
 *
 * The complex2 and impl nodes are created in block and linked right before
 * node; node itself is turned into the complex1 node in place.  Returns
 * false if a node could not be created.
 */
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *x = alu->children[0];

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))
      return false;

   complex2->num_child = 1;
   complex2->children[0] = x;
   gpir_node_add_dep(&complex2->node, x, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   /* pick the impl op that matches the op being lowered */
   int impl_op = 0;
   if (node->op == gpir_op_rcp)
      impl_op = gpir_op_rcp_impl;
   else if (node->op == gpir_op_rsqrt)
      impl_op = gpir_op_rsqrt_impl;
   else
      assert(0);

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   if (unlikely(!impl))
      return false;

   impl->num_child = 1;
   impl->children[0] = x;
   gpir_node_add_dep(&impl->node, x, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   /* change node to complex1 node */
   node->op = gpir_op_complex1;
   alu->num_child = 3;
   alu->children[0] = &impl->node;
   alu->children[1] = &complex2->node;
   alu->children[2] = x;
   gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);

   return true;
}
|
||||
|
||||
/* Wrap every node whose op is flagged may_consume_two_slots into the tree
 * (dummy_m (node dummy_f)).
 *
 * dummy_f/m are auxiliary nodes for value reg alloc:
 * 1. before reg alloc the fake nodes dummy_f and dummy_m are created, so
 *    the tree becomes (dummy_m (node dummy_f)); dummy_m can be spilled,
 *    the other nodes in the tree can't.
 * 2. after reg allocation and fake dep add, all deps of dummy_m and
 *    dummy_f are merged into node and both dummies are removed.
 *
 * The alternative would be to alloc two value regs for node.  That would
 * require two free slots after the node's successors, while one slot is
 * enough to schedule it because one move can be used for the two slot
 * node; the spill case is also not easy to handle with that method.
 *
 * With the dummy_f/m method there's no such requirement: the node is
 * scheduled only when there are two slots for it, otherwise a move is
 * used, and the node can be spilled with one reg.
 *
 * Returns false if a dummy node could not be created.
 */
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (!gpir_op_infos[node->op].may_consume_two_slots)
            continue;

         gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
         if (unlikely(!dummy_m))
            return false;
         list_add(&dummy_m->list, &node->list);

         gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
         if (unlikely(!dummy_f))
            return false;
         list_add(&dummy_f->list, &node->list);

         gpir_alu_node *wrapper = gpir_node_to_alu(dummy_m);
         wrapper->num_child = 2;
         wrapper->children[0] = node;
         wrapper->children[1] = dummy_f;

         /* dummy_m takes over node's successors, then reads both */
         gpir_node_replace_succ(dummy_m, node);
         gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
         gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
      }
   }

   return true;
}
|
||||
|
||||
/*
|
||||
* There are no 'equal' or 'not-equal' opcodes.
|
||||
* eq (a == b) is lowered to and(a >= b, b >= a)
|
||||
* ne (a != b) is lowered to or(a < b, b < a)
|
||||
*/
|
||||
static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
|
||||
{
|
||||
gpir_op cmp_node_op;
|
||||
gpir_op node_new_op;
|
||||
switch (node->op) {
|
||||
case gpir_op_eq:
|
||||
cmp_node_op = gpir_op_ge;
|
||||
node_new_op = gpir_op_min; /* and */
|
||||
break;
|
||||
case gpir_op_ne:
|
||||
cmp_node_op = gpir_op_lt;
|
||||
node_new_op = gpir_op_max; /* or */
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
gpir_alu_node *e = gpir_node_to_alu(node);
|
||||
|
||||
gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
|
||||
list_addtail(&cmp1->node.list, &node->list);
|
||||
gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
|
||||
list_addtail(&cmp2->node.list, &node->list);
|
||||
|
||||
cmp1->children[0] = e->children[0];
|
||||
cmp1->children[1] = e->children[1];
|
||||
cmp1->num_child = 2;
|
||||
|
||||
cmp2->children[0] = e->children[1];
|
||||
cmp2->children[1] = e->children[0];
|
||||
cmp2->num_child = 2;
|
||||
|
||||
gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
|
||||
gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);
|
||||
|
||||
gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
|
||||
gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);
|
||||
|
||||
gpir_node_foreach_pred_safe(node, dep) {
|
||||
gpir_node_remove_dep(node, dep->pred);
|
||||
}
|
||||
|
||||
gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
|
||||
gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);
|
||||
|
||||
node->op = node_new_op;
|
||||
e->children[0] = &cmp1->node;
|
||||
e->children[1] = &cmp2->node;
|
||||
e->num_child = 2;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no 'abs' opcode.
|
||||
* abs(a) is lowered to max(a, -a)
|
||||
*/
|
||||
static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
|
||||
{
|
||||
gpir_alu_node *alu = gpir_node_to_alu(node);
|
||||
|
||||
assert(node->op == gpir_op_abs);
|
||||
|
||||
node->op = gpir_op_max;
|
||||
|
||||
alu->children[1] = alu->children[0];
|
||||
alu->children_negate[1] = true;
|
||||
alu->num_child = 2;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no 'not' opcode.
|
||||
* not(a) is lowered to add(1, -a)
|
||||
*/
|
||||
static bool gpir_lower_not(gpir_block *block, gpir_node *node)
|
||||
{
|
||||
gpir_alu_node *alu = gpir_node_to_alu(node);
|
||||
|
||||
assert(alu->node.op == gpir_op_not);
|
||||
|
||||
node->op = gpir_op_add;
|
||||
|
||||
gpir_node *node_const = gpir_node_create(block, gpir_op_const);
|
||||
gpir_const_node *c = gpir_node_to_const(node_const);
|
||||
|
||||
assert(c->node.op == gpir_op_const);
|
||||
|
||||
list_addtail(&c->node.list, &node->list);
|
||||
c->value.f = 1.0f;
|
||||
gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);
|
||||
|
||||
alu->children_negate[1] = !alu->children_negate[0];
|
||||
alu->children[1] = alu->children[0];
|
||||
alu->children[0] = &c->node;
|
||||
alu->num_child = 2;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Per-op lowering hooks run by gpir_pre_rsched_lower_prog(); ops without an
 * entry are left as they are.
 */
static bool
(*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
};
|
||||
|
||||
/* Per-op lowering hooks run by gpir_post_rsched_lower_prog(); ops without
 * an entry are left as they are.
 */
static bool
(*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   /* ops rewritten in place or folded into their neighbours */
   [gpir_op_abs] = gpir_lower_abs,
   [gpir_op_neg] = gpir_lower_neg,

   /* comparisons without a native opcode */
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,

   /* ops expanded into complex1/complex2/impl nodes */
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
};
|
||||
|
||||
/* Run the lowering steps that precede "rsched": viewport transform, the
 * per-op hooks from gpir_pre_rsched_lower_funcs, const lowering and load
 * duplication, in that order.  Prints the resulting program and returns
 * true, or returns false as soon as one step fails.
 */
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
{
   if (!gpir_lower_viewport_transform(comp))
      return false;

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         bool (*lower)(gpir_block *, gpir_node *) =
            gpir_pre_rsched_lower_funcs[node->op];

         if (lower && !lower(block, node))
            return false;
      }
   }

   if (!gpir_lower_const(comp) || !gpir_lower_load(comp))
      return false;

   gpir_debug("pre rsched lower prog\n");
   gpir_node_print_prog_seq(comp);
   return true;
}
|
||||
|
||||
/* Run the lowering steps that follow "rsched": the per-op hooks from
 * gpir_post_rsched_lower_funcs, then the dummy_m/dummy_f wrapping of nodes
 * that may consume two slots.  Prints the resulting program and returns
 * true, or returns false as soon as one step fails.
 */
bool gpir_post_rsched_lower_prog(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         bool (*lower)(gpir_block *, gpir_node *) =
            gpir_post_rsched_lower_funcs[node->op];

         if (lower && !lower(block, node))
            return false;
      }
   }

   if (!gpir_lower_node_may_consume_two_slots(comp))
      return false;

   gpir_debug("post rsched lower prog\n");
   gpir_node_print_prog_seq(comp);
   return true;
}
|
|
@ -0,0 +1,422 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
#include "gpir.h"
|
||||
#include "lima_context.h"
|
||||
|
||||
|
||||
/* Create a node of type `op` that defines the NIR SSA value `ssa`.
 *
 * The node is recorded in comp->var_nodes[ssa->index] so later uses can find
 * it, named "ssa<index>", and appended to the block's node list.
 *
 * Returns the new node, or NULL if the allocation failed.
 */
static inline void *gpir_node_create_ssa(gpir_block *block, gpir_op op, nir_ssa_def *ssa)
{
   int index = ssa->index;
   gpir_node *node = gpir_node_create(block, op);

   /* callers already test the result for NULL, so do not touch it here */
   if (unlikely(!node))
      return NULL;

   block->comp->var_nodes[index] = node;
   snprintf(node->name, sizeof(node->name), "ssa%d", index);
   list_addtail(&node->list, &block->node_list);
   return node;
}
|
||||
|
||||
/* Create a node of type `op` whose result is written to a NIR register.
 *
 * A companion store_reg node is created with the new node as its child and
 * linked onto the defs_list of the gpir_reg whose index matches the NIR
 * register.  Both nodes are appended to the block's node list, the value node
 * first.
 *
 * Returns the value node (not the store), or NULL if an allocation failed.
 */
static inline void *gpir_node_create_reg(gpir_block *block, gpir_op op, nir_reg_dest *reg)
{
   int index = reg->reg->index;

   gpir_node *node = gpir_node_create(block, op);
   if (unlikely(!node))
      return NULL;

   gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);
   if (unlikely(!store))
      return NULL;

   snprintf(node->name, sizeof(node->name), "reg%d", index);

   store->child = node;
   gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);

   /* look up the gpir_reg matching the NIR register; the loop variable has
    * its own name so it does not shadow the `reg` parameter
    */
   list_for_each_entry(gpir_reg, creg, &block->comp->reg_list, list) {
      if (creg->index == index) {
         store->reg = creg;
         list_addtail(&store->reg_link, &creg->defs_list);
         break;
      }
   }

   list_addtail(&node->list, &block->node_list);
   list_addtail(&store->node.list, &block->node_list);
   return node;
}
|
||||
|
||||
/* Create a node for a NIR destination, dispatching on whether the destination
 * is an SSA value or a register.  Returns whatever the chosen helper returns.
 */
static void *gpir_node_create_dest(gpir_block *block, gpir_op op, nir_dest *dest)
{
   return dest->is_ssa ? gpir_node_create_ssa(block, op, &dest->ssa)
                       : gpir_node_create_reg(block, op, &dest->reg);
}
|
||||
|
||||
/* Return the node that produces the value read through `src`.
 *
 * For an SSA source this is the node recorded in comp->var_nodes.  For a
 * register source a new load_reg node is created, inserted into the node list
 * immediately before `succ`, and linked onto the uses_list of the matching
 * gpir_reg.
 *
 * Returns NULL if the load node could not be allocated.
 */
static gpir_node *gpir_node_find(gpir_block *block, gpir_node *succ, nir_src *src)
{
   gpir_node *pred;

   if (src->is_ssa) {
      pred = block->comp->var_nodes[src->ssa->index];
      assert(pred);
   }
   else {
      pred = gpir_node_create(block, gpir_op_load_reg);
      if (unlikely(!pred))
         return NULL;

      /* adding to the tail of succ's own link places pred right before succ */
      list_addtail(&pred->list, &succ->list);

      gpir_load_node *load = gpir_node_to_load(pred);
      list_for_each_entry(gpir_reg, reg, &block->comp->reg_list, list) {
         if (reg->index == src->reg.reg->index) {
            load->reg = reg;
            list_addtail(&load->reg_link, &reg->uses_list);
            break;
         }
      }
   }

   return pred;
}
|
||||
|
||||
static int nir_to_gpir_opcodes[nir_num_opcodes] = {
|
||||
/* not supported */
|
||||
[0 ... nir_last_opcode] = -1,
|
||||
|
||||
[nir_op_fmul] = gpir_op_mul,
|
||||
[nir_op_fadd] = gpir_op_add,
|
||||
[nir_op_fneg] = gpir_op_neg,
|
||||
[nir_op_fnot] = gpir_op_not,
|
||||
[nir_op_fmin] = gpir_op_min,
|
||||
[nir_op_fmax] = gpir_op_max,
|
||||
[nir_op_frcp] = gpir_op_rcp,
|
||||
[nir_op_frsq] = gpir_op_rsqrt,
|
||||
[nir_op_slt] = gpir_op_lt,
|
||||
[nir_op_sge] = gpir_op_ge,
|
||||
[nir_op_bcsel] = gpir_op_select,
|
||||
[nir_op_ffloor] = gpir_op_floor,
|
||||
[nir_op_fsign] = gpir_op_sign,
|
||||
[nir_op_seq] = gpir_op_eq,
|
||||
[nir_op_sne] = gpir_op_ne,
|
||||
[nir_op_fand] = gpir_op_min,
|
||||
[nir_op_for] = gpir_op_max,
|
||||
[nir_op_fabs] = gpir_op_abs,
|
||||
};
|
||||
|
||||
/* Translate one NIR ALU instruction into a gpir ALU node.
 *
 * The opcode is mapped through nir_to_gpir_opcodes; each NIR source becomes a
 * child of the new node (with its negate modifier copied) and an input
 * dependency.
 *
 * Returns false if the opcode is unsupported, the instruction has more
 * sources than a gpir ALU node can hold, or a node could not be created.
 */
static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_gpir_opcodes[instr->op];

   if (op < 0) {
      gpir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   gpir_alu_node *node = gpir_node_create_dest(block, op, &instr->dest.dest);
   if (unlikely(!node))
      return false;

   unsigned num_child = nir_op_infos[instr->op].num_inputs;

   /* checked at run time: an assert alone would let release builds write
    * past the end of children[]
    */
   if (num_child > ARRAY_SIZE(node->children)) {
      gpir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }
   node->num_child = num_child;

   for (unsigned i = 0; i < num_child; i++) {
      nir_alu_src *src = instr->src + i;
      node->children_negate[i] = src->negate;

      gpir_node *child = gpir_node_find(block, &node->node, &src->src);
      if (unlikely(!child))
         return false;
      node->children[i] = child;

      gpir_node_add_dep(&node->node, child, GPIR_DEP_INPUT);
   }

   return true;
}
|
||||
|
||||
/* Translate one NIR intrinsic into gpir nodes.
 *
 * Supported intrinsics:
 *   load_input   -> load_attribute node (index = base, component = component)
 *   load_uniform -> load_uniform node; base plus the constant offset is split
 *                   into index (offset / 4) and component (offset % 4)
 *   store_output -> store_varying node whose child is the stored value
 *
 * Returns false for any other intrinsic, for a uniform load whose offset is
 * not a constant, or when a node could not be created.
 */
static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   {
      gpir_load_node *load =
         gpir_node_create_dest(block, gpir_op_load_attribute, &instr->dest);
      if (unlikely(!load))
         return false;

      load->index = nir_intrinsic_base(instr);
      load->component = nir_intrinsic_component(instr);

      return true;
   }
   case nir_intrinsic_load_uniform:
   {
      /* reject a non-constant offset before creating any node; an assert
       * alone would leave release builds dereferencing NULL below
       */
      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
      if (!const_offset) {
         gpir_error("load_uniform with non-constant offset not support\n");
         return false;
      }

      gpir_load_node *load =
         gpir_node_create_dest(block, gpir_op_load_uniform, &instr->dest);
      if (unlikely(!load))
         return false;

      /* NOTE(review): the offset constant is read as a float and truncated;
       * presumably the shader reaches this point with float-only constants,
       * confirm against the NIR compiler options.
       */
      int offset = nir_intrinsic_base(instr);
      offset += (int)const_offset->f32[0];

      load->index = offset / 4;
      load->component = offset % 4;

      return true;
   }
   case nir_intrinsic_store_output:
   {
      gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying);
      if (unlikely(!store))
         return false;
      list_addtail(&store->node.list, &block->node_list);

      store->index = nir_intrinsic_base(instr);
      store->component = nir_intrinsic_component(instr);

      gpir_node *child = gpir_node_find(block, &store->node, instr->src);
      if (unlikely(!child))
         return false;
      store->child = child;
      gpir_node_add_dep(&store->node, child, GPIR_DEP_INPUT);

      return true;
   }
   default:
      gpir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic);
      return false;
   }
}
|
||||
|
||||
/* Translate a NIR load_const into a gpir const node.  Only 32-bit scalar
 * constants are expected (asserted); the raw 32-bit pattern is copied.
 *
 * Returns false if the node could not be created.
 */
static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni)
{
   nir_load_const_instr *lc = nir_instr_as_load_const(ni);
   gpir_const_node *cnode = gpir_node_create_ssa(block, gpir_op_const, &lc->def);

   if (unlikely(cnode == NULL))
      return false;

   assert(lc->def.bit_size == 32);
   assert(lc->def.num_components == 1);

   cnode->value.i = lc->value.i32[0];
   return true;
}
|
||||
|
||||
/* SSA undef instructions are not handled: report the error and fail. */
static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("nir_ssa_undef_instr not support\n");
   return false;
}
|
||||
|
||||
/* Texture instructions are not handled: report the error and fail.
 *
 * The message names nir_tex_instr; it previously said nir_jump_instr, copied
 * from gpir_emit_jump(), which made the two failures indistinguishable.
 */
static bool gpir_emit_tex(gpir_block *block, nir_instr *ni)
{
   gpir_error("nir_tex_instr not support\n");
   return false;
}
|
||||
|
||||
/* Jump instructions are not handled: report the error and fail. */
static bool gpir_emit_jump(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("nir_jump_instr not support\n");
   return false;
}
|
||||
|
||||
/* Per-instruction-type emit callbacks, indexed by nir_instr_type.  The table
 * only has room for types below nir_instr_type_phi; a type without an entry
 * here is left NULL.
 */
static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = {
   /* instructions that are actually translated */
   [nir_instr_type_alu] = gpir_emit_alu,
   [nir_instr_type_intrinsic] = gpir_emit_intrinsic,
   [nir_instr_type_load_const] = gpir_emit_load_const,

   /* instructions that only report "not support" */
   [nir_instr_type_ssa_undef] = gpir_emit_ssa_undef,
   [nir_instr_type_tex] = gpir_emit_tex,
   [nir_instr_type_jump] = gpir_emit_jump,
};
|
||||
|
||||
/* Allocate a gpir_block owned by `comp` and initialise its node and
 * instruction lists.  The remaining fields are left for the caller to set.
 *
 * Returns NULL if the allocation failed.
 */
static gpir_block *gpir_block_create(gpir_compiler *comp)
{
   gpir_block *blk = ralloc(comp, gpir_block);

   if (blk) {
      list_inithead(&blk->node_list);
      list_inithead(&blk->instr_list);
   }

   return blk;
}
|
||||
|
||||
/* Translate one NIR basic block: create a gpir_block, append it to the
 * compiler's block list and emit every instruction through gpir_emit_instr.
 *
 * Returns false if the block cannot be allocated, an instruction type has no
 * emit callback, or a callback fails.
 */
static bool gpir_emit_block(gpir_compiler *comp, nir_block *nblock)
{
   gpir_block *block = gpir_block_create(comp);
   if (!block)
      return false;

   list_addtail(&block->list, &comp->block_list);
   block->comp = comp;

   nir_foreach_instr(instr, nblock) {
      /* gpir_emit_instr is sparsely initialised, so both an out-of-range
       * type and a type with no callback must be rejected at run time;
       * an assert alone would let release builds call through NULL
       */
      if ((unsigned)instr->type >= ARRAY_SIZE(gpir_emit_instr) ||
          !gpir_emit_instr[instr->type]) {
         gpir_error("unsupported nir_instr type %d\n", instr->type);
         return false;
      }

      if (!gpir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}
|
||||
|
||||
/* `if` control-flow nodes are not handled: report the error and fail. */
static bool gpir_emit_if(gpir_compiler *comp, nir_if *nif)
{
   (void)comp;
   (void)nif;

   gpir_error("if nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Loop control-flow nodes are not handled: report the error and fail. */
static bool gpir_emit_loop(gpir_compiler *comp, nir_loop *nloop)
{
   (void)comp;
   (void)nloop;

   gpir_error("loop nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Nested function control-flow nodes are not handled: report the error and
 * fail.
 */
static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *nfunc)
{
   (void)comp;
   (void)nfunc;

   gpir_error("function nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Walk a NIR control-flow list and emit each node with the handler for its
 * type.  Stops and returns false at the first handler failure or at a node of
 * unknown type; returns true once the whole list has been emitted.
 */
static bool gpir_emit_cf_list(gpir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!gpir_emit_block(comp, nir_cf_node_as_block(node)))
            return false;
         break;
      case nir_cf_node_if:
         if (!gpir_emit_if(comp, nir_cf_node_as_if(node)))
            return false;
         break;
      case nir_cf_node_loop:
         if (!gpir_emit_loop(comp, nir_cf_node_as_loop(node)))
            return false;
         break;
      case nir_cf_node_function:
         if (!gpir_emit_function(comp, nir_cf_node_as_function(node)))
            return false;
         break;
      default:
         gpir_error("unknown NIR node type %d\n", node->type);
         return false;
      }
   }

   return true;
}
|
||||
|
||||
/* Allocate a new gpir_reg owned by `comp`, give it the next free register
 * index, append it to comp->reg_list and initialise its def/use lists.
 *
 * Returns the new register, or NULL if the allocation failed (in which case
 * comp->cur_reg is not advanced).
 */
gpir_reg *gpir_create_reg(gpir_compiler *comp)
{
   gpir_reg *reg = ralloc(comp, gpir_reg);
   if (!reg)
      return NULL;

   reg->index = comp->cur_reg++;
   list_addtail(&reg->list, &comp->reg_list);
   list_inithead(&reg->defs_list);
   list_inithead(&reg->uses_list);
   return reg;
}
|
||||
|
||||
static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
|
||||
{
|
||||
gpir_compiler *comp = rzalloc(prog, gpir_compiler);
|
||||
|
||||
list_inithead(&comp->block_list);
|
||||
list_inithead(&comp->reg_list);
|
||||
|
||||
for (int i = 0; i < num_reg; i++)
|
||||
gpir_create_reg(comp);
|
||||
|
||||
comp->var_nodes = rzalloc_array(comp, gpir_node *, num_ssa);
|
||||
comp->prog = prog;
|
||||
return comp;
|
||||
}
|
||||
|
||||
static int gpir_glsl_type_size(enum glsl_base_type type)
|
||||
{
|
||||
/* only support GLSL_TYPE_FLOAT */
|
||||
assert(type == GLSL_TYPE_FLOAT);
|
||||
return 4;
|
||||
}
|
||||
|
||||
bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *func = nir_shader_get_entrypoint(nir);
|
||||
gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
|
||||
if (!comp)
|
||||
return false;
|
||||
|
||||
comp->constant_base = nir->num_uniforms;
|
||||
prog->uniform_pending_offset = nir->num_uniforms * 16;
|
||||
|
||||
if (!gpir_emit_cf_list(comp, &func->body))
|
||||
goto err_out0;
|
||||
|
||||
gpir_node_print_prog_seq(comp);
|
||||
gpir_node_print_prog_dep(comp);
|
||||
|
||||
if (!gpir_pre_rsched_lower_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_reduce_reg_pressure_schedule_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_post_rsched_lower_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_value_regalloc_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_physical_regalloc_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_schedule_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_codegen_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
nir_foreach_variable(var, &nir->outputs) {
|
||||
if (var->data.location == VARYING_SLOT_POS)
|
||||
assert(var->data.driver_location == 0);
|
||||
|
||||
struct lima_varying_info *v = prog->varying + var->data.driver_location;
|
||||
if (!v->components) {
|
||||
v->component_size = gpir_glsl_type_size(glsl_get_base_type(var->type));
|
||||
prog->num_varying++;
|
||||
}
|
||||
|
||||
v->components += glsl_get_components(var->type);
|
||||
}
|
||||
|
||||
ralloc_free(comp);
|
||||
return true;
|
||||
|
||||
err_out0:
|
||||
ralloc_free(comp);
|
||||
return false;
|
||||
}
|
||||
|
|
@ -0,0 +1,492 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
const gpir_op_info gpir_op_infos[] = {
|
||||
[gpir_op_mov] = {
|
||||
.name = "mov",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
|
||||
GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
|
||||
GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[gpir_op_mul] = {
|
||||
.name = "mul",
|
||||
.dest_neg = true,
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_select] = {
|
||||
.name = "select",
|
||||
.dest_neg = true,
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
|
||||
.may_consume_two_slots = true,
|
||||
},
|
||||
[gpir_op_complex1] = {
|
||||
.name = "complex1",
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
.may_consume_two_slots = true,
|
||||
},
|
||||
[gpir_op_complex2] = {
|
||||
.name = "complex2",
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_add] = {
|
||||
.name = "add",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_floor] = {
|
||||
.name = "floor",
|
||||
.src_neg = {true, false, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_sign] = {
|
||||
.name = "sign",
|
||||
.src_neg = {true, false, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_ge] = {
|
||||
.name = "ge",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_lt] = {
|
||||
.name = "lt",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_min] = {
|
||||
.name = "min",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
.may_consume_two_slots = true,
|
||||
},
|
||||
[gpir_op_max] = {
|
||||
.name = "max",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
.may_consume_two_slots = true,
|
||||
},
|
||||
[gpir_op_abs] = {
|
||||
.name = "abs",
|
||||
.src_neg = {true, true, false, false},
|
||||
},
|
||||
[gpir_op_neg] = {
|
||||
.name = "neg",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
|
||||
GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[gpir_op_not] = {
|
||||
.name = "not",
|
||||
.src_neg = {true, true, false, false},
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
|
||||
},
|
||||
[gpir_op_eq] = {
|
||||
.name = "eq",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[gpir_op_ne] = {
|
||||
.name = "ne",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[gpir_op_clamp_const] = {
|
||||
.name = "clamp_const",
|
||||
},
|
||||
[gpir_op_preexp2] = {
|
||||
.name = "preexp2",
|
||||
},
|
||||
[gpir_op_postlog2] = {
|
||||
.name = "postlog2",
|
||||
},
|
||||
[gpir_op_exp2_impl] = {
|
||||
.name = "exp2_impl",
|
||||
},
|
||||
[gpir_op_log2_impl] = {
|
||||
.name = "log2_impl",
|
||||
},
|
||||
[gpir_op_rcp_impl] = {
|
||||
.name = "rcp_impl",
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_rsqrt_impl] = {
|
||||
.name = "rsqrt_impl",
|
||||
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_load_uniform] = {
|
||||
.name = "ld_uni",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1,
|
||||
GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
.type = gpir_node_type_load,
|
||||
},
|
||||
[gpir_op_load_temp] = {
|
||||
.name = "ld_tmp",
|
||||
.type = gpir_node_type_load,
|
||||
},
|
||||
[gpir_op_load_attribute] = {
|
||||
.name = "ld_att",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
|
||||
GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
.type = gpir_node_type_load,
|
||||
},
|
||||
[gpir_op_load_reg] = {
|
||||
.name = "ld_reg",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1,
|
||||
GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3,
|
||||
GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
|
||||
GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
.type = gpir_node_type_load,
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_store_temp] = {
|
||||
.name = "st_tmp",
|
||||
.type = gpir_node_type_store,
|
||||
},
|
||||
[gpir_op_store_reg] = {
|
||||
.name = "st_reg",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
|
||||
GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
.type = gpir_node_type_store,
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_store_varying] = {
|
||||
.name = "st_var",
|
||||
.slots = (int []) {
|
||||
GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
|
||||
GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
|
||||
GPIR_INSTR_SLOT_END
|
||||
},
|
||||
.type = gpir_node_type_store,
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_store_temp_load_off0] = {
|
||||
.name = "st_of0",
|
||||
.type = gpir_node_type_store,
|
||||
},
|
||||
[gpir_op_store_temp_load_off1] = {
|
||||
.name = "st_of1",
|
||||
.type = gpir_node_type_store,
|
||||
},
|
||||
[gpir_op_store_temp_load_off2] = {
|
||||
.name = "st_of2",
|
||||
.type = gpir_node_type_store,
|
||||
},
|
||||
[gpir_op_branch_cond] = {
|
||||
.name = "branch_cond",
|
||||
.type = gpir_node_type_branch,
|
||||
},
|
||||
[gpir_op_const] = {
|
||||
.name = "const",
|
||||
.type = gpir_node_type_const,
|
||||
},
|
||||
[gpir_op_exp2] = {
|
||||
.name = "exp2",
|
||||
},
|
||||
[gpir_op_log2] = {
|
||||
.name = "log2",
|
||||
},
|
||||
[gpir_op_rcp] = {
|
||||
.name = "rcp",
|
||||
},
|
||||
[gpir_op_rsqrt] = {
|
||||
.name = "rsqrt",
|
||||
},
|
||||
[gpir_op_ceil] = {
|
||||
.name = "ceil",
|
||||
},
|
||||
[gpir_op_exp] = {
|
||||
.name = "exp",
|
||||
},
|
||||
[gpir_op_log] = {
|
||||
.name = "log",
|
||||
},
|
||||
[gpir_op_sin] = {
|
||||
.name = "sin",
|
||||
},
|
||||
[gpir_op_cos] = {
|
||||
.name = "cos",
|
||||
},
|
||||
[gpir_op_tan] = {
|
||||
.name = "tan",
|
||||
},
|
||||
[gpir_op_dummy_f] = {
|
||||
.name = "dummy_f",
|
||||
.type = gpir_node_type_alu,
|
||||
.spillless = true,
|
||||
},
|
||||
[gpir_op_dummy_m] = {
|
||||
.name = "dummy_m",
|
||||
.type = gpir_node_type_alu,
|
||||
},
|
||||
[gpir_op_branch_uncond] = {
|
||||
.name = "branch_uncond",
|
||||
.type = gpir_node_type_branch,
|
||||
},
|
||||
};
|
||||
|
||||
/* Allocate a zeroed node for `op`, owned by `block`.
 *
 * The allocation size depends on the node type that gpir_op_infos assigns to
 * the op.  The node gets the placeholder name "new", empty pred/succ lists
 * and the next index from the compiler; it is NOT added to the block's node
 * list.
 *
 * Returns NULL if the allocation failed.
 */
void *gpir_node_create(gpir_block *block, gpir_op op)
{
   static const int node_size[] = {
      [gpir_node_type_alu] = sizeof(gpir_alu_node),
      [gpir_node_type_const] = sizeof(gpir_const_node),
      [gpir_node_type_load] = sizeof(gpir_load_node),
      [gpir_node_type_store] = sizeof(gpir_store_node),
      [gpir_node_type_branch] = sizeof(gpir_branch_node),
   };

   const gpir_node_type type = gpir_op_infos[op].type;
   gpir_node *n = rzalloc_size(block, node_size[type]);

   if (unlikely(n == NULL))
      return NULL;

   n->op = op;
   n->type = type;
   n->block = block;
   n->index = block->comp->cur_index++;

   snprintf(n->name, sizeof(n->name), "new");
   list_inithead(&n->succ_list);
   list_inithead(&n->pred_list);

   return n;
}
|
||||
|
||||
/* Record that `succ` depends on `pred` with dependency kind `type`.
 *
 * No dependency is created (NULL is returned) when the nodes are in
 * different blocks, when succ == pred, or when allocation fails.  If a
 * dependency between the two nodes already exists it is reused, and its type
 * is lowered to `type` when `type` is numerically smaller.
 *
 * Returns the new or existing dependency, or NULL as described above.
 */
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type)
{
   /* don't add dep for two nodes from different block */
   if (succ->block != pred->block)
      return NULL;

   /* don't add self loop dep */
   if (succ == pred)
      return NULL;

   /* don't add duplicated dep */
   gpir_node_foreach_pred(succ, dep) {
      if (dep->pred == pred) {
         /* use stronger dependency */
         if (dep->type > type)
            dep->type = type;
         return dep;
      }
   }

   gpir_dep *dep = ralloc(succ, gpir_dep);
   /* NULL is already a documented result, so callers must cope with it */
   if (unlikely(!dep))
      return NULL;

   dep->type = type;
   dep->pred = pred;
   dep->succ = succ;
   list_addtail(&dep->pred_link, &succ->pred_list);
   list_addtail(&dep->succ_link, &pred->succ_list);
   return dep;
}
|
||||
|
||||
/* Remove and free the dependency of `succ` on `pred`, if there is one.
 * Does nothing when no such dependency exists.
 */
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred)
{
   gpir_node_foreach_pred(succ, dep) {
      if (dep->pred != pred)
         continue;

      /* unlink from both endpoints, then stop: iteration must not continue
       * past the freed entry
       */
      list_del(&dep->succ_link);
      list_del(&dep->pred_link);
      ralloc_free(dep);
      return;
   }
}
|
||||
|
||||
/* Redirect every child slot of `parent` that refers to `old_child` so that it
 * refers to `new_child`.  Only ALU and store parents carry children; other
 * node types are left unchanged.  Dependency links are not touched here.
 */
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child,
                             gpir_node *new_child)
{
   switch (parent->type) {
   case gpir_node_type_alu: {
      gpir_alu_node *alu = gpir_node_to_alu(parent);

      for (int slot = 0; slot < alu->num_child; slot++) {
         if (alu->children[slot] == old_child)
            alu->children[slot] = new_child;
      }
      break;
   }
   case gpir_node_type_store: {
      gpir_store_node *store = gpir_node_to_store(parent);

      if (store->child == old_child)
         store->child = new_child;
      break;
   }
   default:
      break;
   }
}
|
||||
|
||||
/* Re-point an existing dependency at a new predecessor: the dep leaves its
 * old predecessor's successor list and joins the tail of `new_pred`'s.
 */
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred)
{
   dep->pred = new_pred;

   list_del(&dep->succ_link);
   list_addtail(&dep->succ_link, &new_pred->succ_list);
}
|
||||
|
||||
/* Make every node that consumes `src` as an input consume `dst` instead.
 * Only GPIR_DEP_INPUT dependencies are moved; other dependency kinds stay on
 * `src`.
 */
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src)
{
   /* _safe iteration: each moved dep is unlinked from src's successor list */
   gpir_node_foreach_succ_safe(src, dep) {
      if (dep->type == GPIR_DEP_INPUT) {
         gpir_node_replace_pred(dep, dst);
         gpir_node_replace_child(dep->succ, src, dst);
      }
   }
}
|
||||
|
||||
/* Splice `insert_child` into the dependency edge between `parent` and
 * `child`: the parent's dependency on `child` (if any) is re-pointed at
 * `insert_child`, and `insert_child` gains an input dependency on `child`.
 * The child pointers stored inside the nodes are not modified here.
 */
void gpir_node_insert_child(gpir_node *parent, gpir_node *child,
                            gpir_node *insert_child)
{
   gpir_node_foreach_pred(parent, dep) {
      if (dep->pred != child)
         continue;

      gpir_node_replace_pred(dep, insert_child);
      break;
   }

   gpir_node_add_dep(insert_child, child, GPIR_DEP_INPUT);
}
|
||||
|
||||
/* Destroy a node: free every dependency in which it takes part, detach a
 * load/store node from its register's use/def list, unlink the node from its
 * node list and free it.
 */
void gpir_node_delete(gpir_node *node)
{
   /* dependencies where this node is the predecessor */
   gpir_node_foreach_succ_safe(node, dep) {
      list_del(&dep->succ_link);
      list_del(&dep->pred_link);
      ralloc_free(dep);
   }

   /* dependencies where this node is the successor */
   gpir_node_foreach_pred_safe(node, dep) {
      list_del(&dep->succ_link);
      list_del(&dep->pred_link);
      ralloc_free(dep);
   }

   switch (node->type) {
   case gpir_node_type_store: {
      gpir_store_node *store = gpir_node_to_store(node);

      if (store->reg)
         list_del(&store->reg_link);
      break;
   }
   case gpir_node_type_load: {
      gpir_load_node *load = gpir_node_to_load(node);

      if (load->reg)
         list_del(&load->reg_link);
      break;
   }
   default:
      break;
   }

   list_del(&node->list);
   ralloc_free(node);
}
|
||||
|
||||
/* Print one node of the dependency tree, indented by `space` columns, and
 * recurse into its predecessors two columns deeper.  `type` is the kind of
 * dependency through which the node was reached.
 *
 * A node that was printed before is shown again but not expanded; unless it
 * is a leaf it is prefixed with "+" to mark the omitted subtree.
 */
static void gpir_node_print_node(gpir_node *node, int type, int space)
{
   static char *dep_name[] = {
      [GPIR_DEP_INPUT] = "input",
      [GPIR_DEP_OFFSET] = "offset",
      [GPIR_DEP_READ_AFTER_WRITE] = "RaW",
      [GPIR_DEP_WRITE_AFTER_READ] = "WaR",
      [GPIR_DEP_VREG_READ_AFTER_WRITE] = "vRaW",
      [GPIR_DEP_VREG_WRITE_AFTER_READ] = "vWaR",
   };

   int pad = space;
   while (pad-- > 0)
      printf(" ");

   const char *mark = (node->printed && !gpir_node_is_leaf(node)) ? "+" : "";
   printf("%s%s %d %s %s\n", mark,
          gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]);

   if (node->printed)
      return;

   gpir_node_foreach_pred(node, dep) {
      gpir_node_print_node(dep->pred, dep->type, space + 2);
   }

   node->printed = true;
}
|
||||
|
||||
/* Debug dump of the program as dependency trees, one tree per root node,
 * with a separator line after each block.  Prints nothing unless the
 * LIMA_DEBUG_GP flag is set in lima_debug.
 */
void gpir_node_print_prog_dep(gpir_compiler *comp)
{
   if ((lima_debug & LIMA_DEBUG_GP) == 0)
      return;

   /* clear the visited marks left by a previous dump */
   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      list_for_each_entry(gpir_node, n, &blk->node_list, list) {
         n->printed = false;
      }
   }

   printf("======== node prog dep ========\n");
   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      list_for_each_entry(gpir_node, n, &blk->node_list, list) {
         if (!gpir_node_is_root(n))
            continue;

         gpir_node_print_node(n, GPIR_DEP_INPUT, 0);
      }
      printf("----------------------------\n");
   }
}
|
||||
|
||||
/* Debug dump of the program in node-list order.  Each line shows a running
 * sequence number, the op name, the node index and name, followed by the
 * indices of the node's predecessors and successors.  Prints nothing unless
 * the LIMA_DEBUG_GP flag is set in lima_debug.
 */
void gpir_node_print_prog_seq(gpir_compiler *comp)
{
   if ((lima_debug & LIMA_DEBUG_GP) == 0)
      return;

   int seq = 0;

   printf("======== node prog seq ========\n");
   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      list_for_each_entry(gpir_node, n, &blk->node_list, list) {
         printf("%03d: %s %d %s pred", seq, gpir_op_infos[n->op].name,
                n->index, n->name);
         seq++;

         gpir_node_foreach_pred(n, dep) {
            printf(" %d", dep->pred->index);
         }

         printf(" succ");
         gpir_node_foreach_succ(n, dep) {
            printf(" %d", dep->succ->index);
         }

         printf("\n");
      }
      printf("----------------------------\n");
   }
}
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Linear scan register alloc for physical reg alloc of each
|
||||
* load/store node
|
||||
*/
|
||||
|
||||
/* Debug dump of the physical register chosen for every load_reg/store_reg
 * node.  The sequence number counts all nodes, including those that are not
 * printed.  Prints nothing unless LIMA_DEBUG_GP is set in lima_debug.
 */
static void regalloc_print_result(gpir_compiler *comp)
{
   if ((lima_debug & LIMA_DEBUG_GP) == 0)
      return;

   int seq = 0;

   printf("======== physical regalloc ========\n");
   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      list_for_each_entry(gpir_node, n, &blk->node_list, list) {
         switch (n->op) {
         case gpir_op_load_reg: {
            gpir_load_node *load = gpir_node_to_load(n);
            printf("%03d: load %d use reg %d\n", seq, n->index, load->reg->index);
            break;
         }
         case gpir_op_store_reg: {
            gpir_store_node *store = gpir_node_to_store(n);
            printf("%03d: store %d use reg %d\n", seq, n->index, store->reg->index);
            break;
         }
         default:
            break;
         }
         seq++;
      }
      printf("----------------------------\n");
   }
}
|
||||
|
||||
/* Assign a physical register to every gpir_reg with a linear scan.
 *
 * Steps:
 *  1. number all nodes in program order (node->preg.index);
 *  2. compute each register's live interval: start = earliest store,
 *     end = latest load;
 *  3. sort comp->reg_list by interval start (stable insertion sort);
 *  4. walk the sorted list, releasing registers whose interval ended at or
 *     before the current start and taking the lowest free physical register;
 *  5. write the chosen register into every load/store node as
 *     index = reg / 4 and component = reg % 4.
 *
 * Returns false if more than GPIR_PHYSICAL_REG_NUM registers are live at
 * once (spilling is not implemented), true otherwise.
 */
bool gpir_physical_regalloc_prog(gpir_compiler *comp)
{
   int index = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         node->preg.index = index++;
      }
   }

   /* calculate each reg liveness interval */
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      reg->start = INT_MAX;
      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
         if (store->node.preg.index < reg->start)
            reg->start = store->node.preg.index;
      }

      reg->end = 0;
      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         if (load->node.preg.index > reg->end)
            reg->end = load->node.preg.index;
      }
   }

   /* sort reg list by start value */
   struct list_head reg_list;
   list_replace(&comp->reg_list, &reg_list);
   list_inithead(&comp->reg_list);
   list_for_each_entry_safe(gpir_reg, reg, &reg_list, list) {
      /* insert before the first already-sorted reg that starts later,
       * or at the tail when there is none
       */
      struct list_head *insert_pos = &comp->reg_list;
      list_for_each_entry(gpir_reg, creg, &comp->reg_list, list) {
         if (creg->start > reg->start) {
            insert_pos = &creg->list;
            break;
         }
      }
      list_del(&reg->list);
      list_addtail(&reg->list, insert_pos);
   }

   /* do linear scan reg alloc */
   gpir_reg *active[GPIR_PHYSICAL_REG_NUM] = {0};
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      int i;

      /* if some reg is expired */
      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
         if (active[i] && active[i]->end <= reg->start)
            active[i] = NULL;
      }

      /* find a free reg value for this reg */
      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
         if (!active[i]) {
            active[i] = reg;
            reg->index = i;
            break;
         }
      }

      /* TODO: support spill to temp memory */
      if (i == GPIR_PHYSICAL_REG_NUM) {
         /* fail the compile instead of relying on an assert, which would
          * leave reg->index unassigned in release builds
          */
         gpir_error("physical regalloc: out of registers, spill not support\n");
         return false;
      }
   }

   /* update load/store node info for the real reg */
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
         store->index = reg->index >> 2;
         store->component = reg->index % 4;
      }

      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         load->index = reg->index >> 2;
         /* was a second write to load->index, which clobbered the register
          * index and never set the component
          */
         load->component = reg->index % 4;
      }
   }

   regalloc_print_result(comp);
   return true;
}
|
|
@ -0,0 +1,220 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Register sensitive schedule algorithm from paper:
|
||||
* "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
|
||||
* Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
|
||||
*/
|
||||
|
||||
/* Compute rsched.est and rsched.reg_pressure for node, recursing first into
 * every predecessor whose reg_pressure is still negative (the "not computed
 * yet" marker set by gpir_reduce_reg_pressure_schedule_prog).
 *
 *   est          = 1 + largest est among the predecessors
 *   reg_pressure = estimate of the registers needed to evaluate the
 *                  sub-graph below node (0 for a node without predecessors)
 */
static void schedule_calc_sched_info(gpir_node *node)
{
   int num_pred = 0;
   float extra = 1.0f;

   /* make sure all children are up to date and fold them into node */
   gpir_node_foreach_pred(node, dep) {
      gpir_node *child = dep->pred;

      if (child->rsched.reg_pressure < 0)
         schedule_calc_sched_info(child);

      int est = child->rsched.est + 1;
      if (est > node->rsched.est)
         node->rsched.est = est;

      /* 0 for a child with a single successor, approaching 1 as the number
       * of successors grows */
      float weight = 1.0f - 1.0f / list_length(&child->succ_list);
      if (weight < extra)
         extra = weight;

      num_pred++;
   }

   /* leaf instr */
   if (num_pred == 0) {
      node->rsched.reg_pressure = 0;
      return;
   }

   /* gather the children's pressure in ascending order (insertion sort) */
   float sorted[num_pred];
   int filled = 0;
   gpir_node_foreach_pred(node, dep) {
      float value = dep->pred->rsched.reg_pressure;
      int slot = filled++;
      while (slot > 0 && sorted[slot - 1] > value) {
         sorted[slot] = sorted[slot - 1];
         slot--;
      }
      sorted[slot] = value;
   }

   /* When child k is evaluated, the results of the children after it in the
    * sorted order are counted on top of its own pressure; keep the worst
    * case over all children. */
   for (int k = 0; k < num_pred; k++) {
      float pressure = sorted[k] + num_pred - (k + 1);
      if (pressure > node->rsched.reg_pressure)
         node->rsched.reg_pressure = pressure;
   }

   /* If every child has more than one parent, this node needs an additional
    * register for its own result: with a single child that has several
    * successors, parent and child would otherwise get the same pressure
    * although two registers are needed.
    *
    * A whole register would be too much though, because the last parent of
    * a shared child does not need the extra one. A node with one shared
    * child should still rank below a node with two unshared children.
    *
    * extra reg = min over all children of (1.0 - 1.0 / num successors)
    */
   node->rsched.reg_pressure += extra;
}
|
||||
|
||||
static void schedule_insert_ready_list(struct list_head *ready_list,
|
||||
gpir_node *insert_node)
|
||||
{
|
||||
struct list_head *insert_pos = ready_list;
|
||||
|
||||
list_for_each_entry(gpir_node, node, ready_list, list) {
|
||||
if (insert_node->rsched.parent_index < node->rsched.parent_index ||
|
||||
(insert_node->rsched.parent_index == node->rsched.parent_index &&
|
||||
(insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||
|
||||
(insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&
|
||||
(insert_node->rsched.est >= node->rsched.est))))) {
|
||||
insert_pos = &node->list;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_del(&insert_node->list);
|
||||
list_addtail(&insert_node->list, insert_pos);
|
||||
}
|
||||
|
||||
/* Drain ready_list into block->node_list.
 *
 * Each round takes the head of the ready list, pushes it onto the front of
 * block->node_list, marks it scheduled and decrements
 * block->rsched.node_index. Every predecessor of that node gets the new
 * node_index as its parent_index, and is moved to the ready list once all
 * of its successors have been scheduled.
 */
static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)
{
   while (!list_empty(ready_list)) {
      gpir_node *picked = list_first_entry(ready_list, gpir_node, list);
      list_del(&picked->list);

      /* schedule the node to the block node list */
      list_add(&picked->list, &block->node_list);
      picked->rsched.scheduled = true;
      block->rsched.node_index--;

      gpir_node_foreach_pred(picked, dep) {
         gpir_node *pred = dep->pred;
         pred->rsched.parent_index = block->rsched.node_index;

         bool all_succ_done = true;
         gpir_node_foreach_succ(pred, succ_dep) {
            if (!succ_dep->succ->rsched.scheduled) {
               all_succ_done = false;
               break;
            }
         }

         /* all successor have been scheduled */
         if (all_succ_done)
            schedule_insert_ready_list(ready_list, pred);
      }
   }
}
|
||||
|
||||
/* Re-sequence the nodes of one block with the register-pressure-aware list
 * scheduler. The "step" labels are kept from the original code; they
 * presumably refer to the steps of the paper cited at the top of the file.
 */
static void schedule_block(gpir_block *block)
{
   /* Detach the current sequence into a local list; block->node_list is
    * refilled by schedule_ready_list() and holds the schedule result. */
   struct list_head pending;
   list_replace(&block->node_list, &pending);
   list_inithead(&block->node_list);

   /* step 2 & 3: count the nodes and compute the metrics from each root */
   list_for_each_entry(gpir_node, cur, &pending, list) {
      block->rsched.node_index++;
      if (gpir_node_is_root(cur))
         schedule_calc_sched_info(cur);
   }

   /* step 4: empty ready list */
   struct list_head ready;
   list_inithead(&ready);

   /* step 5: seed it with the root nodes; the _safe iterator is needed
    * because the insert unlinks the node from the list being walked */
   list_for_each_entry_safe(gpir_node, cur, &pending, list) {
      if (!gpir_node_is_root(cur))
         continue;

      cur->rsched.parent_index = INT_MAX;
      schedule_insert_ready_list(&ready, cur);
   }

   /* step 6: schedule until the ready list runs dry */
   schedule_ready_list(block, &ready);
}
|
||||
|
||||
/* Entry point of the register-pressure-reducing pre-scheduler: resets the
 * per-node/per-block rsched state, schedules every block, then prints the
 * resulting sequence through the debug helpers. Always returns true.
 */
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)
{
   /* Physical reg load/store dependencies are not built here. The program
    * has just left SSA form, so within a block a physical reg has at most
    * one load and one store, and the store comes after the load because it
    * consumes a value computed from the load's output. No extra
    * write-after-read or read-after-write dependency is therefore needed to
    * keep load/store nodes in the right order.
    *
    * SSA def/use pairs spanning different blocks need no handling either:
    * such values go through a physical reg load/store.
    */

   /* reset scheduler state; reg_pressure == -1 means "not computed yet" */
   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      blk->rsched.node_index = 0;
      list_for_each_entry_safe(gpir_node, cur, &blk->node_list, list) {
         cur->rsched.scheduled = false;
         cur->rsched.est = 0;
         cur->rsched.reg_pressure = -1;
      }
   }

   list_for_each_entry(gpir_block, blk, &comp->block_list, list) {
      schedule_block(blk);
   }

   gpir_debug("after reduce scheduler\n");
   gpir_node_print_prog_seq(comp);
   return true;
}
|
|
@ -0,0 +1,809 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/*
|
||||
* GP schedule algorithm (by Connor Abbott <cwabbott0@gmail.com>)
|
||||
*
|
||||
* Pre schedule phase:
|
||||
* 1. order all nodes in a sequence
|
||||
* 2. convert the real reg read/write to GP load/store node, now all
|
||||
* variable is SSA
|
||||
* 3. do reg alloc for all SSA with 11 reg (value reg) and spill with
|
||||
* load/store to real reg if needed
|
||||
* 4. add fake dependency like this:
|
||||
* after step 3, node sequence is
|
||||
* 01: r1=r2+r3
|
||||
* 02: r4=r1+r2
|
||||
* 03: r1=r5+r6
|
||||
* we should add a fake dependency of node 3 to node 2 like a
|
||||
* write-after-read dep. But this is not really write-after-read
|
||||
* dep because there's no r1 really, because it's a value register.
|
||||
* We need this fake dep in the schedule phase to make sure in any
|
||||
* schedule point, there're only <=11 input needed by the past
|
||||
* scheduled nodes.
|
||||
* 5. build DAG according to all the real and fake dep
|
||||
*
|
||||
* Schedule phase:
|
||||
* 1. Compute the nodes ready to schedule, if no nodes, exit
|
||||
* 2. Create a new GP instruction, and call it as current instr
|
||||
* 3. For any nodes with a use 2 cycles ago with a definition ready to
|
||||
* schedule, schedule that definition immediately if possible, or else
|
||||
* schedule a move.
|
||||
* 4. For any nodes with a use 2 cycles ago but the definition not
|
||||
* scheduled and not ready to schedule, schedule a move immediately
|
||||
* to prevent the value from falling off the queue.
|
||||
* 5. Calculate the number of remaining nodes with a use 1 cycle ago but
|
||||
* the definition not yet scheduled, and if there are more than 5,
|
||||
* schedule moves or definitions for the rest now.
|
||||
* 6. Schedule the rest of the available nodes using your favorite heuristic
|
||||
* to current instr.
|
||||
* 7. go to step 1
|
||||
*
|
||||
* Step 5 for the current instruction guarantees that steps 3 and 4 for
|
||||
* the next instruction will always succeed, so it's only step 5 that can
|
||||
* possibly fail. Now, note that the nodes whose definitions have not yet
|
||||
* been scheduled but one or more use has been scheduled, are exactly the
|
||||
* nodes that are live in the final schedule. Therefore there will never
|
||||
* be more than 11 of them (guaranteed by the 11 value reg alloc and the
|
||||
* fake dep added before schedule). The worst case for step 5 is that all of
|
||||
* these nodes had a use 1 cycle ago, which means that none of them hit
|
||||
* case 3 or 4 already, so there are 6 slots still available so step 5
|
||||
* will always succeed. In general, even if there are exactly 11 values
|
||||
* live, if n are scheduled in steps 3 and 4, there are 11-n left in step
|
||||
* 4 so at most 11-n-5 = 6-n are scheduled in step 5 and therefore 6 are
|
||||
* scheduled total, below the limit. So the algorithm will always succeed.
|
||||
*/
|
||||
|
||||
/* Minimum instruction distance between the producer dep->pred and a
 * consumer that reads its value as an ALU-style input:
 *   0 for the load ops (uniform, temp, reg, attribute),
 *   2 for complex1,
 *   1 for everything else.
 */
static int gpir_min_dist_alu(gpir_dep *dep)
{
   gpir_node *producer = dep->pred;

   if (producer->op == gpir_op_complex1)
      return 2;

   if (producer->op == gpir_op_load_uniform ||
       producer->op == gpir_op_load_temp ||
       producer->op == gpir_op_load_reg ||
       producer->op == gpir_op_load_attribute)
      return 0;

   return 1;
}
|
||||
|
||||
/* Minimum difference between the instruction index of dep->pred and that of
 * its already scheduled successor dep->succ (see gpir_get_max_start, which
 * adds this value to succ->sched.instr).
 *
 * The value depends on the dependency type:
 *   INPUT / OFFSET        - latency of the producer, see gpir_min_dist_alu;
 *                           a store fed directly by a load is given a huge
 *                           distance so that placement can never succeed
 *   READ_AFTER_WRITE      - 4 for temp and uniform loads, 3 for reg loads
 *   WRITE_AFTER_READ      - negative: -3 for temp and uniform loads,
 *                           -2 for reg loads
 *   VREG_WRITE_AFTER_READ - 0
 *
 * Combinations that must not occur trip an assert. Each of them now leaves
 * the switch explicitly and returns 0, instead of falling through into the
 * next case label when asserts are compiled out.
 */
static int gpir_get_min_dist(gpir_dep *dep)
{
   switch (dep->type) {
   case GPIR_DEP_INPUT:
      switch (dep->succ->op) {
      case gpir_op_store_temp:
      case gpir_op_store_reg:
      case gpir_op_store_varying:
         /* store must use alu node as input */
         if (dep->pred->type == gpir_node_type_load)
            return INT_MAX >> 2; /* shifted so later additions cannot overflow */
         else
            return 0;

      default:
         return gpir_min_dist_alu(dep);
      }

   case GPIR_DEP_OFFSET:
      assert(dep->succ->op == gpir_op_store_temp);
      return gpir_min_dist_alu(dep);

   case GPIR_DEP_READ_AFTER_WRITE:
      switch (dep->succ->op) {
      case gpir_op_load_temp:
         assert(dep->pred->op == gpir_op_store_temp);
         return 4;
      case gpir_op_load_reg:
         assert(dep->pred->op == gpir_op_store_reg);
         return 3;
      case gpir_op_load_uniform:
         assert(dep->pred->op == gpir_op_store_temp_load_off0 ||
                dep->pred->op == gpir_op_store_temp_load_off1 ||
                dep->pred->op == gpir_op_store_temp_load_off2);
         return 4;
      default:
         assert(0);
         break;
      }
      break;

   case GPIR_DEP_WRITE_AFTER_READ:
      switch (dep->pred->op) {
      case gpir_op_load_temp:
         assert(dep->succ->op == gpir_op_store_temp);
         return -3;
      case gpir_op_load_reg:
         assert(dep->succ->op == gpir_op_store_reg);
         return -2;
      case gpir_op_load_uniform:
         assert(dep->succ->op == gpir_op_store_temp_load_off0 ||
                dep->succ->op == gpir_op_store_temp_load_off1 ||
                dep->succ->op == gpir_op_store_temp_load_off2);
         return -3;
      default:
         assert(0);
         break;
      }
      break;

   case GPIR_DEP_VREG_WRITE_AFTER_READ:
      return 0;

   case GPIR_DEP_VREG_READ_AFTER_WRITE:
      assert(0); /* not possible, this is GPIR_DEP_INPUT */
      break;
   }

   return 0;
}
|
||||
|
||||
/* Maximum instruction distance at which a consumer can still read the
 * result of dep->pred as an ALU-style input. Depends on the producer's op
 * and, for load_reg and mov, on the slot it was scheduled in (sched.pos).
 */
static int gpir_max_dist_alu(gpir_dep *dep)
{
   gpir_node *producer = dep->pred;

   switch (producer->op) {
   case gpir_op_load_uniform:
   case gpir_op_load_temp:
      return 0;

   case gpir_op_load_reg: {
      /* 1 only when placed in one of the REG0_LOAD0..REG0_LOAD3 slots */
      bool in_reg0_slot = producer->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&
                          producer->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3;
      return in_reg0_slot ? 1 : 0;
   }

   case gpir_op_load_attribute:
   case gpir_op_exp2_impl:
   case gpir_op_log2_impl:
   case gpir_op_rcp_impl:
   case gpir_op_rsqrt_impl:
   case gpir_op_store_temp_load_off0:
   case gpir_op_store_temp_load_off1:
   case gpir_op_store_temp_load_off2:
      return 1;

   case gpir_op_mov:
      /* a mov placed in the complex slot has the shorter reach */
      return producer->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 1 : 2;

   default:
      return 2;
   }
}
|
||||
|
||||
/* Maximum difference between the instruction index of dep->pred and that of
 * its scheduled successor dep->succ (see gpir_get_min_end).
 *
 *   INPUT into a store (temp/reg/varying) - 0
 *   other INPUT, and OFFSET               - gpir_max_dist_alu()
 *   every other dependency type           - effectively unlimited
 */
static int gpir_get_max_dist(gpir_dep *dep)
{
   if (dep->type == GPIR_DEP_OFFSET) {
      assert(dep->succ->op == gpir_op_store_temp);
      return gpir_max_dist_alu(dep);
   }

   if (dep->type != GPIR_DEP_INPUT)
      return INT_MAX >> 2; /* Don't want to overflow... */

   switch (dep->succ->op) {
   case gpir_op_store_temp:
   case gpir_op_store_reg:
   case gpir_op_store_varying:
      return 0;

   default:
      return gpir_max_dist_alu(dep);
   }
}
|
||||
|
||||
/* Compute node->sched.dist: 0 for a leaf, otherwise 1 + the largest dist
 * among its predecessors. Predecessors whose dist is still negative are
 * computed first by recursion.
 */
static void schedule_update_distance(gpir_node *node)
{
   if (gpir_node_is_leaf(node)) {
      node->sched.dist = 0;
      return;
   }

   gpir_node_foreach_pred(node, dep) {
      gpir_node *child = dep->pred;

      /* negative dist: not visited yet */
      if (child->sched.dist < 0)
         schedule_update_distance(child);

      int through_child = child->sched.dist + 1;
      if (through_child > node->sched.dist)
         node->sched.dist = through_child;
   }
}
|
||||
|
||||
static void schedule_insert_ready_list(struct list_head *ready_list,
|
||||
gpir_node *insert_node)
|
||||
{
|
||||
/* if this node is fully ready or partially ready
|
||||
* fully ready: all successors have been scheduled
|
||||
* partially ready: part of input successors have been scheduled
|
||||
*
|
||||
* either fully ready or partially ready node need be inserted to
|
||||
* the ready list, but we only schedule a move node for partially
|
||||
* ready node.
|
||||
*/
|
||||
bool ready = true, insert = false;
|
||||
gpir_node_foreach_succ(insert_node, dep) {
|
||||
gpir_node *succ = dep->succ;
|
||||
if (succ->sched.instr >= 0) {
|
||||
if (dep->type == GPIR_DEP_INPUT)
|
||||
insert = true;
|
||||
}
|
||||
else
|
||||
ready = false;
|
||||
}
|
||||
|
||||
insert_node->sched.ready = ready;
|
||||
/* for root node */
|
||||
insert |= ready;
|
||||
|
||||
if (!insert || insert_node->sched.inserted)
|
||||
return;
|
||||
|
||||
struct list_head *insert_pos = ready_list;
|
||||
list_for_each_entry(gpir_node, node, ready_list, list) {
|
||||
if (insert_node->sched.dist > node->sched.dist) {
|
||||
insert_pos = &node->list;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_addtail(&insert_node->list, insert_pos);
|
||||
insert_node->sched.inserted = true;
|
||||
}
|
||||
|
||||
/* Lowest instruction index node may be placed at: the maximum over all
 * scheduled successors of (successor instr + minimum distance), and never
 * below 0. Unscheduled successors are ignored.
 */
static int gpir_get_max_start(gpir_node *node)
{
   int bound = 0;

   gpir_node_foreach_succ(node, dep) {
      int succ_instr = dep->succ->sched.instr;
      if (succ_instr >= 0) {
         int start = succ_instr + gpir_get_min_dist(dep);
         if (bound < start)
            bound = start;
      }
   }

   return bound;
}
|
||||
|
||||
/* Highest instruction index node may be placed at: the minimum over all
 * scheduled successors of (successor instr + maximum distance). Returns
 * INT_MAX when no successor has been scheduled.
 */
static int gpir_get_min_end(gpir_node *node)
{
   int bound = INT_MAX;

   gpir_node_foreach_succ(node, dep) {
      int succ_instr = dep->succ->sched.instr;
      if (succ_instr >= 0) {
         int end = succ_instr + gpir_get_max_dist(dep);
         if (bound > end)
            bound = end;
      }
   }

   return bound;
}
|
||||
|
||||
/* Search the load slots of instr (REG0_LOAD0 .. MEM_LOAD3) for a load with
 * the same op, index and component as node. Returns the node occupying
 * that slot, or NULL when there is none.
 */
static gpir_node *gpir_sched_instr_has_load(gpir_instr *instr, gpir_node *node)
{
   gpir_load_node *wanted = gpir_node_to_load(node);

   for (int slot = GPIR_INSTR_SLOT_REG0_LOAD0;
        slot <= GPIR_INSTR_SLOT_MEM_LOAD3; slot++) {
      gpir_node *occupant = instr->slots[slot];
      if (!occupant)
         continue;

      gpir_load_node *present = gpir_node_to_load(occupant);
      bool same = wanted->node.op == present->node.op &&
                  wanted->index == present->index &&
                  wanted->component == present->component;
      if (same)
         return &present->node;
   }

   return NULL;
}
|
||||
|
||||
/* Try to put node into instr.
 *
 * A load that duplicates one already present in instr is not inserted; it
 * just takes over that load's instr/pos. Otherwise each slot allowed for
 * node's op is tried in turn, and the first one that satisfies the distance
 * limits to the scheduled successors and is accepted by
 * gpir_instr_try_insert_node() wins.
 *
 * Returns true on success. On failure sched.instr and sched.pos are reset
 * to -1 and false is returned.
 */
static bool schedule_try_place_node(gpir_instr *instr, gpir_node *node)
{
   if (node->type == gpir_node_type_load) {
      gpir_node *twin = gpir_sched_instr_has_load(instr, node);
      if (twin) {
         gpir_debug("same load %d in instr %d for node %d\n",
                    twin->index, instr->index, node->index);

         /* not really merge two node, just fake scheduled same place */
         node->sched.instr = twin->sched.instr;
         node->sched.pos = twin->sched.pos;
         return true;
      }
   }

   node->sched.instr = instr->index;

   for (int *slot = gpir_op_infos[node->op].slots;
        *slot != GPIR_INSTR_SLOT_END; slot++) {
      /* pos has to be set before the checks: the max distance of some ops
       * depends on the slot they occupy */
      node->sched.pos = *slot;

      if (node->sched.instr < gpir_get_max_start(node))
         continue;
      if (node->sched.instr > gpir_get_min_end(node))
         continue;
      if (gpir_instr_try_insert_node(instr, node))
         return true;
   }

   node->sched.instr = -1;
   node->sched.pos = -1;
   return false;
}
|
||||
|
||||
/* Create an unscheduled mov node in node's block whose single child is
 * node. The mov inherits node's sched.dist. Dependencies are not touched
 * here. Returns NULL when node creation fails.
 */
static gpir_node *schedule_create_move_node(gpir_node *node)
{
   gpir_alu_node *mov = gpir_node_create(node->block, gpir_op_mov);
   if (unlikely(!mov))
      return NULL;

   mov->num_child = 1;
   mov->children[0] = node;

   gpir_node *result = &mov->node;
   result->sched.dist = node->sched.dist;
   /* -1: not placed in any instruction/slot yet */
   result->sched.instr = -1;
   result->sched.pos = -1;

   gpir_debug("create move %d for %d\n", result->index, node->index);
   return result;
}
|
||||
|
||||
/* Force progress on a node that has to be dealt with in instr.
 *
 * - node is not a mov: a new mov is created in front of it. node leaves the
 *   ready list with its ready/inserted flags cleared, the mov takes over
 *   node's successors and gets node as its input. The still unscheduled mov
 *   is returned.
 * - node is a mov: successors that are not scheduled yet, or that are too
 *   close for the mov placed in instr to feed them, are rewired to read the
 *   mov's child directly. The mov is then placed in instr, which is
 *   asserted to succeed, and returned.
 */
static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node)
{
   if (node->op != gpir_op_mov) {
      /* NOTE(review): schedule_create_move_node() can return NULL, which is
       * not checked before the result is used below. */
      gpir_node *move = schedule_create_move_node(node);
      list_del(&node->list);
      node->sched.ready = false;
      node->sched.inserted = false;
      gpir_node_replace_succ(move, node);
      gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
      return move;
   }

   gpir_alu_node *mov = gpir_node_to_alu(node);
   gpir_node *source = mov->children[0];

   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *user = dep->succ;

      /* users this mov can serve from instr keep reading the mov */
      bool served = user->sched.instr >= 0 &&
                    instr->index >= user->sched.instr + gpir_get_min_dist(dep);
      if (served)
         continue;

      gpir_node_replace_pred(dep, source);
      if (dep->type == GPIR_DEP_INPUT)
         gpir_node_replace_child(user, node, source);
   }

   MAYBE_UNUSED bool result = schedule_try_place_node(instr, node);
   assert(result);
   return node;
}
|
||||
|
||||
/* True when at least one successor dependency of node is of type
 * GPIR_DEP_INPUT, i.e. some node consumes its value. */
static bool gpir_is_input_node(gpir_node *node)
{
   bool found = false;

   gpir_node_foreach_succ(node, dep) {
      if (dep->type == GPIR_DEP_INPUT) {
         found = true;
         break;
      }
   }

   return found;
}
|
||||
|
||||
/* Smallest instruction index among the already scheduled successors that
 * use node as an input (GPIR_DEP_INPUT). Returns INT_MAX when there is no
 * such successor. */
static int gpir_get_min_scheduled_succ(gpir_node *node)
{
   int lowest = INT_MAX;

   gpir_node_foreach_succ(node, dep) {
      int succ_instr = dep->succ->sched.instr;

      if (succ_instr < 0 || dep->type != GPIR_DEP_INPUT)
         continue;

      if (succ_instr < lowest)
         lowest = succ_instr;
   }

   return lowest;
}
|
||||
|
||||
/* One selection pass for instr: pick a single node from ready_list and
 * either place it in instr or create a move for it.
 *
 * Priorities, highest first:
 *   1. a fully ready node that has reached its maximum distance: place it,
 *      or fall back to gpir_sched_node() when it does not fit
 *   2. a partially ready node that has reached its maximum distance:
 *      gpir_sched_node()
 *   3. if the input nodes last used by the previous instruction need more
 *      than 5 slots: place a fully ready one, else gpir_sched_node() on a
 *      partially ready one, else gpir_sched_node() on a fully ready one
 *   4. any fully ready node that fits
 *
 * Returns the node that was placed (sched.instr >= 0), a node that is still
 * unscheduled (sched.instr < 0), or NULL when nothing more can be done for
 * this instruction.
 */
static gpir_node *gpir_sched_instr_pass(gpir_instr *instr,
                                        struct list_head *ready_list)
{
   /* fully ready node reach its max dist with any of its successor */
   list_for_each_entry_safe(gpir_node, cand, ready_list, list) {
      if (!cand->sched.ready)
         continue;

      int end = gpir_get_min_end(cand);
      assert(end >= instr->index);
      if (instr->index < end)
         continue;

      gpir_debug("fully ready max node %d\n", cand->index);

      if (schedule_try_place_node(instr, cand))
         return cand;

      return gpir_sched_node(instr, cand);
   }

   /* partially ready node reach its max dist with any of its successor */
   list_for_each_entry_safe(gpir_node, cand, ready_list, list) {
      if (cand->sched.ready)
         continue;

      int end = gpir_get_min_end(cand);
      assert(end >= instr->index);
      if (instr->index < end)
         continue;

      gpir_debug("partially ready max node %d\n", cand->index);

      return gpir_sched_node(instr, cand);
   }

   /* schedule node used by previous instr when count > 5 */
   int count = 0;
   list_for_each_entry(gpir_node, cand, ready_list, list) {
      if (!gpir_is_input_node(cand))
         continue;

      int min = gpir_get_min_scheduled_succ(cand);
      assert(min >= instr->index - 1);
      if (min == instr->index - 1)
         count += gpir_op_infos[cand->op].may_consume_two_slots ? 2 : 1;
   }

   if (count > 5) {
      /* schedule fully ready node first */
      list_for_each_entry(gpir_node, cand, ready_list, list) {
         if (!gpir_is_input_node(cand))
            continue;

         int min = gpir_get_min_scheduled_succ(cand);
         if (min != instr->index - 1 || !cand->sched.ready)
            continue;

         gpir_debug(">5 ready node %d\n", cand->index);

         if (schedule_try_place_node(instr, cand))
            return cand;
      }

      /* no fully ready node be scheduled, schedule partially ready node */
      list_for_each_entry_safe(gpir_node, cand, ready_list, list) {
         if (!gpir_is_input_node(cand))
            continue;

         int min = gpir_get_min_scheduled_succ(cand);
         if (min != instr->index - 1 || cand->sched.ready)
            continue;

         gpir_debug(">5 partially ready node %d\n", cand->index);

         return gpir_sched_node(instr, cand);
      }

      /* finally schedule move for fully ready node */
      list_for_each_entry_safe(gpir_node, cand, ready_list, list) {
         if (!gpir_is_input_node(cand))
            continue;

         int min = gpir_get_min_scheduled_succ(cand);
         if (min != instr->index - 1 || !cand->sched.ready)
            continue;

         gpir_debug(">5 fully ready move node %d\n", cand->index);

         return gpir_sched_node(instr, cand);
      }
   }

   /* schedule remain fully ready nodes */
   list_for_each_entry(gpir_node, cand, ready_list, list) {
      if (!cand->sched.ready)
         continue;

      gpir_debug("remain fully ready node %d\n", cand->index);

      if (schedule_try_place_node(instr, cand))
         return cand;
   }

   return NULL;
}
|
||||
|
||||
/* Debug helper (active only with LIMA_DEBUG_GP): print the ready list as it
 * looks before instr is filled. Every entry is shown as "index/r" for a
 * fully ready node or "index/p" for a partially ready one. */
static void schedule_print_pre_one_instr(gpir_instr *instr,
                                         struct list_head *ready_list)
{
   if (lima_debug & LIMA_DEBUG_GP) {
      printf("instr %d for ready list:", instr->index);
      list_for_each_entry(gpir_node, cur, ready_list, list) {
         char state = cur->sched.ready ? 'r' : 'p';
         printf(" %d/%c", cur->index, state);
      }
      printf("\n");
   }
}
|
||||
|
||||
/* Debug helper (active only with LIMA_DEBUG_GP): print every occupied slot
 * of instr as "slot/node index". */
static void schedule_print_post_one_instr(gpir_instr *instr)
{
   if (lima_debug & LIMA_DEBUG_GP) {
      printf("post schedule instr");
      for (int slot = 0; slot < GPIR_INSTR_SLOT_NUM; slot++) {
         gpir_node *occupant = instr->slots[slot];
         if (!occupant)
            continue;
         printf(" %d/%d", slot, occupant->index);
      }
      printf("\n");
   }
}
|
||||
|
||||
|
||||
/* Create one new instruction in block and fill it from ready_list.
 *
 * gpir_sched_instr_pass() is called until it has nothing more to offer.
 * A node it returns unscheduled (sched.instr < 0) is offered to the ready
 * list again. A node that was placed moves to the front of
 * block->node_list, and its predecessors are offered to the ready list.
 *
 * Returns false only when the instruction could not be created.
 */
static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list)
{
   gpir_instr *instr = gpir_instr_create(block);
   if (unlikely(!instr))
      return false;

   schedule_print_pre_one_instr(instr, ready_list);

   for (;;) {
      gpir_node *picked = gpir_sched_instr_pass(instr, ready_list);
      if (!picked)
         break;

      if (picked->sched.instr < 0) {
         /* not placed yet: hand it (back) to the ready list */
         schedule_insert_ready_list(ready_list, picked);
         continue;
      }

      /* placed: move from the ready list to the scheduled sequence */
      list_del(&picked->list);
      list_add(&picked->list, &block->node_list);

      gpir_node_foreach_pred(picked, dep) {
         gpir_node *pred = dep->pred;
         schedule_insert_ready_list(ready_list, pred);
      }
   }

   schedule_print_post_one_instr(instr);
   return true;
}
|
||||
|
||||
/* Schedule all nodes of block into instructions.
 *
 * Computes sched.dist starting from every root, seeds the ready list with
 * the roots, then creates instructions one at a time until the ready list
 * is empty. block->node_list is reset before scheduling and refilled with
 * the nodes as they get placed.
 *
 * Returns false when creating an instruction fails.
 */
static bool schedule_block(gpir_block *block)
{
   /* calculate distance */
   list_for_each_entry(gpir_node, cur, &block->node_list, list) {
      if (!gpir_node_is_root(cur))
         continue;
      schedule_update_distance(cur);
   }

   struct list_head ready;
   list_inithead(&ready);

   /* construct the ready list from root nodes */
   list_for_each_entry_safe(gpir_node, cur, &block->node_list, list) {
      if (!gpir_node_is_root(cur))
         continue;
      schedule_insert_ready_list(&ready, cur);
   }

   /* from here on node_list collects the scheduled nodes */
   list_inithead(&block->node_list);

   while (!list_empty(&ready)) {
      bool ok = schedule_one_instr(block, &ready);
      if (!ok)
         return false;
   }

   return true;
}
|
||||
|
||||
/* Add the "fake" value-register dependencies for one block and fold the
 * dummy_m/dummy_f helper nodes back into the node they stand for.
 *
 * Pass 1: walking the nodes in order, whenever a node is assigned a value
 * reg that an earlier node held, a GPIR_DEP_VREG_WRITE_AFTER_READ
 * dependency is added from the new node to every node that reads the
 * earlier holder as an input.
 *
 * Pass 2: for each dummy_m node, its successors are made to depend on
 * children[0] (the origin) instead, after which the dummy_m node and its
 * children[1] (dummy_f) are deleted.
 */
static void schedule_build_vreg_dependency(gpir_block *block)
{
   /* last node seen holding each value register */
   gpir_node *holder[GPIR_VALUE_REG_NUM] = {0};

   list_for_each_entry(gpir_node, cur, &block->node_list, list) {
      /* store node has no value reg assigned */
      if (cur->value_reg < 0)
         continue;

      gpir_node *previous = holder[cur->value_reg];
      if (previous) {
         gpir_node_foreach_succ(previous, dep) {
            /* write after read dep should only apply to real 'read' */
            if (dep->type == GPIR_DEP_INPUT) {
               gpir_node *reader = dep->succ;
               gpir_node_add_dep(cur, reader, GPIR_DEP_VREG_WRITE_AFTER_READ);
            }
         }
      }

      holder[cur->value_reg] = cur;
   }

   /* merge dummy_f/m to the node created from */
   list_for_each_entry_safe(gpir_node, cur, &block->node_list, list) {
      if (cur->op != gpir_op_dummy_m)
         continue;

      gpir_alu_node *dummy_m = gpir_node_to_alu(cur);
      gpir_node *origin = dummy_m->children[0];
      gpir_node *dummy_f = dummy_m->children[1];

      gpir_node_foreach_succ(cur, dep) {
         gpir_node *user = dep->succ;

         /* origin and the dummy may already share this successor (through
          * VREG/INPUT or VREG/VREG deps), hence gpir_node_add_dep() rather
          * than gpir_node_replace_pred() */
         gpir_node_add_dep(user, origin, dep->type);
         gpir_node_replace_child(user, cur, origin);
      }

      gpir_node_delete(dummy_f);
      gpir_node_delete(cur);
   }
}
|
||||
|
||||
/* Build read-after-write and write-after-read dependencies between the
 * load/store nodes that access the same physical register.
 *
 * 1. gpir_reg objects sharing the same index are merged: the defs/uses
 *    lists of later ones are spliced onto the first one seen.
 * 2. For every register, stores and loads are sorted by node sched.index.
 * 3. Each load gets a READ_AFTER_WRITE dependency on the last store that
 *    precedes it (or on the first store when none precedes it). If another
 *    store follows, that store gets a WRITE_AFTER_READ dependency on the
 *    load.
 *
 * The lookup table is indexed by reg->index, a physical register number,
 * so it is sized GPIR_PHYSICAL_REG_NUM. A register without any store is
 * skipped, since there is no store node to hang dependencies on.
 */
static void schedule_build_preg_dependency(gpir_compiler *comp)
{
   /* merge reg with the same index */
   gpir_reg *regs[GPIR_PHYSICAL_REG_NUM] = {0};
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      if (!regs[reg->index])
         regs[reg->index] = reg;
      else {
         list_splicetail(&reg->defs_list, &regs[reg->index]->defs_list);
         list_splicetail(&reg->uses_list, &regs[reg->index]->uses_list);
      }
   }

   /* calculate physical reg read/write dependency for load/store nodes */
   for (int i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
      gpir_reg *reg = regs[i];
      if (!reg)
         continue;

      /* sort reg write (insertion sort by ascending sched.index) */
      struct list_head tmp_list;
      list_replace(&reg->defs_list, &tmp_list);
      list_inithead(&reg->defs_list);
      list_for_each_entry_safe(gpir_store_node, store, &tmp_list, reg_link) {
         struct list_head *insert_pos = &reg->defs_list;
         list_for_each_entry(gpir_store_node, st, &reg->defs_list, reg_link) {
            if (st->node.sched.index > store->node.sched.index) {
               insert_pos = &st->reg_link;
               break;
            }
         }
         list_del(&store->reg_link);
         list_addtail(&store->reg_link, insert_pos);
      }

      /* sort reg read (same scheme) */
      list_replace(&reg->uses_list, &tmp_list);
      list_inithead(&reg->uses_list);
      list_for_each_entry_safe(gpir_load_node, load, &tmp_list, reg_link) {
         struct list_head *insert_pos = &reg->uses_list;
         list_for_each_entry(gpir_load_node, ld, &reg->uses_list, reg_link) {
            if (ld->node.sched.index > load->node.sched.index) {
               insert_pos = &ld->reg_link;
               break;
            }
         }
         list_del(&load->reg_link);
         list_addtail(&load->reg_link, insert_pos);
      }

      /* without a store, list_first_entry() below would yield a pointer
       * computed from the list head rather than a real node */
      if (list_empty(&reg->defs_list))
         continue;

      /* insert dependency */
      gpir_store_node *store =
         list_first_entry(&reg->defs_list, gpir_store_node, reg_link);
      /* list_first_entry() on an element's link yields the element after it */
      gpir_store_node *next = store->reg_link.next != &reg->defs_list ?
         list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL;

      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         /* loop until load is between store and next */
         while (next && next->node.sched.index < load->node.sched.index) {
            store = next;
            next = store->reg_link.next != &reg->defs_list ?
               list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL;
         }

         gpir_node_add_dep(&load->node, &store->node, GPIR_DEP_READ_AFTER_WRITE);
         if (next)
            gpir_node_add_dep(&next->node, &load->node, GPIR_DEP_WRITE_AFTER_READ);
      }
   }
}
|
||||
|
||||
static void print_statistic(gpir_compiler *comp, int save_index)
|
||||
{
|
||||
int num_nodes[gpir_op_num] = {0};
|
||||
int num_created_nodes[gpir_op_num] = {0};
|
||||
|
||||
list_for_each_entry(gpir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(gpir_node, node, &block->node_list, list) {
|
||||
num_nodes[node->op]++;
|
||||
if (node->index >= save_index)
|
||||
num_created_nodes[node->op]++;
|
||||
}
|
||||
}
|
||||
|
||||
printf("====== gpir scheduler statistic ======\n");
|
||||
printf("---- how many nodes are scheduled ----\n");
|
||||
int n = 0, l = 0;
|
||||
for (int i = 0; i < gpir_op_num; i++) {
|
||||
if (num_nodes[i]) {
|
||||
printf("%10s:%-6d", gpir_op_infos[i].name, num_nodes[i]);
|
||||
n += num_nodes[i];
|
||||
if (!(++l % 4))
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
if (l % 4)
|
||||
printf("\n");
|
||||
printf("\ntotal: %d\n", n);
|
||||
|
||||
printf("---- how many nodes are created ----\n");
|
||||
n = l = 0;
|
||||
for (int i = 0; i < gpir_op_num; i++) {
|
||||
if (num_created_nodes[i]) {
|
||||
printf("%10s:%-6d", gpir_op_infos[i].name, num_created_nodes[i]);
|
||||
n += num_created_nodes[i];
|
||||
if (!(++l % 4))
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
if (l % 4)
|
||||
printf("\n");
|
||||
printf("\ntotal: %d\n", n);
|
||||
printf("------------------------------------\n");
|
||||
}
|
||||
|
||||
/* Schedule every block of a gpir program.
 *
 * Resets the per-node schedule state, builds the value-register and
 * physical-register dependencies, then schedules block by block.
 *
 * Returns false (after logging an error) as soon as one block fails to
 * schedule, true otherwise.
 */
bool gpir_schedule_prog(gpir_compiler *comp)
{
   /* remembered so print_statistic() can tell which nodes were created
    * after this point */
   int save_index = comp->cur_index;

   /* init schedule info; sched.index numbers nodes across all blocks */
   int index = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->sched.instr_index = 0;
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         node->sched.instr = -1;
         node->sched.pos = -1;
         node->sched.index = index++;
         node->sched.dist = -1;
         node->sched.ready = false;
         node->sched.inserted = false;
      }
   }

   /* build fake/virtual dependency */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      schedule_build_vreg_dependency(block);
   }
   schedule_build_preg_dependency(comp);

   //gpir_debug("after scheduler build reg dependency\n");
   //gpir_node_print_prog_dep(comp);

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      if (!schedule_block(block)) {
         gpir_error("fail schedule block\n");
         return false;
      }
   }

   if (lima_debug & LIMA_DEBUG_GP) {
      print_statistic(comp, save_index);
      gpir_instr_print_prog(comp);
   }

   return true;
}
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Linear scan register alloc for value reg alloc of each node */
|
||||
|
||||
/* Free one value register by spilling the node that currently holds it.
 *
 * Among the active nodes whose op is not marked spillless, the one whose
 * last successor has the highest vreg.index is chosen. A store_reg node is
 * created for it, and every existing successor is rewired to read from a
 * freshly created load_reg node instead.
 *
 * Every entry of active[] is dereferenced, so the caller must pass a fully
 * occupied array.
 *
 * Returns the value register index that the spilled node occupied.
 */
static int regalloc_spill_active_node(gpir_node *active[])
{
   gpir_node *spill = NULL;
   for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) {
      if (gpir_op_infos[active[i]->op].spillless)
         continue;

      /* spill farthest node */
      if (!spill ||
          spill->vreg.last->vreg.index < active[i]->vreg.last->vreg.index) {
         spill = active[i];
      }
   }

   assert(spill);
   gpir_debug("value regalloc spill node %d for value reg %d\n",
              spill->index, spill->value_reg);

   /* create store node for spilled node */
   gpir_store_node *store = gpir_node_create(spill->block, gpir_op_store_reg);
   store->child = spill;
   /* no need to calculate other vreg values because store & spill won't
    * be used in the following schedule again */
   store->node.value_reg = spill->value_reg;
   list_addtail(&store->node.list, &spill->list);

   gpir_reg *reg = gpir_create_reg(spill->block->comp);
   store->reg = reg;
   list_addtail(&store->reg_link, &reg->defs_list);

   /* _safe variant: the dep is re-targeted while iterating */
   gpir_node_foreach_succ_safe(spill, dep) {
      gpir_node *succ = dep->succ;
      gpir_load_node *load = gpir_node_create(succ->block, gpir_op_load_reg);
      gpir_node_replace_pred(dep, &load->node);
      gpir_node_replace_child(succ, spill, &load->node);
      list_addtail(&load->node.list, &succ->list);

      /* only valid for succ already scheduled, succ not scheduled will
       * re-write this value */
      load->node.value_reg = spill->value_reg;
      /* place the load halfway between its two list neighbours */
      load->node.vreg.index =
         (list_first_entry(&load->node.list, gpir_node, list)->vreg.index +
          list_last_entry(&load->node.list, gpir_node, list)->vreg.index) / 2.0f;
      load->node.vreg.last = succ;

      load->reg = reg;
      list_addtail(&load->reg_link, &reg->uses_list);
   }

   /* added last so the store is not visited by the successor loop above */
   gpir_node_add_dep(&store->node, spill, GPIR_DEP_INPUT);
   return spill->value_reg;
}
|
||||
|
||||
/* Linear-scan value register allocation for one block.
 *
 * Nodes are numbered in list order, each node records its last successor
 * (highest vreg.index), and registers are then handed out while walking the
 * list: a register is released when the node being visited is the last
 * successor of its holder, and a node is spilled when no register is free.
 * Root nodes get value_reg = -1.
 */
static void regalloc_block(gpir_block *block)
{
   /* build each node sequence index in the block node list */
   int index = 0;
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      node->vreg.index = index++;
   }

   /* find the last successor of each node by the sequence index */
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      node->vreg.last = NULL;
      gpir_node_foreach_succ(node, dep) {
         gpir_node *succ = dep->succ;
         if (!node->vreg.last || node->vreg.last->vreg.index < succ->vreg.index)
            node->vreg.last = succ;
      }
   }

   /* do linear scan regalloc */
   int reg_search_start = 0;
   gpir_node *active[GPIR_VALUE_REG_NUM] = {0};
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      /* if some reg is expired */
      gpir_node_foreach_pred(node, dep) {
         gpir_node *pred = dep->pred;
         if (pred->vreg.last == node)
            active[pred->value_reg] = NULL;
      }

      /* no need to alloc value reg for root node */
      if (gpir_node_is_root(node)) {
         node->value_reg = -1;
         continue;
      }

      /* find a free reg for this node */
      int i;
      for (i = 0; i < GPIR_VALUE_REG_NUM; i++) {
         /* round robin reg select to reduce false dep when schedule */
         int reg = (reg_search_start + i) % GPIR_VALUE_REG_NUM;
         if (!active[reg]) {
            active[reg] = node;
            node->value_reg = reg;
            reg_search_start++;
            break;
         }
      }

      /* need spill: the search above ran to completion without a hit */
      if (i == GPIR_VALUE_REG_NUM) {
         int spilled_reg = regalloc_spill_active_node(active);
         active[spilled_reg] = node;
         node->value_reg = spilled_reg;
         gpir_debug("value regalloc node %d reuse reg %d\n",
                    node->index, spilled_reg);
      }
   }
}
|
||||
|
||||
/* Debug dump of the value register allocation, printed only when
 * LIMA_DEBUG_GP is set in lima_debug.
 *
 * One line per node: running position, then "index/value_reg op", then an
 * "index/value_reg" pair for each predecessor. Blocks are separated by a
 * dashed line.
 */
static void regalloc_print_result(gpir_compiler *comp)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   int index = 0;
   printf("======== value regalloc ========\n");
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
                gpir_op_infos[node->op].name);
         gpir_node_foreach_pred(node, dep) {
            gpir_node *pred = dep->pred;
            printf(" %d/%d", pred->index, pred->value_reg);
         }
         printf("\n");
      }
      printf("----------------------------\n");
   }
}
|
||||
|
||||
/* Run value register allocation on every block of the program and, when GP
 * debugging is enabled, print the result. Always returns true.
 */
bool gpir_value_regalloc_prog(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      regalloc_block(block);
   }

   regalloc_print_result(comp);
   return true;
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIMA_IR_H
#define LIMA_IR_H

#include <stdio.h>
#include <lima_screen.h>

#include "nir.h"

/* Logging helpers.
 *
 * The first argument must be a string literal: it is concatenated with the
 * "gpir: " / "ppir: " prefix at compile time. The *_debug variants print to
 * stdout only when the matching LIMA_DEBUG_* bit is set in lima_debug; the
 * *_error variants always print to stderr.
 */
#define gpir_debug(...)                \
   do {                                \
      if (lima_debug & LIMA_DEBUG_GP)  \
         printf("gpir: " __VA_ARGS__); \
   } while (0)

#define gpir_error(...) \
   fprintf(stderr, "gpir: " __VA_ARGS__)

#define ppir_debug(...)                \
   do {                                \
      if (lima_debug & LIMA_DEBUG_PP)  \
         printf("ppir: " __VA_ARGS__); \
   } while (0)

#define ppir_error(...) \
   fprintf(stderr, "ppir: " __VA_ARGS__)


struct ra_regs;
struct lima_vs_shader_state;
struct lima_fs_shader_state;

/* gpir interface */
bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir);


/* ppir interface */
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct ra_regs *ra);
struct ra_regs *ppir_regalloc_init(void *mem_ctx);

/* Split every multi-component load_uniform intrinsic of the shader into
 * single-component loads. */
void lima_nir_lower_uniform_to_scalar(nir_shader *shader);

#endif
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2019 Qiang Yu <yuq825@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "lima_ir.h"
|
||||
|
||||
static void
|
||||
lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_ssa_def *loads[4];
|
||||
for (unsigned i = 0; i < intr->num_components; i++) {
|
||||
nir_intrinsic_instr *chan_intr =
|
||||
nir_intrinsic_instr_create(b->shader, intr->intrinsic);
|
||||
nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
|
||||
1, intr->dest.ssa.bit_size, NULL);
|
||||
chan_intr->num_components = 1;
|
||||
|
||||
nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr) * 4 + i);
|
||||
nir_intrinsic_set_range(chan_intr, nir_intrinsic_range(intr) * 4);
|
||||
|
||||
chan_intr->src[0] =
|
||||
nir_src_for_ssa(nir_fmul_imm(b, intr->src[0].ssa, 4));
|
||||
|
||||
nir_builder_instr_insert(b, &chan_intr->instr);
|
||||
|
||||
loads[i] = &chan_intr->dest.ssa;
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
|
||||
nir_src_for_ssa(nir_vec(b, loads,
|
||||
intr->num_components)));
|
||||
nir_instr_remove(&intr->instr);
|
||||
}
|
||||
|
||||
void
|
||||
lima_nir_lower_uniform_to_scalar(nir_shader *shader)
|
||||
{
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl) {
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, function->impl);
|
||||
|
||||
nir_foreach_block(block, function->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_load_uniform ||
|
||||
intr->num_components == 1)
|
||||
continue;
|
||||
|
||||
lower_load_uniform_to_scalar(&b, intr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,669 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_half.h"
|
||||
#include "util/bitscan.h"
|
||||
|
||||
#include "ppir.h"
|
||||
#include "codegen.h"
|
||||
#include "lima_context.h"
|
||||
|
||||
/* Pack a 4-entry swizzle into 2-bit fields.
 *
 * Entry i is offset by `shift` (wrapping modulo 4) and stored at bit
 * position (i + dest_shift) * 2, so a non-zero dest_shift moves the whole
 * pattern up by that many 2-bit fields.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;
   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
   return ret;
}
|
||||
|
||||
/* Scalar register index of a source: the source's base register index plus
 * the swizzle entry selected for the given destination component.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   int ret = ppir_target_get_src_reg_index(src);
   ret += src->swizzle[component];
   return ret;
}
|
||||
|
||||
/* Fill the varying field of an instruction from a load node.
 *
 * A load with a non-zero component count uses the immediate form, selected
 * by load->index; a load with zero components uses the register form and
 * takes its source (with negate/absolute/swizzle) from load->src.
 *
 * The destination register index is split into a register number
 * (index >> 2) and a starting component (index & 0x3), which shifts the
 * write mask.
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int num_components = load->num_components;

   if (num_components) {
      assert(node->op == ppir_op_load_varying || node->op == ppir_op_load_coords);

      f->imm.dest = index >> 2;
      f->imm.mask = dest->write_mask << (index & 0x3);

      /* 1, 2, 4 components -> 0, 1, 3; 3 components share the value 3 */
      int alignment = num_components == 3 ? 3 : num_components - 1;
      f->imm.alignment = alignment;
      f->imm.offset_vector = 0xf;

      if (alignment == 3)
         f->imm.index = load->index >> 2;
      else
         f->imm.index = load->index >> alignment;
   }
   else {
      assert(node->op == ppir_op_load_coords);

      f->reg.dest = index >> 2;
      f->reg.mask = dest->write_mask << (index & 0x3);

      f->reg.source_type = 1;

      ppir_src *src = &load->src;
      index = ppir_target_get_src_reg_index(src);
      f->reg.source = index >> 2;
      f->reg.negate = src->negate;
      f->reg.absolute = src->absolute;
      f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
   }
}
|
||||
|
||||
/* Fill the sampler field for a texture load: a 2D sample from
 * ldtex->sampler with LOD bias and offset disabled.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);

   f->index = ldtex->sampler;
   f->lod_bias_en = 0;
   f->type = ppir_codegen_sampler_type_2d;
   f->offset_en = 0;
   /* meaning of this constant is not established by this file */
   f->unknown_2 = 0x39001;
}
|
||||
|
||||
/* Fill the uniform field for a load_uniform or load_temp node.
 *
 * The alignment value is 0, 1, 2 for 1, 2, 4 components (3 components also
 * yield 2), and the index is scaled by 1 << (2 - alignment).
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   switch (node->op) {
      case ppir_op_load_uniform:
         f->source = ppir_codegen_uniform_src_uniform;
         break;
      case ppir_op_load_temp:
         f->source = ppir_codegen_uniform_src_temporary;
         break;
      default:
         /* no other op is expected in this slot */
         assert(0);
   }

   int num_components = load->num_components;
   int alignment = num_components == 4 ? 2 : num_components - 1;

   f->alignment = alignment;

   /* TODO: uniform can be also combined like varying */
   f->index = load->index << (2 - alignment);
}
|
||||
|
||||
/* Map a shift in [-3, 3] to an op value: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (-1 -> 7, -3 -> 5), i.e. the
 * 3-bit two's complement encoding.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}
|
||||
|
||||
/* Fill the vec4 multiply field from an ALU node.
 *
 * When the destination is a pipeline target, no destination register or
 * write mask is written. Ops not listed in the switch leave f->op untouched.
 * Source swizzles are shifted by the destination's starting component so
 * they line up with the shifted write mask.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_shift = 0;
   if (dest->type != ppir_target_pipeline) {
      int index = ppir_target_get_dest_reg_index(dest);
      dest_shift = index & 0x3;
      f->dest = index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      /* for mul, the op field carries the shift amount */
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_vec4_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_vec4_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_vec4_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_vec4_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   int index = ppir_target_get_src_reg_index(src);
   f->arg0_source = index >> 2;
   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
|
||||
|
||||
/* Fill the scalar (float) multiply field from an ALU node.
 *
 * The destination component is the lowest bit set in the write mask. When
 * the destination is a pipeline target, no destination register is written
 * and output_en is left untouched. Ops not listed in the switch leave f->op
 * untouched.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   /* an empty write mask would give -1 */
   assert(dest_component >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      /* for mul, the op field carries the shift amount */
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_float_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_float_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_float_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_float_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
|
||||
|
||||
/* Fill the vec4 accumulate (add) field from an ALU node.
 *
 * For sum3/sum4 the source swizzles are not shifted by the destination
 * component (dest_shift is reset to 0 after the mask has been written).
 * For select, encoding starts at the second source (alu->src + 1). A first
 * source coming from the vmul pipeline register sets mul_in instead of
 * arg0_source. Ops not listed in the switch leave f->op untouched.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = index & 0x3;
   f->dest = index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      f->op = ppir_codegen_vec4_acc_op_add;
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_acc_op_mov;
      break;
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_vec4_acc_op_floor;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_vec4_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_vec4_acc_op_sel;
      break;
   default:
      break;
   }

   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
   index = ppir_target_get_src_reg_index(src);

   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_vmul)
      f->mul_in = true;
   else
      f->arg0_source = index >> 2;

   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   /* encode the following source as arg1, if the node has one */
   if (++src < alu->src + alu->num_src) {
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
|
||||
|
||||
/* Fill the scalar (float) accumulate (add) field from an ALU node.
 *
 * The destination component is the lowest bit set in the write mask. For
 * select, encoding starts at the second source (alu->src + 1). A first
 * source coming from the fmul pipeline register sets mul_in instead of
 * arg0_source. Ops not listed in the switch leave f->op untouched.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   /* an empty write mask would give -1 */
   assert(dest_component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
   f->output_en = true;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      /* for add, the op field carries the shift amount */
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_acc_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_acc_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_acc_op_min;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_float_acc_op_floor;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_float_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_float_acc_op_sel;
      break;
   default:
      break;
   }

   ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   /* encode the following source as arg1, if the node has one */
   if (++src < alu->src + alu->num_src) {
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
|
||||
|
||||
/* Fill the combine field for the scalar special functions (rsqrt, log2,
 * exp2, rcp, sqrt, sin, cos). Any other op leaves the field untouched.
 *
 * The scalar form is used with a single source: dest_vec and arg1_en are
 * cleared, and the destination component is the lowest bit set in the write
 * mask.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   switch (node->op) {
   case ppir_op_rsqrt:
   case ppir_op_log2:
   case ppir_op_exp2:
   case ppir_op_rcp:
   case ppir_op_sqrt:
   case ppir_op_sin:
   case ppir_op_cos:
   {
      f->scalar.dest_vec = false;
      f->scalar.arg1_en = false;

      ppir_dest *dest = &alu->dest;
      int dest_component = ffs(dest->write_mask) - 1;
      /* an empty write mask would give -1 */
      assert(dest_component >= 0);
      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->scalar.dest_modifier = dest->modifier;

      ppir_src *src = alu->src;
      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
      f->scalar.arg0_absolute = src->absolute;
      f->scalar.arg0_negate = src->negate;

      switch (node->op) {
      case ppir_op_rsqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
         break;
      case ppir_op_log2:
         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
         break;
      case ppir_op_exp2:
         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
         break;
      case ppir_op_rcp:
         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
         break;
      case ppir_op_sqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
         break;
      case ppir_op_sin:
         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
         break;
      case ppir_op_cos:
         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
         break;
      default:
         break;
      }
      /* explicit break: previously fell through into the empty default */
      break;
   }
   default:
      break;
   }
}
|
||||
|
||||
/* Fill the temp-write field for a store_temp node.
 *
 * The alignment value is 0, 1, 2 for 1, 2, 4 components (3 components also
 * yield 2), and the index is scaled by 1 << (2 - alignment).
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *snode = ppir_node_to_store(node);
   int num_components = snode->num_components;

   f->temp_write.dest = 0x03; // 11 - temporary
   f->temp_write.source = snode->src.reg->index;

   int alignment = num_components == 4 ? 2 : num_components - 1;
   f->temp_write.alignment = alignment;
   f->temp_write.index = snode->index << (2 - alignment);

   f->temp_write.offset_reg = snode->index >> 2;
}
|
||||
|
||||
/*
 * Convert the constant->num float values of a constant slot to half floats
 * and store them consecutively in "code".  Entries past constant->num are
 * left untouched.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   int idx = 0;

   while (idx < constant->num) {
      code[idx] = util_float_to_half(constant->value[idx].f);
      idx++;
   }
}
|
||||
|
||||
/* Signature shared by all per-slot encoders: the node that occupies the slot
 * and the scratch buffer that receives the slot's encoded bitfield. */
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

/* Encoder for each instruction slot, indexed by PPIR_INSTR_SLOT_*.
 * encode_instr() invokes the entry for every slot that holds a node. */
static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
};
|
||||
|
||||
/*
 * Width in bits of each instruction field.  The index is the slot number,
 * which encode_instr() also uses as the bit position in ctrl->fields.
 */
static const int ppir_codegen_field_size[] = {
   [0] = 34, /* varying */
   [1] = 62, /* sampler */
   [2] = 41, /* uniform */
   [3] = 43, /* vec4 mul */
   [4] = 30, /* float mul */
   [5] = 44, /* vec4 acc */
   [6] = 31, /* float acc */
   [7] = 30, /* combine */
   [8] = 41, /* temp write */
   [9] = 73, /* branch */
};
|
||||
|
||||
/*
 * Number of 32-bit words needed to hold "size" bits, rounded up.
 * Note that the result is a word count, not a bit count.
 */
static inline int align_to_word(int size)
{
   const int round_up = 0x1f; /* 32 - 1 */
   int words = (size + round_up) >> 5;

   return words;
}
|
||||
|
||||
/*
 * Size in 32-bit words that "instr" will occupy once encoded: the bit widths
 * of all occupied slots, 64 bits per used constant vector, rounded up to
 * whole words, plus one word (encode_instr() places the fields right after
 * the leading control word).
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++)
      bits += instr->slots[slot] ? ppir_codegen_field_size[slot] : 0;

   for (int c = 0; c < 2; c++)
      bits += instr->constant[c].num ? 64 : 0;

   return align_to_word(bits) + 1;
}
|
||||
|
||||
/*
 * OR "src_size" bits from "src" into "dst", starting "dst_offset" bits into
 * the destination.  Both buffers are accessed as arrays of 32-bit words.
 *
 * The destination words are only OR-ed into (or overwritten in the
 * word-aligned case), so the caller must hand in zeroed destination bits.
 * Whole source words are shifted in, so any source bits above src_size in
 * the last word must be zero as well.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   uint32_t *out = (uint32_t *)dst + (dst_offset >> 5);
   uint32_t *in = src;
   int low_shift = dst_offset & 0x1f;

   /* destination is word aligned: a plain copy of whole words is enough */
   if (!low_shift) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   int high_shift = 32 - low_shift;
   int copied = 0;

   for (;;) {
      /* low part of the source word fills the rest of the current
       * destination word ... */
      *out++ |= *in << low_shift;
      copied += high_shift;
      if (copied >= src_size)
         break;

      /* ... and its high part starts the next destination word */
      *out |= *in++ >> high_shift;
      copied += low_shift;
      if (copied >= src_size)
         break;
   }
}
|
||||
|
||||
/*
 * Encode one instruction at "code" and link it to the previous one.
 *
 * The first word at "code" is the control word; the fields of all occupied
 * slots, followed by the used constant vectors, are bit-packed directly
 * behind it.  "code" must be zeroed, because both bitcopy() and the
 * ctrl->fields updates only OR bits in.
 *
 * If "last_code" is not NULL it points to the control word of the previous
 * instruction, which gets this instruction's size as next_count and has its
 * prefetch bit set.
 *
 * Returns the size of the encoded instruction in 32-bit words, control word
 * included.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   int size = 0;
   ppir_codegen_ctrl *ctrl = code;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i]) {
         /* max field size (73), align to dword.  Declared as uint32_t
          * because bitcopy() reads the buffer through a uint32_t pointer
          * and a byte array is not guaranteed to be aligned for that.
          */
         uint32_t output[3] = {0};

         ppir_codegen_encode_slot[i](instr->slots[i], output);
         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);

         size += ppir_codegen_field_size[i];
         ctrl->fields |= 1 << i;
      }
   }

   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   for (int i = 0; i < 2; i++) {
      if (instr->constant[i].num) {
         /* written as four half floats, read back by bitcopy() as two
          * 32-bit words: the union provides the alignment for the latter
          */
         union {
            uint16_t half[4];
            uint32_t word[2];
         } output = { .half = {0} };

         ppir_codegen_encode_const(instr->constant + i, output.half);
         bitcopy(ctrl + 1, size, output.word, instr->constant[i].num * 16);

         /* a constant field always takes 64 bits, however many
          * components are actually used
          */
         size += 64;
         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
      }
   }

   /* from bits to words, plus the control word itself */
   size = align_to_word(size) + 1;

   ctrl->count = size;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = size;
      last_ctrl->prefetch = true;
   }

   return size;
}
|
||||
|
||||
/*
 * Debug dump of the encoded program: for every instruction print its index,
 * its raw words in hex (six per line) and then its disassembly.  The length
 * of each instruction is read back from the low five bits of its first word.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int num_words = cursor[0] & 0x1f;

         printf("%03d: ", instr->index);
         for (int w = 0; w < num_words; w++) {
            /* start a new line after every six words */
            if (w != 0 && w % 6 == 0)
               printf("\n ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");

         ppir_disassemble_instr(cursor, word_offset);

         cursor += num_words;
         word_offset += num_words;
      }
   }

   printf("-----------------------\n");
}
|
||||
|
||||
/*
 * Encode every instruction of every block of "comp" into one zeroed buffer
 * allocated with comp->prog as its ralloc parent, and publish it as
 * comp->prog->shader / shader_size (size in bytes).
 *
 * Returns false if the buffer cannot be allocated, true otherwise.  With
 * LIMA_DEBUG_PP set the encoded program is also dumped.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   /* first pass: total program size in 32-bit words */
   int total_words = 0;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         total_words += get_instr_encode_size(instr);
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, total_words * sizeof(uint32_t));
   if (!prog)
      return false;

   /* second pass: encode back to back; each instruction also patches the
    * control word of its predecessor
    */
   uint32_t *cur = prog;
   uint32_t *prev = NULL;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int words = encode_instr(instr, cur, prev);
         prev = cur;
         cur += words;
      }
   }

   comp->prog->shader = prog;
   comp->prog->shader_size = total_words * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
|
|
@ -0,0 +1,359 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
* Copyright (c) 2013 Ben Brewer (ben.brewer@codethink.co.uk)
|
||||
* Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIMA_IR_PP_CODEGEN_H
|
||||
#define LIMA_IR_PP_CODEGEN_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Control */
|
||||
|
||||
/* Control word that starts every encoded instruction.  The bitfields add up
 * to 32 bits (5 + 1 + 1 + 12 + 6 + 1 + 6); "mask" aliases them as one word.
 */
typedef union __attribute__((__packed__)) {
   struct __attribute__((__packed__)) {
      unsigned count      : 5;  /* size of this instruction in words */
      bool     stop       : 1;  /* set by codegen on the instruction marked is_end */
      bool     sync       : 1;  /* set by codegen when the texld slot is used */
      unsigned fields     : 12; /* presence bits, see ppir_codegen_field_shift */
      unsigned next_count : 6;  /* size of the following instruction in words */
      bool     prefetch   : 1;  /* set by codegen together with next_count */
      unsigned unknown    : 6;
   };
   uint32_t mask;
} ppir_codegen_ctrl;
|
||||
|
||||
/* Bit position of each instruction field inside ppir_codegen_ctrl.fields.
 * Fields are packed behind the control word in this order.
 */
typedef enum {
   ppir_codegen_field_shift_varying = 0,
   ppir_codegen_field_shift_sampler = 1,
   ppir_codegen_field_shift_uniform = 2,
   ppir_codegen_field_shift_vec4_mul = 3,
   ppir_codegen_field_shift_float_mul = 4,
   ppir_codegen_field_shift_vec4_acc = 5,
   ppir_codegen_field_shift_float_acc = 6,
   ppir_codegen_field_shift_combine = 7,
   ppir_codegen_field_shift_temp_write = 8,
   ppir_codegen_field_shift_branch = 9,
   ppir_codegen_field_shift_vec4_const_0 = 10,
   ppir_codegen_field_shift_vec4_const_1 = 11,
   ppir_codegen_field_shift_count = 12, /* number of fields, not a field */
} ppir_codegen_field_shift;
|
||||
|
||||
/* Data Inputs */
|
||||
|
||||
/* Special values of a 4-bit vec4 register index.  Values without a name here
 * are printed by the disassembler as plain registers ("$n").
 *
 * uniform and discard intentionally share the value 15: the disassembler
 * shows 15 as "^uniform" for sources and as "^discard" for the destination
 * of a varying load.
 */
typedef enum {
   ppir_codegen_vec4_reg_frag_color = 0,
   ppir_codegen_vec4_reg_constant0 = 12,
   ppir_codegen_vec4_reg_constant1 = 13,
   ppir_codegen_vec4_reg_texture = 14,
   ppir_codegen_vec4_reg_uniform = 15,
   ppir_codegen_vec4_reg_discard = 15,
} ppir_codegen_vec4_reg;
|
||||
|
||||
/* Varying load field, 34 bits in either view.
 *
 * source_type selects the interpretation; the disassembler decodes it as
 *   1: register source (use the "reg" view)
 *   2: gl_FragCoord
 *   3: gl_FrontFacing if perspective is non-zero, gl_PointCoord otherwise
 *   otherwise: varying "index", scaled according to "alignment", plus an
 *              optional offset register (offset_vector != 15)
 */
typedef union __attribute__((__packed__)) {
   struct __attribute__((__packed__)) {
      unsigned perspective   : 2; /* disassembled as 2: .z, 3: .w */
      unsigned source_type   : 2;
      unsigned unknown_0     : 1; /* = 0 */
      unsigned alignment     : 2; /* disassembled as 0: scalar, 1: pair, else whole vector */
      unsigned unknown_1     : 3; /* = 00 0 */
      unsigned offset_vector : 4; /* 15 = no offset register */
      unsigned unknown_2     : 2; /* = 00 */
      unsigned offset_scalar : 2; /* component of offset_vector */
      unsigned index         : 6;
      ppir_codegen_vec4_reg dest : 4;
      unsigned mask          : 4; /* destination write mask, bit 0 = x */
      unsigned unknown_3     : 2; /* = 00 */
   } imm;
   struct __attribute__((__packed__)) {
      unsigned perspective : 2;
      unsigned source_type : 2; /* = 01 */
      unsigned unknown_0   : 2; /* = 00 */
      bool     normalize   : 1;
      unsigned unknown_1   : 3;
      ppir_codegen_vec4_reg source : 4;
      bool     negate      : 1;
      bool     absolute    : 1;
      unsigned swizzle     : 8; /* 2 bits per component, x first */
      ppir_codegen_vec4_reg dest : 4;
      unsigned mask        : 4;
      unsigned unknown_2   : 2; /* = 00 */
   } reg;
} ppir_codegen_field_varying;
|
||||
|
||||
/* Values of the 5-bit "type" member of ppir_codegen_field_sampler. */
typedef enum {
   ppir_codegen_sampler_type_2d = 0x00,
   ppir_codegen_sampler_type_cube = 0x1F,
} ppir_codegen_sampler_type;
|
||||
|
||||
/* Texture sampler field, 62 bits. */
typedef struct __attribute__((__packed__)) {
   unsigned lod_bias     : 6; /* scalar register holding the bias; used if lod_bias_en */
   unsigned index_offset : 6; /* scalar register added to index; used if offset_en */
   unsigned unknown_0    : 6; /* = 000000 */
   bool     lod_bias_en  : 1;
   unsigned unknown_1    : 5; /* = 00000 */
   ppir_codegen_sampler_type type : 5;
   bool     offset_en    : 1;
   unsigned index        : 12; /* sampler index */
   unsigned unknown_2    : 20; /* = 0011 1001 0000 0000 0001 */
} ppir_codegen_field_sampler;
|
||||
|
||||
/* Values of the 2-bit "source" member of ppir_codegen_field_uniform; the
 * disassembler prints them as "load.u" and "load.t" respectively.
 */
typedef enum {
   ppir_codegen_uniform_src_uniform = 0,
   ppir_codegen_uniform_src_temporary = 3,
} ppir_codegen_uniform_src;
|
||||
|
||||
/* Uniform / temporary load field, 41 bits. */
typedef struct __attribute__((__packed__)) {
   ppir_codegen_uniform_src source : 2;
   unsigned unknown_0  : 8; /* = 00 0000 00 */
   unsigned alignment  : 2; /* 00: float, 01: vec2, 10: vec4 */
   unsigned unknown_1  : 6; /* = 00 0000 */
   unsigned offset_reg : 6; /* scalar register; used if offset_en */
   bool     offset_en  : 1;
   unsigned index      : 16;
} ppir_codegen_field_uniform;
|
||||
|
||||
/* Vector Pipe */
|
||||
|
||||
/* Opcodes of the vec4 multiply unit (5-bit field).  Opcodes 0-7 have no name
 * here: the disassembler shows all of them as "mul", printing a non-zero
 * opcode as a "<<n" suffix on the first operand.
 */
typedef enum {
   ppir_codegen_vec4_mul_op_not = 0x08, /* Logical Not */
   ppir_codegen_vec4_mul_op_and = 0x09, /* Logical AND */
   ppir_codegen_vec4_mul_op_or = 0x0A, /* Logical OR */
   ppir_codegen_vec4_mul_op_xor = 0x0B, /* Logical XOR */
   ppir_codegen_vec4_mul_op_ne = 0x0C, /* Not Equal */
   ppir_codegen_vec4_mul_op_gt = 0x0D, /* Greater Than */
   ppir_codegen_vec4_mul_op_ge = 0x0E, /* Greater Than or Equal */
   ppir_codegen_vec4_mul_op_eq = 0x0F, /* Equal */
   ppir_codegen_vec4_mul_op_min = 0x10, /* Minimum */
   ppir_codegen_vec4_mul_op_max = 0x11, /* Maximum */
   /* NOTE(review): the disassembler prints arg0 as the only operand of
    * mov, which disagrees with "arg1" below - confirm which is right. */
   ppir_codegen_vec4_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */
} ppir_codegen_vec4_mul_op;
|
||||
|
||||
/* Output modifier applied to an ALU result (2-bit field).  The disassembler
 * prints the non-zero values as ".sat", ".pos" and ".int" respectively.
 */
typedef enum {
   ppir_codegen_outmod_none = 0,
   ppir_codegen_outmod_clamp_fraction = 1,
   ppir_codegen_outmod_clamp_positive = 2,
   ppir_codegen_outmod_round = 3,
} ppir_codegen_outmod;
|
||||
|
||||
/* vec4 multiply unit field, 43 bits. */
typedef struct __attribute__((__packed__)) {
   ppir_codegen_vec4_reg arg0_source : 4;
   unsigned arg0_swizzle  : 8; /* 2 bits per component, x first */
   bool     arg0_absolute : 1;
   bool     arg0_negate   : 1;
   ppir_codegen_vec4_reg arg1_source : 4;
   unsigned arg1_swizzle  : 8;
   bool     arg1_absolute : 1;
   bool     arg1_negate   : 1;
   unsigned dest          : 4;
   unsigned mask          : 4; /* write mask, bit 0 = x; the disassembler omits dest when 0 */
   ppir_codegen_outmod dest_modifier : 2;
   ppir_codegen_vec4_mul_op op : 5;
} ppir_codegen_field_vec4_mul;
|
||||
|
||||
/* Opcodes of the vec4 accumulate (add) unit, 5-bit field. */
typedef enum {
   ppir_codegen_vec4_acc_op_add = 0x00,
   ppir_codegen_vec4_acc_op_fract = 0x04, /* Fract? */
   ppir_codegen_vec4_acc_op_ne = 0x08, /* Not Equal */
   ppir_codegen_vec4_acc_op_gt = 0x09, /* Greater Than */
   ppir_codegen_vec4_acc_op_ge = 0x0A, /* Greater Than or Equal */
   ppir_codegen_vec4_acc_op_eq = 0x0B, /* Equal */
   ppir_codegen_vec4_acc_op_floor = 0x0C,
   ppir_codegen_vec4_acc_op_ceil = 0x0D,
   ppir_codegen_vec4_acc_op_min = 0x0E,
   ppir_codegen_vec4_acc_op_max = 0x0F,
   ppir_codegen_vec4_acc_op_sum3 = 0x10, /* dest.xyzw = (arg0.x + arg0.y + arg0.z) */
   ppir_codegen_vec4_acc_op_sum4 = 0x11, /* dest.xyzw = (arg0.x + arg0.y + arg0.z + arg0.w) */
   ppir_codegen_vec4_acc_op_dFdx = 0x14,
   ppir_codegen_vec4_acc_op_dFdy = 0x15,
   ppir_codegen_vec4_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */
   ppir_codegen_vec4_acc_op_mov = 0x1F, /* Passthrough, result = arg0 */
} ppir_codegen_vec4_acc_op;
|
||||
|
||||
/* vec4 accumulate unit field, 44 bits: same layout as the vec4 multiply
 * field plus the trailing mul_in bit.
 */
typedef struct __attribute__((__packed__)) {
   ppir_codegen_vec4_reg arg0_source : 4;
   unsigned arg0_swizzle  : 8; /* 2 bits per component, x first */
   bool     arg0_absolute : 1;
   bool     arg0_negate   : 1;
   ppir_codegen_vec4_reg arg1_source : 4;
   unsigned arg1_swizzle  : 8;
   bool     arg1_absolute : 1;
   bool     arg1_negate   : 1;
   unsigned dest          : 4;
   unsigned mask          : 4; /* write mask, bit 0 = x */
   ppir_codegen_outmod dest_modifier : 2;
   ppir_codegen_vec4_acc_op op : 5;
   bool     mul_in        : 1; /* whether to get arg0 from multiply unit below */
} ppir_codegen_field_vec4_acc;
|
||||
|
||||
/* Float (Scalar) Pipe */
|
||||
|
||||
/* Opcodes of the scalar multiply unit (5-bit field).  As for the vec4
 * multiplier, opcodes 0-7 are all shown as "mul" by the disassembler, with a
 * non-zero opcode printed as a "<<n" suffix on the first operand.
 */
typedef enum {
   ppir_codegen_float_mul_op_not = 0x08, /* Logical Not */
   ppir_codegen_float_mul_op_and = 0x09, /* Logical AND */
   ppir_codegen_float_mul_op_or = 0x0A, /* Logical OR */
   ppir_codegen_float_mul_op_xor = 0x0B, /* Logical XOR */
   ppir_codegen_float_mul_op_ne = 0x0C, /* Not Equal */
   ppir_codegen_float_mul_op_gt = 0x0D, /* Greater Than */
   ppir_codegen_float_mul_op_ge = 0x0E, /* Greater Than or Equal */
   ppir_codegen_float_mul_op_eq = 0x0F, /* Equal */
   ppir_codegen_float_mul_op_min = 0x10, /* Minimum */
   ppir_codegen_float_mul_op_max = 0x11, /* Maximum */
   /* NOTE(review): the disassembler prints arg0 as the only operand of
    * mov, which disagrees with "arg1" below - confirm which is right. */
   ppir_codegen_float_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */
} ppir_codegen_float_mul_op;
|
||||
|
||||
/* Scalar multiply unit field, 30 bits.  The 6-bit scalar register numbers
 * are decoded by the disassembler as (vec4 register << 2) | component.
 */
typedef struct __attribute__((__packed__)) {
   unsigned arg0_source   : 6;
   bool     arg0_absolute : 1;
   bool     arg0_negate   : 1;
   unsigned arg1_source   : 6;
   bool     arg1_absolute : 1;
   bool     arg1_negate   : 1;
   unsigned dest          : 6;
   bool     output_en     : 1; /* Set to 0 when outputting directly to float_acc below. */
   ppir_codegen_outmod dest_modifier : 2;
   ppir_codegen_float_mul_op op : 5;
} ppir_codegen_field_float_mul;
|
||||
|
||||
/* Opcodes of the scalar accumulate (add) unit, 5-bit field. */
typedef enum {
   ppir_codegen_float_acc_op_add = 0x00,
   ppir_codegen_float_acc_op_fract = 0x04,
   ppir_codegen_float_acc_op_ne = 0x08, /* Not Equal */
   ppir_codegen_float_acc_op_gt = 0x09, /* Greater Than */
   ppir_codegen_float_acc_op_ge = 0x0A, /* Greater Than or Equal */
   ppir_codegen_float_acc_op_eq = 0x0B, /* Equal */
   ppir_codegen_float_acc_op_floor = 0x0C,
   ppir_codegen_float_acc_op_ceil = 0x0D,
   ppir_codegen_float_acc_op_min = 0x0E,
   ppir_codegen_float_acc_op_max = 0x0F,
   ppir_codegen_float_acc_op_dFdx = 0x14,
   ppir_codegen_float_acc_op_dFdy = 0x15,
   ppir_codegen_float_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */
   /* NOTE(review): the vec4 variant of this opcode is documented as
    * "result = arg0" and the disassembler prints arg0 as the only operand
    * of mov; "arg1" below looks like a copy/paste slip - confirm. */
   ppir_codegen_float_acc_op_mov = 0x1F, /* Passthrough, result = arg1 */
} ppir_codegen_float_acc_op;
|
||||
|
||||
/* Scalar accumulate unit field, 31 bits: same layout as the scalar multiply
 * field plus the trailing mul_in bit.
 */
typedef struct __attribute__((__packed__)) {
   unsigned arg0_source   : 6;
   bool     arg0_absolute : 1;
   bool     arg0_negate   : 1;
   unsigned arg1_source   : 6;
   bool     arg1_absolute : 1;
   bool     arg1_negate   : 1;
   unsigned dest          : 6;
   bool     output_en     : 1; /* Always true */
   ppir_codegen_outmod dest_modifier : 2;
   ppir_codegen_float_acc_op op : 5;
   /* NOTE(review): the disassembler substitutes "^s0" for arg0 (not arg1)
    * when this bit is set, matching the vec4 variant - confirm which
    * operand the comment below should name. */
   bool     mul_in        : 1; /* Get arg1 from float_mul above. */
} ppir_codegen_field_float_acc;
|
||||
|
||||
/* Temporary Write / Framebuffer Read */
|
||||
|
||||
/* Store slot field, 41 bits, with two views: a write to temporary memory
 * (temp_write) or a framebuffer read (fb_read).
 */
typedef union __attribute__((__packed__)) {
   struct __attribute__((__packed__)) {
      unsigned dest       : 2; /* = 11 */
      unsigned unknown_0  : 2; /* = 00 */
      unsigned source     : 6; /* index of the register being stored */
      unsigned alignment  : 2; /* 0: float, 1:vec2, 2: vec4 */
      unsigned unknown_1  : 6; /* = 00 0000 */
      unsigned offset_reg : 6;
      bool     offset_en  : 1;
      unsigned index      : 16;
   } temp_write;
   struct __attribute__((__packed__)) {
      bool     source     : 1; /* 0 = fb_depth, 1 = fb_color */
      unsigned unknown_0  : 5; /* = 00 111 */
      unsigned dest       : 4;
      unsigned unknown_1  : 31; /* = 0 0000 ... 10 */
   } fb_read;
} ppir_codegen_field_temp_write;
|
||||
|
||||
/* Result combiner */
|
||||
|
||||
/* Scalar opcodes of the result combiner.  The opcode field is 4 bits wide,
 * so values 10-15 can be encoded but have no name here.
 */
typedef enum {
   ppir_codegen_combine_scalar_op_rcp = 0, /* Reciprocal */
   ppir_codegen_combine_scalar_op_mov = 1, /* No Operation */
   ppir_codegen_combine_scalar_op_sqrt = 2, /* Square-Root */
   ppir_codegen_combine_scalar_op_rsqrt = 3, /* Inverse Square-Root */
   ppir_codegen_combine_scalar_op_exp2 = 4, /* Binary Exponent */
   ppir_codegen_combine_scalar_op_log2 = 5, /* Binary Logarithm */
   ppir_codegen_combine_scalar_op_sin = 6, /* Sine (Scaled LUT) */
   ppir_codegen_combine_scalar_op_cos = 7, /* Cosine (Scaled LUT) */
   ppir_codegen_combine_scalar_op_atan = 8, /* Arc Tangent Part 1 */
   ppir_codegen_combine_scalar_op_atan2 = 9, /* Arc Tangent 2 Part 1 */
} ppir_codegen_combine_scalar_op;
|
||||
|
||||
/* Result combiner field, 30 bits in either view.  Both views start with the
 * same dest_vec and arg1_en bits; the remaining 28 bits are laid out
 * differently for the scalar and the vector form.
 */
typedef union __attribute__((__packed__)) {
   struct __attribute__((__packed__)) {
      bool     dest_vec      : 1;
      bool     arg1_en       : 1;
      ppir_codegen_combine_scalar_op op : 4;
      bool     arg1_absolute : 1;
      bool     arg1_negate   : 1;
      unsigned arg1_src      : 6;
      bool     arg0_absolute : 1;
      bool     arg0_negate   : 1;
      unsigned arg0_src      : 6;
      ppir_codegen_outmod dest_modifier : 2;
      unsigned dest          : 6;
   } scalar;
   struct __attribute__((__packed__)) {
      bool     dest_vec      : 1;
      bool     arg1_en       : 1;
      unsigned arg1_swizzle  : 8;
      unsigned arg1_source   : 4;
      unsigned padding_0     : 8;
      unsigned mask          : 4;
      unsigned dest          : 4;
   } vector;
} ppir_codegen_field_combine;
|
||||
|
||||
/* Branch/Control Flow */
|
||||
|
||||
/* Fixed contents of a branch field that encodes a discard; one constant for
 * each member of the "discard" view below.
 */
#define PPIR_CODEGEN_DISCARD_WORD0 0x007F0003
#define PPIR_CODEGEN_DISCARD_WORD1 0x00000000
#define PPIR_CODEGEN_DISCARD_WORD2 0x000

/* Branch field, 73 bits in either view: a conditional branch comparing two
 * scalar registers, or the raw words used for discard.
 */
typedef union __attribute__((__packed__)) {
   struct __attribute__((__packed__)) {
      unsigned unknown_0   : 4; /* = 0000 */
      unsigned arg1_source : 6;
      unsigned arg0_source : 6;
      bool     cond_gt     : 1;
      bool     cond_eq     : 1;
      bool     cond_lt     : 1;
      unsigned unknown_1   : 22; /* = 0 0000 0000 0000 0000 0000 0 */
      signed   target      : 27; /* signed branch target */
      unsigned unknown_2   : 5; /* = 0 0011 */
   } branch;
   struct __attribute__((__packed__)) {
      unsigned word0 : 32;
      unsigned word1 : 32;
      unsigned word2 : 9;
   } discard;
} ppir_codegen_field_branch;
|
||||
|
||||
/* Disassemble one encoded instruction.  The codegen debug dump calls this
 * with a pointer to the instruction's control word and the instruction's
 * offset, in words, from the start of the program.
 */
void ppir_disassemble_instr(uint32_t *instr, unsigned offset);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,776 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Lima Project
|
||||
*
|
||||
* Copyright (c) 2013 Codethink (http://www.codethink.co.uk)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_half.h"
|
||||
|
||||
#include "ppir.h"
|
||||
#include "codegen.h"
|
||||
|
||||
/* Disassembly description of one opcode. */
typedef struct {
   /* mnemonic; NULL marks an opcode without a known name.  Points at string
    * literals only, hence const. */
   const char *name;
   /* number of source operands to print */
   unsigned srcs;
} asm_op;
|
||||
|
||||
/* Print a swizzle suffix such as ".xxyz".  The swizzle holds one 2-bit
 * component selector per lane, lane 0 in the lowest bits.  The identity
 * swizzle 0xE4 (.xyzw) prints nothing.
 */
static void
print_swizzle(uint8_t swizzle)
{
   static const char component[] = "xyzw";

   if (swizzle != 0xE4) {
      printf(".");
      for (unsigned lane = 0; lane < 4; lane++)
         printf("%c", component[(swizzle >> (2 * lane)) & 3]);
   }
}
|
||||
|
||||
/* Print a write-mask suffix such as ".xz" (bit 0 = x ... bit 3 = w).  A full
 * mask (0xF) prints nothing; an empty mask prints just the dot.
 */
static void
print_mask(uint8_t mask)
{
   static const char component[] = "xyzw";

   if (mask == 0xF)
      return;

   printf(".");
   for (unsigned bit = 0; bit < 4; bit++) {
      if (mask & (1u << bit))
         printf("%c", component[bit]);
   }
}
|
||||
|
||||
static void
|
||||
print_reg(ppir_codegen_vec4_reg reg, const char *special)
|
||||
{
|
||||
if (special) {
|
||||
printf("%s", special);
|
||||
} else {
|
||||
switch (reg)
|
||||
{
|
||||
case ppir_codegen_vec4_reg_constant0:
|
||||
printf("^const0");
|
||||
break;
|
||||
case ppir_codegen_vec4_reg_constant1:
|
||||
printf("^const1");
|
||||
break;
|
||||
case ppir_codegen_vec4_reg_texture:
|
||||
printf("^texture");
|
||||
break;
|
||||
case ppir_codegen_vec4_reg_uniform:
|
||||
printf("^uniform");
|
||||
break;
|
||||
default:
|
||||
printf("$%u", reg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_vector_source(ppir_codegen_vec4_reg reg, const char *special,
|
||||
uint8_t swizzle, bool abs, bool neg)
|
||||
{
|
||||
if (neg)
|
||||
printf("-");
|
||||
if (abs)
|
||||
printf("abs(");
|
||||
|
||||
print_reg(reg, special);
|
||||
print_swizzle(swizzle);
|
||||
|
||||
if (abs)
|
||||
printf(")");
|
||||
}
|
||||
|
||||
/* Print a scalar source operand.  "reg" is a scalar register number: the
 * upper bits select the vec4 register, the low two bits the component.
 * If "special" is not NULL it replaces both register and component.
 * Negate prints a leading "-", absolute wraps the operand in "abs(...)".
 */
static void
print_source_scalar(unsigned reg, const char *special, bool abs, bool neg)
{
   const unsigned vec_reg = reg >> 2;
   const char component = "xyzw"[reg & 3];

   printf("%s%s", neg ? "-" : "", abs ? "abs(" : "");

   print_reg(vec_reg, special);
   if (special == NULL)
      printf(".%c", component);

   printf("%s", abs ? ")" : "");
}
|
||||
|
||||
static void
|
||||
print_outmod(ppir_codegen_outmod modifier)
|
||||
{
|
||||
switch (modifier)
|
||||
{
|
||||
case ppir_codegen_outmod_clamp_fraction:
|
||||
printf(".sat");
|
||||
break;
|
||||
case ppir_codegen_outmod_clamp_positive:
|
||||
printf(".pos");
|
||||
break;
|
||||
case ppir_codegen_outmod_round:
|
||||
printf(".int");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Print a scalar destination as "$<vec4 register>.<component> " (with a
 * trailing space); "reg" is (vec4 register << 2) | component.
 */
static void
print_dest_scalar(unsigned reg)
{
   printf("$%u.%c ", reg >> 2, "xyzw"[reg & 3]);
}
|
||||
|
||||
/* Print constant slot "const_num" followed by its four half-float values
 * converted to float.  All four entries of "val" are read.
 */
static void
print_const(unsigned const_num, uint16_t *val)
{
   const uint16_t *end = val + 4;

   printf("const%u", const_num);
   for (const uint16_t *half = val; half != end; half++)
      printf(" %f", util_half_to_float(*half));
}
|
||||
|
||||
/* Field printer for constant slot 0; "offset" is unused. */
static void
print_const0(void *code, unsigned offset)
{
   uint16_t *halves = code;

   (void) offset;
   print_const(0, halves);
}
|
||||
|
||||
/* Field printer for constant slot 1; "offset" is unused. */
static void
print_const1(void *code, unsigned offset)
{
   uint16_t *halves = code;

   (void) offset;
   print_const(1, halves);
}
|
||||
|
||||
static void
|
||||
print_varying(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_varying *varying = code;
|
||||
|
||||
printf("load");
|
||||
|
||||
bool perspective = varying->imm.source_type < 2 && varying->imm.perspective;
|
||||
if (perspective)
|
||||
{
|
||||
printf(".perspective");
|
||||
switch (varying->imm.perspective)
|
||||
{
|
||||
case 2:
|
||||
printf(".z");
|
||||
break;
|
||||
case 3:
|
||||
printf(".w");
|
||||
break;
|
||||
default:
|
||||
printf(".unknown");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf(".v ");
|
||||
|
||||
switch (varying->imm.dest)
|
||||
{
|
||||
case ppir_codegen_vec4_reg_discard:
|
||||
printf("^discard");
|
||||
break;
|
||||
default:
|
||||
printf("$%u", varying->imm.dest);
|
||||
break;
|
||||
}
|
||||
print_mask(varying->imm.mask);
|
||||
printf(" ");
|
||||
|
||||
switch (varying->imm.source_type) {
|
||||
case 1:
|
||||
print_vector_source(varying->reg.source, NULL, varying->reg.swizzle,
|
||||
varying->reg.absolute, varying->reg.negate);
|
||||
break;
|
||||
case 2:
|
||||
printf("gl_FragCoord");
|
||||
break;
|
||||
case 3:
|
||||
if (varying->imm.perspective)
|
||||
printf("gl_FrontFacing");
|
||||
else
|
||||
printf("gl_PointCoord");
|
||||
break;
|
||||
default:
|
||||
switch (varying->imm.alignment) {
|
||||
case 0:
|
||||
printf("%u.%c", varying->imm.index >> 2,
|
||||
"xyzw"[varying->imm.index & 3]);
|
||||
break;
|
||||
case 1: {
|
||||
const char *c[2] = {"xy", "zw"};
|
||||
printf("%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
printf("%u", varying->imm.index);
|
||||
break;
|
||||
}
|
||||
|
||||
if (varying->imm.offset_vector != 15) {
|
||||
unsigned reg = (varying->imm.offset_vector << 2) +
|
||||
varying->imm.offset_scalar;
|
||||
printf("+");
|
||||
print_source_scalar(reg, NULL, false, false);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_sampler(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_sampler *sampler = code;
|
||||
|
||||
printf("texld");
|
||||
if (sampler->lod_bias_en)
|
||||
printf(".b");
|
||||
|
||||
switch (sampler->type) {
|
||||
case ppir_codegen_sampler_type_2d:
|
||||
printf(".2d");
|
||||
break;
|
||||
case ppir_codegen_sampler_type_cube:
|
||||
printf(".cube");
|
||||
break;
|
||||
default:
|
||||
printf("_t%u", sampler->type);
|
||||
break;
|
||||
}
|
||||
|
||||
printf(" %u", sampler->index);
|
||||
|
||||
if (sampler->offset_en)
|
||||
{
|
||||
printf("+");
|
||||
print_source_scalar(sampler->index_offset, NULL, false, false);
|
||||
}
|
||||
|
||||
if (sampler->lod_bias_en)
|
||||
{
|
||||
printf(" ");
|
||||
print_source_scalar(sampler->lod_bias, NULL, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_uniform(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_uniform *uniform = code;
|
||||
|
||||
printf("load.");
|
||||
|
||||
switch (uniform->source) {
|
||||
case ppir_codegen_uniform_src_uniform:
|
||||
printf("u");
|
||||
break;
|
||||
case ppir_codegen_uniform_src_temporary:
|
||||
printf("t");
|
||||
break;
|
||||
default:
|
||||
printf(".u%u", uniform->source);
|
||||
break;
|
||||
}
|
||||
|
||||
if (uniform->alignment)
|
||||
printf(" %u", uniform->index);
|
||||
else
|
||||
printf(" %u.%c", uniform->index >> 2, "xyzw"[uniform->index & 3]);
|
||||
|
||||
if (uniform->offset_en) {
|
||||
printf(" ");
|
||||
print_source_scalar(uniform->offset_reg, NULL, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
/* Build the table entry for ppir_codegen_vec4_mul_op_<_name>: the mnemonic
 * is the enum suffix itself, _srcs the number of operands to print.
 */
#define CASE(_name, _srcs) \
[ppir_codegen_vec4_mul_op_##_name] = { \
   .name = #_name, \
   .srcs = _srcs \
}

/* Mnemonic and operand count of every vec4 multiplier opcode, indexed by
 * opcode.  The highest entry is mov (0x1F), so the table covers the whole
 * 5-bit opcode range; opcodes not listed keep a NULL name.
 */
static const asm_op vec4_mul_ops[] = {
   /* opcodes 0-7 are all "mul" (range designator is a GNU C extension) */
   [0 ... 7] = {
      .name = "mul",
      .srcs = 2
   },
   CASE(not, 1),
   CASE(and, 2),
   CASE(or, 2),
   CASE(xor, 2),
   CASE(ne, 2),
   CASE(gt, 2),
   CASE(ge, 2),
   CASE(eq, 2),
   CASE(min, 2),
   CASE(max, 2),
   CASE(mov, 1),
};

#undef CASE
|
||||
|
||||
static void
|
||||
print_vec4_mul(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_vec4_mul *vec4_mul = code;
|
||||
|
||||
asm_op op = vec4_mul_ops[vec4_mul->op];
|
||||
|
||||
if (op.name)
|
||||
printf("%s", op.name);
|
||||
else
|
||||
printf("op%u", vec4_mul->op);
|
||||
print_outmod(vec4_mul->dest_modifier);
|
||||
printf(".v0 ");
|
||||
|
||||
if (vec4_mul->mask) {
|
||||
printf("$%u", vec4_mul->dest);
|
||||
print_mask(vec4_mul->mask);
|
||||
printf(" ");
|
||||
}
|
||||
|
||||
print_vector_source(vec4_mul->arg0_source, NULL,
|
||||
vec4_mul->arg0_swizzle,
|
||||
vec4_mul->arg0_absolute,
|
||||
vec4_mul->arg0_negate);
|
||||
|
||||
if (vec4_mul->op < 8 && vec4_mul->op != 0) {
|
||||
printf("<<%u", vec4_mul->op);
|
||||
}
|
||||
|
||||
printf(" ");
|
||||
|
||||
if (op.srcs > 1) {
|
||||
print_vector_source(vec4_mul->arg1_source, NULL,
|
||||
vec4_mul->arg1_swizzle,
|
||||
vec4_mul->arg1_absolute,
|
||||
vec4_mul->arg1_negate);
|
||||
}
|
||||
}
|
||||
|
||||
/* Build the table entry for ppir_codegen_vec4_acc_op_<_name>: the mnemonic
 * is the enum suffix itself, _srcs the number of operands to print.
 */
#define CASE(_name, _srcs) \
[ppir_codegen_vec4_acc_op_##_name] = { \
   .name = #_name, \
   .srcs = _srcs \
}

/* Mnemonic and operand count of every vec4 accumulate opcode, indexed by
 * opcode.  The highest entry is mov (0x1F), so the table covers the whole
 * 5-bit opcode range; opcodes not listed keep a NULL name.
 */
static const asm_op vec4_acc_ops[] = {
   CASE(add, 2),
   CASE(fract, 1),
   CASE(ne, 2),
   CASE(gt, 2),
   CASE(ge, 2),
   CASE(eq, 2),
   CASE(floor, 1),
   CASE(ceil, 1),
   CASE(min, 2),
   CASE(max, 2),
   CASE(sum3, 1),
   CASE(sum4, 1),
   CASE(dFdx, 2),
   CASE(dFdy, 2),
   CASE(sel, 2),
   CASE(mov, 1),
};

#undef CASE
|
||||
|
||||
static void
|
||||
print_vec4_acc(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_vec4_acc *vec4_acc = code;
|
||||
|
||||
asm_op op = vec4_acc_ops[vec4_acc->op];
|
||||
|
||||
if (op.name)
|
||||
printf("%s", op.name);
|
||||
else
|
||||
printf("op%u", vec4_acc->op);
|
||||
print_outmod(vec4_acc->dest_modifier);
|
||||
printf(".v1 ");
|
||||
|
||||
if (vec4_acc->mask) {
|
||||
printf("$%u", vec4_acc->dest);
|
||||
print_mask(vec4_acc->mask);
|
||||
printf(" ");
|
||||
}
|
||||
|
||||
print_vector_source(vec4_acc->arg0_source, vec4_acc->mul_in ? "^v0" : NULL,
|
||||
vec4_acc->arg0_swizzle,
|
||||
vec4_acc->arg0_absolute,
|
||||
vec4_acc->arg0_negate);
|
||||
|
||||
if (op.srcs > 1) {
|
||||
printf(" ");
|
||||
print_vector_source(vec4_acc->arg1_source, NULL,
|
||||
vec4_acc->arg1_swizzle,
|
||||
vec4_acc->arg1_absolute,
|
||||
vec4_acc->arg1_negate);
|
||||
}
|
||||
}
|
||||
|
||||
/* Build the table entry for ppir_codegen_float_mul_op_<_name>: the mnemonic
 * is the enum suffix itself, _srcs the number of operands to print.
 */
#define CASE(_name, _srcs) \
[ppir_codegen_float_mul_op_##_name] = { \
   .name = #_name, \
   .srcs = _srcs \
}

/* Mnemonic and operand count of every scalar multiplier opcode, indexed by
 * opcode.  The highest entry is mov (0x1F), so the table covers the whole
 * 5-bit opcode range; opcodes not listed keep a NULL name.
 */
static const asm_op float_mul_ops[] = {
   /* opcodes 0-7 are all "mul" (range designator is a GNU C extension) */
   [0 ... 7] = {
      .name = "mul",
      .srcs = 2
   },
   CASE(not, 1),
   CASE(and, 2),
   CASE(or, 2),
   CASE(xor, 2),
   CASE(ne, 2),
   CASE(gt, 2),
   CASE(ge, 2),
   CASE(eq, 2),
   CASE(min, 2),
   CASE(max, 2),
   CASE(mov, 1),
};

#undef CASE
|
||||
|
||||
static void
|
||||
print_float_mul(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_float_mul *float_mul = code;
|
||||
|
||||
asm_op op = float_mul_ops[float_mul->op];
|
||||
|
||||
if (op.name)
|
||||
printf("%s", op.name);
|
||||
else
|
||||
printf("op%u", float_mul->op);
|
||||
print_outmod(float_mul->dest_modifier);
|
||||
printf(".s0 ");
|
||||
|
||||
if (float_mul->output_en)
|
||||
print_dest_scalar(float_mul->dest);
|
||||
|
||||
print_source_scalar(float_mul->arg0_source, NULL,
|
||||
float_mul->arg0_absolute,
|
||||
float_mul->arg0_negate);
|
||||
|
||||
if (float_mul->op < 8 && float_mul->op != 0) {
|
||||
printf("<<%u", float_mul->op);
|
||||
}
|
||||
|
||||
if (op.srcs > 1) {
|
||||
printf(" ");
|
||||
|
||||
print_source_scalar(float_mul->arg1_source, NULL,
|
||||
float_mul->arg1_absolute,
|
||||
float_mul->arg1_negate);
|
||||
}
|
||||
}
|
||||
|
||||
#define CASE(_name, _srcs) \
|
||||
[ppir_codegen_float_acc_op_##_name] = { \
|
||||
.name = #_name, \
|
||||
.srcs = _srcs \
|
||||
}
|
||||
|
||||
static const asm_op float_acc_ops[] = {
|
||||
CASE(add, 2),
|
||||
CASE(fract, 1),
|
||||
CASE(ne, 2),
|
||||
CASE(gt, 2),
|
||||
CASE(ge, 2),
|
||||
CASE(eq, 2),
|
||||
CASE(floor, 1),
|
||||
CASE(ceil, 1),
|
||||
CASE(min, 2),
|
||||
CASE(max, 2),
|
||||
CASE(dFdx, 2),
|
||||
CASE(dFdy, 2),
|
||||
CASE(sel, 2),
|
||||
CASE(mov, 1),
|
||||
};
|
||||
|
||||
#undef CASE
|
||||
|
||||
static void
|
||||
print_float_acc(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_float_acc *float_acc = code;
|
||||
|
||||
asm_op op = float_acc_ops[float_acc->op];
|
||||
|
||||
if (op.name)
|
||||
printf("%s", op.name);
|
||||
else
|
||||
printf("op%u", float_acc->op);
|
||||
print_outmod(float_acc->dest_modifier);
|
||||
printf(".s1 ");
|
||||
|
||||
if (float_acc->output_en)
|
||||
print_dest_scalar(float_acc->dest);
|
||||
|
||||
print_source_scalar(float_acc->arg0_source, float_acc->mul_in ? "^s0" : NULL,
|
||||
float_acc->arg0_absolute,
|
||||
float_acc->arg0_negate);
|
||||
|
||||
if (op.srcs > 1) {
|
||||
printf(" ");
|
||||
print_source_scalar(float_acc->arg1_source, NULL,
|
||||
float_acc->arg1_absolute,
|
||||
float_acc->arg1_negate);
|
||||
}
|
||||
}
|
||||
|
||||
#define CASE(_name, _srcs) \
|
||||
[ppir_codegen_combine_scalar_op_##_name] = { \
|
||||
.name = #_name, \
|
||||
.srcs = _srcs \
|
||||
}
|
||||
|
||||
static const asm_op combine_ops[] = {
|
||||
CASE(rcp, 1),
|
||||
CASE(mov, 1),
|
||||
CASE(sqrt, 1),
|
||||
CASE(rsqrt, 1),
|
||||
CASE(exp2, 1),
|
||||
CASE(log2, 1),
|
||||
CASE(sin, 1),
|
||||
CASE(cos, 1),
|
||||
CASE(atan, 1),
|
||||
CASE(atan2, 1),
|
||||
};
|
||||
|
||||
#undef CASE
|
||||
|
||||
static void
|
||||
print_combine(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_combine *combine = code;
|
||||
|
||||
if (combine->scalar.dest_vec &&
|
||||
combine->scalar.arg1_en) {
|
||||
/* This particular combination can only be valid for scalar * vector
|
||||
* multiplies, and the opcode field is reused for something else.
|
||||
*/
|
||||
printf("mul");
|
||||
} else {
|
||||
asm_op op = combine_ops[combine->scalar.op];
|
||||
|
||||
if (op.name)
|
||||
printf("%s", op.name);
|
||||
else
|
||||
printf("op%u", combine->scalar.op);
|
||||
}
|
||||
|
||||
if (!combine->scalar.dest_vec)
|
||||
print_outmod(combine->scalar.dest_modifier);
|
||||
printf(".s2 ");
|
||||
|
||||
if (combine->scalar.dest_vec) {
|
||||
printf("$%u", combine->vector.dest);
|
||||
print_mask(combine->vector.mask);
|
||||
} else {
|
||||
print_dest_scalar(combine->scalar.dest);
|
||||
}
|
||||
printf(" ");
|
||||
|
||||
print_source_scalar(combine->scalar.arg0_src, NULL,
|
||||
combine->scalar.arg0_absolute,
|
||||
combine->scalar.arg0_negate);
|
||||
printf(" ");
|
||||
|
||||
if (combine->scalar.arg1_en) {
|
||||
if (combine->scalar.dest_vec) {
|
||||
print_vector_source(combine->vector.arg1_source, NULL,
|
||||
combine->vector.arg1_swizzle,
|
||||
false, false);
|
||||
} else {
|
||||
print_source_scalar(combine->scalar.arg1_src, NULL,
|
||||
combine->scalar.arg1_absolute,
|
||||
combine->scalar.arg1_negate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_temp_write(void *code, unsigned offset)
|
||||
{
|
||||
(void) offset;
|
||||
ppir_codegen_field_temp_write *temp_write = code;
|
||||
|
||||
if (temp_write->fb_read.unknown_0 == 0x7) {
|
||||
if (temp_write->fb_read.source)
|
||||
printf("fb_color");
|
||||
else
|
||||
printf("fb_depth");
|
||||
printf(" $%u", temp_write->fb_read.dest);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
printf("store.t");
|
||||
|
||||
if (temp_write->temp_write.alignment) {
|
||||
printf(" %u", temp_write->temp_write.index);
|
||||
} else {
|
||||
printf(" %u.%c", temp_write->temp_write.index >> 2,
|
||||
"xyzw"[temp_write->temp_write.index & 3]);
|
||||
}
|
||||
|
||||
if (temp_write->temp_write.offset_en) {
|
||||
printf("+");
|
||||
print_source_scalar(temp_write->temp_write.offset_reg,
|
||||
NULL, false, false);
|
||||
}
|
||||
|
||||
printf(" ");
|
||||
|
||||
if (temp_write->temp_write.alignment) {
|
||||
print_reg(temp_write->temp_write.source >> 2, NULL);
|
||||
} else {
|
||||
print_source_scalar(temp_write->temp_write.source, NULL, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_branch(void *code, unsigned offset)
|
||||
{
|
||||
ppir_codegen_field_branch *branch = code;
|
||||
|
||||
if (branch->discard.word0 == PPIR_CODEGEN_DISCARD_WORD0 &&
|
||||
branch->discard.word1 == PPIR_CODEGEN_DISCARD_WORD1 &&
|
||||
branch->discard.word2 == PPIR_CODEGEN_DISCARD_WORD2) {
|
||||
printf("discard");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
const char* cond[] = {
|
||||
"nv", "lt", "eq", "le",
|
||||
"gt", "ne", "ge", "" ,
|
||||
};
|
||||
|
||||
unsigned cond_mask = 0;
|
||||
cond_mask |= (branch->branch.cond_lt ? 1 : 0);
|
||||
cond_mask |= (branch->branch.cond_eq ? 2 : 0);
|
||||
cond_mask |= (branch->branch.cond_gt ? 4 : 0);
|
||||
printf("branch");
|
||||
if (cond_mask != 0x7) {
|
||||
printf(".%s ", cond[cond_mask]);
|
||||
print_source_scalar(branch->branch.arg0_source, NULL, false, false);
|
||||
printf(" ");
|
||||
print_source_scalar(branch->branch.arg1_source, NULL, false, false);
|
||||
}
|
||||
|
||||
printf(" %d", branch->branch.target + offset);
|
||||
}
|
||||
|
||||
typedef void (*print_field_func)(void *, unsigned);
|
||||
|
||||
static const print_field_func print_field[ppir_codegen_field_shift_count] = {
|
||||
[ppir_codegen_field_shift_varying] = print_varying,
|
||||
[ppir_codegen_field_shift_sampler] = print_sampler,
|
||||
[ppir_codegen_field_shift_uniform] = print_uniform,
|
||||
[ppir_codegen_field_shift_vec4_mul] = print_vec4_mul,
|
||||
[ppir_codegen_field_shift_float_mul] = print_float_mul,
|
||||
[ppir_codegen_field_shift_vec4_acc] = print_vec4_acc,
|
||||
[ppir_codegen_field_shift_float_acc] = print_float_acc,
|
||||
[ppir_codegen_field_shift_combine] = print_combine,
|
||||
[ppir_codegen_field_shift_temp_write] = print_temp_write,
|
||||
[ppir_codegen_field_shift_branch] = print_branch,
|
||||
[ppir_codegen_field_shift_vec4_const_0] = print_const0,
|
||||
[ppir_codegen_field_shift_vec4_const_1] = print_const1,
|
||||
};
|
||||
|
||||
static const int ppir_codegen_field_size[] = {
|
||||
34, 62, 41, 43, 30, 44, 31, 30, 41, 73, 64, 64
|
||||
};
|
||||
|
||||
/* Copy `bits` bits from `src`, starting `src_offset` bits into it, to the
 * start of `dst`.
 *
 * Whole bytes are written: ceil(bits / 8) bytes of `dst` are stored, and
 * the unused high bits of the final byte are not masked off. Within a
 * byte, lower-numbered bits are the less significant ones.
 */
static void
bitcopy(char *src, char *dst, unsigned bits, unsigned src_offset)
{
   const unsigned char *in = (const unsigned char *) src + src_offset / 8;
   const unsigned shift = src_offset % 8;
   int remaining = bits;

   while (remaining > 0) {
      unsigned char byte = (unsigned char) (in[0] >> shift);

      /* pull the upper part from the next source byte, but only when the
       * bits still wanted actually reach into it
       */
      if (shift != 0 && shift + remaining > 8)
         byte |= (unsigned char) (in[1] << (8 - shift));

      *dst++ = (char) byte;
      in++;
      remaining -= 8;
   }
}
|
||||
|
||||
void
|
||||
ppir_disassemble_instr(uint32_t *instr, unsigned offset)
|
||||
{
|
||||
ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *) instr;
|
||||
|
||||
char *instr_code = (char *) (instr + 1);
|
||||
unsigned bit_offset = 0;
|
||||
bool first = true;
|
||||
for (unsigned i = 0; i < ppir_codegen_field_shift_count; i++) {
|
||||
char code[12];
|
||||
|
||||
if (!((ctrl->fields >> i) & 1))
|
||||
continue;
|
||||
|
||||
unsigned bits = ppir_codegen_field_size[i];
|
||||
bitcopy(instr_code, code, bits, bit_offset);
|
||||
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
printf(", ");
|
||||
|
||||
print_field[i](code, offset);
|
||||
|
||||
bit_offset += bits;
|
||||
}
|
||||
|
||||
if (ctrl->sync)
|
||||
printf(", sync");
|
||||
if (ctrl->stop)
|
||||
printf(", stop");
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
|
@ -0,0 +1,311 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
/* Allocate a zeroed instruction as a ralloc child of `block`, give it the
 * compiler's next instruction index and append it to the block's
 * instruction list.
 *
 * Returns the new instruction, or NULL if the allocation failed.
 */
ppir_instr *ppir_instr_create(ppir_block *block)
{
   ppir_instr *created = rzalloc(block, ppir_instr);

   if (created) {
      created->index = block->comp->cur_instr_index++;
      created->reg_pressure = -1;

      list_inithead(&created->succ_list);
      list_inithead(&created->pred_list);

      list_addtail(&created->list, &block->instr_list);
   }

   return created;
}
|
||||
|
||||
/* Record that instruction `succ` depends on instruction `pred`.
 *
 * Does nothing if `succ` already has a dependency on `pred`. Otherwise a
 * new ppir_dep, allocated as a ralloc child of `succ`, is linked into
 * succ's predecessor list and pred's successor list.
 */
void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred)
{
   /* don't add duplicated instr */
   ppir_instr_foreach_pred(succ, existing) {
      if (existing->pred == pred)
         return;
   }

   /* NOTE(review): the allocation result is not checked before use */
   ppir_dep *link = ralloc(succ, ppir_dep);
   link->succ = succ;
   link->pred = pred;
   list_addtail(&link->pred_link, &succ->pred_list);
   list_addtail(&link->succ_link, &pred->succ_list);
}
|
||||
|
||||
/* Try to move the multiply node `mul` into the instruction that already
 * holds the add node `add`, so the product reaches the add through a
 * pipeline register (^vmul or ^fmul) instead of through its original
 * destination.
 *
 * Nothing is changed when mul's slot in that instruction is already
 * taken, or when mul's result is the last source of a multi-source add.
 */
void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul)
{
   ppir_instr *instr = add->instr;
   int pos = mul->instr_pos;
   int *slots = ppir_op_infos[mul->op].slots;

   for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) {
      if (slots[i] != pos)
         continue;

      /* the required slot is one mul may use; give up if it is occupied */
      if (instr->slots[pos])
         return;

      ppir_alu_node *add_alu = ppir_node_to_alu(add);
      ppir_alu_node *mul_alu = ppir_node_to_alu(mul);
      ppir_dest *dest = &mul_alu->dest;

      int pipeline;
      if (pos == PPIR_INSTR_SLOT_ALU_VEC_MUL)
         pipeline = ppir_pipeline_reg_vmul;
      else
         pipeline = ppir_pipeline_reg_fmul;

      /* ^vmul/^fmul can't be used as last arg */
      if (add_alu->num_src > 1) {
         ppir_src *last_src = &add_alu->src[add_alu->num_src - 1];
         if (ppir_node_target_equal(last_src, dest))
            return;
      }

      /* update add node src to use pipeline reg */
      if (add_alu->num_src == 3) {
         /* either of the first two sources may read the product */
         for (int k = 0; k < 2; k++) {
            ppir_src *src = &add_alu->src[k];
            if (ppir_node_target_equal(src, dest)) {
               src->type = ppir_target_pipeline;
               src->pipeline = pipeline;
            }
         }
      } else {
         ppir_src *src = &add_alu->src[0];
         assert(ppir_node_target_equal(src, dest));
         src->type = ppir_target_pipeline;
         src->pipeline = pipeline;
      }

      /* update mul node dest to output to pipeline reg */
      dest->type = ppir_target_pipeline;
      dest->pipeline = pipeline;

      instr->slots[pos] = mul;
      mul->instr = instr;
      return;
   }
}
|
||||
|
||||
/* check whether a const slot fix into another const slot */
|
||||
static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src,
|
||||
uint8_t *swizzle)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < src->num; i++) {
|
||||
for (j = 0; j < dst->num; j++) {
|
||||
if (src->value[i].ui == dst->value[j].ui)
|
||||
break;
|
||||
}
|
||||
|
||||
if (j == dst->num) {
|
||||
if (dst->num == 4)
|
||||
return false;
|
||||
dst->value[dst->num++] = src->value[i];
|
||||
}
|
||||
|
||||
swizzle[i] = j;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* make alu node src reflact the pipeline reg */
|
||||
static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline,
|
||||
ppir_dest *dest, uint8_t *swizzle)
|
||||
{
|
||||
for (int i = PPIR_INSTR_SLOT_ALU_START; i <= PPIR_INSTR_SLOT_ALU_END; i++) {
|
||||
if (!instr->slots[i])
|
||||
continue;
|
||||
|
||||
ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]);
|
||||
for (int j = 0; j < alu->num_src; j++) {
|
||||
ppir_src *src = alu->src + j;
|
||||
if (ppir_node_target_equal(src, dest)) {
|
||||
src->type = ppir_target_pipeline;
|
||||
src->pipeline = pipeline;
|
||||
|
||||
if (swizzle) {
|
||||
for (int k = 0; k < 4; k++)
|
||||
src->swizzle[k] = swizzle[src->swizzle[k]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to place `node` into instruction `instr`.
 *
 * Const nodes are merged into one of the instruction's two constant
 * slots, and the ALU sources that read the const are retargeted to the
 * matching const pipeline register.
 *
 * Any other node goes into the first free slot its op allows; the two
 * scalar ALU slots are only used when the node's destination is scalar.
 * load_uniform and load_temp nodes additionally retarget their readers
 * to the uniform pipeline register.
 *
 * Returns true if the node is (or already was) in the instruction and
 * false if no slot could take it.
 */
bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node)
{
   if (node->op == ppir_op_const) {
      ppir_const_node *c = ppir_node_to_const(node);
      const ppir_const *nc = &c->constant;

      for (int slot = 0; slot < 2; slot++) {
         /* merge into a copy so a failed attempt leaves the slot intact */
         ppir_const merged = instr->constant[slot];
         uint8_t swizzle[4] = {0};

         if (!ppir_instr_insert_const(&merged, nc, swizzle))
            continue;

         instr->constant[slot] = merged;
         ppir_instr_update_src_pipeline(
            instr, ppir_pipeline_reg_const0 + slot, &c->dest, swizzle);
         return true;
      }

      /* no const slot can insert */
      return false;
   }

   int *slots = ppir_op_infos[node->op].slots;
   for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) {
      int pos = slots[i];

      if (instr->slots[pos]) {
         /* node already in this instr, i.e. load_uniform */
         if (instr->slots[pos] == node)
            return true;
         continue;
      }

      if (pos == PPIR_INSTR_SLOT_ALU_SCL_MUL ||
          pos == PPIR_INSTR_SLOT_ALU_SCL_ADD) {
         ppir_dest *dest = ppir_node_get_dest(node);
         if (!ppir_target_is_scaler(dest))
            continue;
      }

      instr->slots[pos] = node;
      node->instr = instr;
      node->instr_pos = pos;

      if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) {
         ppir_load_node *l = ppir_node_to_load(node);
         ppir_instr_update_src_pipeline(
            instr, ppir_pipeline_reg_uniform, &l->dest, NULL);
      }

      return true;
   }

   return false;
}
|
||||
|
||||
static struct {
|
||||
int len;
|
||||
char *name;
|
||||
} ppir_instr_fields[] = {
|
||||
[PPIR_INSTR_SLOT_VARYING] = { 4, "vary" },
|
||||
[PPIR_INSTR_SLOT_TEXLD] = { 4, "texl"},
|
||||
[PPIR_INSTR_SLOT_UNIFORM] = { 4, "unif" },
|
||||
[PPIR_INSTR_SLOT_ALU_VEC_MUL] = { 4, "vmul" },
|
||||
[PPIR_INSTR_SLOT_ALU_SCL_MUL] = { 4, "smul" },
|
||||
[PPIR_INSTR_SLOT_ALU_VEC_ADD] = { 4, "vadd" },
|
||||
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" },
|
||||
[PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" },
|
||||
[PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" },
|
||||
};
|
||||
|
||||
void ppir_instr_print_list(ppir_compiler *comp)
|
||||
{
|
||||
if (!(lima_debug & LIMA_DEBUG_PP))
|
||||
return;
|
||||
|
||||
printf("======ppir instr list======\n");
|
||||
printf(" ");
|
||||
for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++)
|
||||
printf("%-*s ", ppir_instr_fields[i].len, ppir_instr_fields[i].name);
|
||||
printf("const0|1\n");
|
||||
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
|
||||
printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index);
|
||||
for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
|
||||
ppir_node *node = instr->slots[i];
|
||||
if (node)
|
||||
printf("%-*d ", ppir_instr_fields[i].len, node->index);
|
||||
else
|
||||
printf("%-*s ", ppir_instr_fields[i].len, "null");
|
||||
}
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (i)
|
||||
printf("| ");
|
||||
|
||||
for (int j = 0; j < instr->constant[i].num; j++)
|
||||
printf("%f ", instr->constant[i].value[j].f);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("------------------------\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Recursively print `instr` and its predecessors as a bracketed tree.
 *
 * An instruction is expanded only the first time it is reached; on later
 * visits just its index is printed, prefixed with "+" when it has
 * predecessors that were omitted (i.e. it is not a leaf).
 */
static void ppir_instr_print_sub(ppir_instr *instr)
{
   const char *marker = "";
   if (instr->printed && !ppir_instr_is_leaf(instr))
      marker = "+";

   printf("[%s%d", marker, instr->index);

   if (!instr->printed) {
      ppir_instr_foreach_pred(instr, dep) {
         ppir_instr_print_sub(dep->pred);
      }

      instr->printed = true;
   }

   printf("]");
}
|
||||
|
||||
/* Debug dump of the instruction dependency graph: for every block, each
 * root instruction is printed on its own line as a tree of predecessors.
 *
 * Prints nothing unless LIMA_DEBUG_PP is set in lima_debug.
 */
void ppir_instr_print_dep(ppir_compiler *comp)
{
   if (!(lima_debug & LIMA_DEBUG_PP))
      return;

   /* clear the visited marks left over from any previous dump */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->printed = false;
      }
   }

   printf("======ppir instr depend======\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         if (!ppir_instr_is_root(instr))
            continue;

         ppir_instr_print_sub(instr);
         printf("\n");
      }
      printf("------------------------\n");
   }
}
|
|
@ -0,0 +1,421 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
/* Lower a const node.
 *
 * A const with no successors is deleted. Otherwise every non-ALU
 * successor is rerouted through a single mov node (created on demand)
 * that copies the const, because only ALU nodes read a const directly.
 *
 * Returns false only if the mov node could not be created.
 */
static bool ppir_lower_const(ppir_block *block, ppir_node *node)
{
   if (ppir_node_is_root(node)) {
      ppir_node_delete(node);
      return true;
   }

   ppir_dest *dest = ppir_node_get_dest(node);
   ppir_node *move = NULL;

   /* const (register) can only be used in alu node, create a move
    * node for other types of node */
   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;

      if (succ->type == ppir_node_type_alu)
         continue;

      if (!move) {
         move = ppir_node_create(block, ppir_op_mov, -1, 0);
         if (unlikely(!move))
            return false;

         ppir_debug("lower const create move %d for %d\n",
                    move->index, node->index);

         /* the mov takes over the const's destination and reads it back
          * with an identity swizzle
          */
         ppir_alu_node *alu = ppir_node_to_alu(move);
         alu->dest = *dest;
         alu->num_src = 1;
         ppir_node_target_assign(alu->src, dest);
         for (int c = 0; c < 4; c++)
            alu->src->swizzle[c] = c;
      }

      ppir_node_replace_pred(dep, move);
      ppir_node_replace_child(succ, node, move);
   }

   if (move) {
      ppir_node_add_dep(move, node);
      list_addtail(&move->list, &node->list);
   }

   return true;
}
|
||||
|
||||
/* lower dot to mul+sum */
|
||||
static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0);
|
||||
if (!mul)
|
||||
return false;
|
||||
list_addtail(&mul->node.list, &node->list);
|
||||
|
||||
ppir_alu_node *dot = ppir_node_to_alu(node);
|
||||
mul->src[0] = dot->src[0];
|
||||
mul->src[1] = dot->src[1];
|
||||
mul->num_src = 2;
|
||||
|
||||
int num_components = node->op - ppir_op_dot2 + 2;
|
||||
ppir_dest *dest = &mul->dest;
|
||||
dest->type = ppir_target_ssa;
|
||||
dest->ssa.num_components = num_components;
|
||||
dest->ssa.live_in = INT_MAX;
|
||||
dest->ssa.live_out = 0;
|
||||
dest->write_mask = u_bit_consecutive(0, num_components);
|
||||
|
||||
ppir_node_foreach_pred_safe(node, dep) {
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(&mul->node, dep->pred);
|
||||
}
|
||||
ppir_node_add_dep(node, &mul->node);
|
||||
|
||||
if (node->op == ppir_op_dot2) {
|
||||
node->op = ppir_op_add;
|
||||
|
||||
ppir_node_target_assign(dot->src, dest);
|
||||
dot->src[0].swizzle[0] = 0;
|
||||
dot->src[0].absolute = false;
|
||||
dot->src[0].negate = false;
|
||||
|
||||
ppir_node_target_assign(dot->src + 1, dest);
|
||||
dot->src[1].swizzle[0] = 1;
|
||||
dot->src[1].absolute = false;
|
||||
dot->src[1].negate = false;
|
||||
}
|
||||
else {
|
||||
node->op = node->op == ppir_op_dot3 ? ppir_op_sum3 : ppir_op_sum4;
|
||||
|
||||
ppir_node_target_assign(dot->src, dest);
|
||||
for (int i = 0; i < 4; i++)
|
||||
dot->src[0].swizzle[i] = i;
|
||||
dot->src[0].absolute = false;
|
||||
dot->src[0].negate = false;
|
||||
|
||||
dot->num_src = 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Allocate a register with `num_components` components as a ralloc child
 * of `comp` and append it to the compiler's register list.
 *
 * The live range starts out as live_in = INT_MAX, live_out = 0.
 * Returns NULL if the allocation failed.
 */
static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
{
   ppir_reg *reg = rzalloc(comp, ppir_reg);

   if (reg) {
      reg->num_components = num_components;
      reg->live_in = INT_MAX;
      reg->live_out = 0;
      reg->is_head = false;
      list_addtail(&reg->list, &comp->reg_list);
   }

   return reg;
}
|
||||
|
||||
/* lower vector alu node to multi scalar nodes */
|
||||
static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_alu_node *alu = ppir_node_to_alu(node);
|
||||
ppir_dest *dest = &alu->dest;
|
||||
|
||||
int n = 0;
|
||||
int index[4];
|
||||
|
||||
unsigned mask = dest->write_mask;
|
||||
while (mask)
|
||||
index[n++] = u_bit_scan(&mask);
|
||||
|
||||
if (n == 1)
|
||||
return true;
|
||||
|
||||
ppir_reg *r;
|
||||
/* we need a reg for scalar nodes to store output */
|
||||
if (dest->type == ppir_target_register)
|
||||
r = dest->reg;
|
||||
else {
|
||||
r = create_reg(block->comp, n);
|
||||
if (!r)
|
||||
return false;
|
||||
|
||||
/* change all successors to use reg r */
|
||||
ppir_node_foreach_succ(node, dep) {
|
||||
ppir_node *succ = dep->succ;
|
||||
if (succ->type == ppir_node_type_alu) {
|
||||
ppir_alu_node *sa = ppir_node_to_alu(succ);
|
||||
for (int i = 0; i < sa->num_src; i++) {
|
||||
ppir_src *src = sa->src + i;
|
||||
if (ppir_node_target_equal(src, dest)) {
|
||||
src->type = ppir_target_register;
|
||||
src->reg = r;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
assert(succ->type == ppir_node_type_store);
|
||||
ppir_store_node *ss = ppir_node_to_store(succ);
|
||||
ppir_src *src = &ss->src;
|
||||
src->type = ppir_target_register;
|
||||
src->reg = r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* create each component's scalar node */
|
||||
for (int i = 0; i < n; i++) {
|
||||
ppir_node *s = ppir_node_create(block, node->op, -1, 0);
|
||||
if (!s)
|
||||
return false;
|
||||
list_addtail(&s->list, &node->list);
|
||||
|
||||
ppir_alu_node *sa = ppir_node_to_alu(s);
|
||||
ppir_dest *sd = &sa->dest;
|
||||
sd->type = ppir_target_register;
|
||||
sd->reg = r;
|
||||
sd->modifier = dest->modifier;
|
||||
sd->write_mask = 1 << index[i];
|
||||
|
||||
for (int j = 0; j < alu->num_src; j++)
|
||||
sa->src[j] = alu->src[j];
|
||||
sa->num_src = alu->num_src;
|
||||
|
||||
/* TODO: need per reg component dependancy */
|
||||
ppir_node_foreach_succ(node, dep) {
|
||||
ppir_node_add_dep(dep->succ, s);
|
||||
}
|
||||
|
||||
ppir_node_foreach_pred(node, dep) {
|
||||
ppir_node_add_dep(s, dep->pred);
|
||||
}
|
||||
}
|
||||
|
||||
ppir_node_delete(node);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Lower a two-source ALU op by advancing it to the next op in the enum
 * and exchanging its two sources. Registered for lt and le in the
 * lowering table.
 *
 * `block` is unused. Always returns true.
 */
static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
{
   (void) block;

   /* swapped op must be the next op */
   node->op++;

   assert(node->type == ppir_node_type_alu);
   ppir_alu_node *alu = ppir_node_to_alu(node);
   assert(alu->num_src == 2);

   ppir_src *first = &alu->src[0];
   ppir_src *second = &alu->src[1];
   ppir_src saved = *first;
   *first = *second;
   *second = saved;

   return true;
}
|
||||
|
||||
/* Make sure a texture load gets its coordinates from a load_coords node.
 *
 * If the texture load's only predecessor is a load_varying that has no
 * other successor, that node is simply retyped to load_coords. Otherwise
 * a new load_coords node is inserted between the texture load and its
 * predecessors: it reads the original coordinate source and the texture
 * load reads the new node's SSA result.
 *
 * Returns false if the load_coords node could not be created.
 */
static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
{
   ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);

   if (ppir_node_has_single_pred(node)) {
      ppir_node *only_pred = ppir_node_first_pred(node);

      /* If ldtex is the only successor of load_varying node
       * we're good. Just change load_varying op type to load_coords.
       */
      if (only_pred->op == ppir_op_load_varying &&
          ppir_node_has_single_succ(only_pred)) {
         only_pred->op = ppir_op_load_coords;
         return true;
      }
   }

   /* Otherwise we need to create load_coords node */
   ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
   if (!load)
      return false;
   list_addtail(&load->node.list, &node->list);

   ppir_debug("%s create load_coords node %d for %d\n",
              __FUNCTION__, load->node.index, node->index);

   ppir_dest *dest = &load->dest;
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = load_tex->src_coords.ssa->num_components;
   dest->ssa.live_in = INT_MAX;
   dest->ssa.live_out = 0;
   dest->write_mask = u_bit_consecutive(0, dest->ssa.num_components);

   /* the new node reads the old coordinates ... */
   load->src = load_tex->src_coords;

   /* ... and the texture load now reads the new node's result */
   load_tex->src_coords.type = ppir_target_ssa;
   load_tex->src_coords.ssa = &dest->ssa;

   /* move every predecessor of the texture load over to the new node */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(&load->node, pred);
   }

   ppir_node_add_dep(node, &load->node);
   return true;
}
|
||||
|
||||
/* Prepare for sin and cos and then lower vector alu node to multi
|
||||
* scalar nodes */
|
||||
static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_alu_node *alu = ppir_node_to_alu(node);
|
||||
|
||||
ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0);
|
||||
if (!inv_2pi_node)
|
||||
return false;
|
||||
list_addtail(&inv_2pi_node->list, &node->list);
|
||||
|
||||
/* For sin and cos, the input has to multiplied by the constant
|
||||
* 1/(2*pi), presumably to simplify the hardware. */
|
||||
ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node);
|
||||
inv_2pi_const->constant.num = 1;
|
||||
inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI));
|
||||
|
||||
inv_2pi_const->dest.type = ppir_target_ssa;
|
||||
inv_2pi_const->dest.ssa.num_components = 1;
|
||||
inv_2pi_const->dest.ssa.live_in = INT_MAX;
|
||||
inv_2pi_const->dest.ssa.live_out = 0;
|
||||
inv_2pi_const->dest.write_mask = 0x01;
|
||||
|
||||
ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0);
|
||||
if (!mul_node)
|
||||
return false;
|
||||
list_addtail(&mul_node->list, &node->list);
|
||||
|
||||
ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node);
|
||||
mul_alu->num_src = 2;
|
||||
mul_alu->src[0] = alu->src[0];
|
||||
mul_alu->src[1].type = ppir_target_ssa;
|
||||
mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa;
|
||||
|
||||
int num_components = alu->src[0].ssa->num_components;
|
||||
mul_alu->dest.type = ppir_target_ssa;
|
||||
mul_alu->dest.ssa.num_components = num_components;
|
||||
mul_alu->dest.ssa.live_in = INT_MAX;
|
||||
mul_alu->dest.ssa.live_out = 0;
|
||||
mul_alu->dest.write_mask = u_bit_consecutive(0, num_components);
|
||||
|
||||
alu->src[0].type = ppir_target_ssa;
|
||||
alu->src[0].ssa = &mul_alu->dest.ssa;
|
||||
for (int i = 0; i < 4; i++)
|
||||
alu->src->swizzle[i] = i;
|
||||
|
||||
ppir_node_foreach_pred_safe(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(mul_node, pred);
|
||||
}
|
||||
ppir_node_add_dep(node, mul_node);
|
||||
ppir_node_add_dep(mul_node, inv_2pi_node);
|
||||
|
||||
return ppir_lower_vec_to_scalar(block, node);
|
||||
}
|
||||
|
||||
/* insert a move as the select condition to make sure it can
|
||||
* be inserted to select instr float mul slot
|
||||
*/
|
||||
static bool ppir_lower_select(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_alu_node *alu = ppir_node_to_alu(node);
|
||||
|
||||
ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
|
||||
if (!move)
|
||||
return false;
|
||||
list_addtail(&move->list, &node->list);
|
||||
|
||||
ppir_alu_node *move_alu = ppir_node_to_alu(move);
|
||||
ppir_src *move_src = move_alu->src, *src = alu->src;
|
||||
move_src->type = src->type;
|
||||
move_src->ssa = src->ssa;
|
||||
move_src->swizzle[0] = src->swizzle[0];
|
||||
move_alu->num_src = 1;
|
||||
|
||||
ppir_dest *move_dest = &move_alu->dest;
|
||||
move_dest->type = ppir_target_ssa;
|
||||
move_dest->ssa.num_components = 1;
|
||||
move_dest->ssa.live_in = INT_MAX;
|
||||
move_dest->ssa.live_out = 0;
|
||||
move_dest->write_mask = 1;
|
||||
|
||||
ppir_node_foreach_pred(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
ppir_dest *dest = ppir_node_get_dest(pred);
|
||||
if (ppir_node_target_equal(alu->src, dest)) {
|
||||
ppir_node_replace_pred(dep, move);
|
||||
ppir_node_add_dep(move, pred);
|
||||
}
|
||||
}
|
||||
|
||||
/* move must be the first pred of select node which make sure
|
||||
* the float mul slot is free when node to instr
|
||||
*/
|
||||
assert(ppir_node_first_pred(node) == move);
|
||||
|
||||
src->swizzle[0] = 0;
|
||||
ppir_node_target_assign(alu->src, move_dest);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
|
||||
[ppir_op_const] = ppir_lower_const,
|
||||
[ppir_op_dot2] = ppir_lower_dot,
|
||||
[ppir_op_dot3] = ppir_lower_dot,
|
||||
[ppir_op_dot4] = ppir_lower_dot,
|
||||
[ppir_op_rcp] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_log2] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_exp2] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_sqrt] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar,
|
||||
[ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar,
|
||||
[ppir_op_lt] = ppir_lower_swap_args,
|
||||
[ppir_op_le] = ppir_lower_swap_args,
|
||||
[ppir_op_load_texture] = ppir_lower_texture,
|
||||
[ppir_op_select] = ppir_lower_select,
|
||||
};
|
||||
|
||||
bool ppir_lower_prog(ppir_compiler *comp)
|
||||
{
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
|
||||
if (ppir_lower_funcs[node->op] &&
|
||||
!ppir_lower_funcs[node->op](block, node))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ppir_node_print_prog(comp);
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,494 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
/* Create a node for op whose destination is the NIR SSA value ssa.
 *
 * The destination is set up as an SSA target covering all components of
 * ssa. Returns NULL when ppir_node_create() fails.
 */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (node == NULL)
      return NULL;

   ppir_dest *d = ppir_node_get_dest(node);

   d->type = ppir_target_ssa;
   d->ssa.num_components = ssa->num_components;
   /* live range starts out empty: live_in at the maximum, live_out at 0 */
   d->ssa.live_in = INT_MAX;
   d->ssa.live_out = 0;
   d->write_mask = u_bit_consecutive(0, ssa->num_components);

   /* load and store nodes mark their SSA destination as head */
   switch (node->type) {
   case ppir_node_type_load:
   case ppir_node_type_store:
      d->ssa.is_head = true;
      break;
   default:
      break;
   }

   return node;
}
|
||||
|
||||
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
|
||||
nir_reg_dest *reg, unsigned mask)
|
||||
{
|
||||
ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
ppir_dest *dest = ppir_node_get_dest(node);
|
||||
|
||||
list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
|
||||
if (r->index == reg->reg->index) {
|
||||
dest->reg = r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dest->type = ppir_target_register;
|
||||
dest->write_mask = mask;
|
||||
|
||||
if (node->type == ppir_node_type_load ||
|
||||
node->type == ppir_node_type_store)
|
||||
dest->reg->is_head = true;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
|
||||
nir_dest *dest, unsigned mask)
|
||||
{
|
||||
unsigned index = -1;
|
||||
|
||||
if (dest) {
|
||||
if (dest->is_ssa)
|
||||
return ppir_node_create_ssa(block, op, &dest->ssa);
|
||||
else
|
||||
return ppir_node_create_reg(block, op, &dest->reg, mask);
|
||||
}
|
||||
|
||||
return ppir_node_create(block, op, index, 0);
|
||||
}
|
||||
|
||||
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
|
||||
ppir_src *ps, nir_src *ns, unsigned mask)
|
||||
{
|
||||
ppir_node *child = NULL;
|
||||
|
||||
if (ns->is_ssa) {
|
||||
child = comp->var_nodes[ns->ssa->index];
|
||||
ppir_node_add_dep(node, child);
|
||||
}
|
||||
else {
|
||||
nir_register *reg = ns->reg.reg;
|
||||
while (mask) {
|
||||
int swizzle = ps->swizzle[u_bit_scan(&mask)];
|
||||
child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
|
||||
ppir_node_add_dep(node, child);
|
||||
}
|
||||
}
|
||||
|
||||
ppir_dest *dest = ppir_node_get_dest(child);
|
||||
ppir_node_target_assign(ps, dest);
|
||||
}
|
||||
|
||||
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
|
||||
/* not supported */
|
||||
[0 ... nir_last_opcode] = -1,
|
||||
|
||||
[nir_op_fmov] = ppir_op_mov,
|
||||
[nir_op_imov] = ppir_op_mov,
|
||||
[nir_op_fmul] = ppir_op_mul,
|
||||
[nir_op_fadd] = ppir_op_add,
|
||||
[nir_op_fdot2] = ppir_op_dot2,
|
||||
[nir_op_fdot3] = ppir_op_dot3,
|
||||
[nir_op_fdot4] = ppir_op_dot4,
|
||||
[nir_op_frsq] = ppir_op_rsqrt,
|
||||
[nir_op_flog2] = ppir_op_log2,
|
||||
[nir_op_fexp2] = ppir_op_exp2,
|
||||
[nir_op_fsqrt] = ppir_op_sqrt,
|
||||
[nir_op_fsin] = ppir_op_sin,
|
||||
[nir_op_fcos] = ppir_op_cos,
|
||||
[nir_op_fmax] = ppir_op_max,
|
||||
[nir_op_fmin] = ppir_op_min,
|
||||
[nir_op_frcp] = ppir_op_rcp,
|
||||
[nir_op_ffloor] = ppir_op_floor,
|
||||
[nir_op_ffract] = ppir_op_fract,
|
||||
[nir_op_fand] = ppir_op_and,
|
||||
[nir_op_for] = ppir_op_or,
|
||||
[nir_op_fxor] = ppir_op_xor,
|
||||
[nir_op_sge] = ppir_op_ge,
|
||||
[nir_op_fge] = ppir_op_ge,
|
||||
[nir_op_slt] = ppir_op_lt,
|
||||
[nir_op_flt] = ppir_op_lt,
|
||||
[nir_op_seq] = ppir_op_eq,
|
||||
[nir_op_feq] = ppir_op_eq,
|
||||
[nir_op_sne] = ppir_op_ne,
|
||||
[nir_op_fne] = ppir_op_ne,
|
||||
[nir_op_fnot] = ppir_op_not,
|
||||
[nir_op_bcsel] = ppir_op_select,
|
||||
[nir_op_inot] = ppir_op_not,
|
||||
[nir_op_b2f32] = ppir_op_mov,
|
||||
};
|
||||
|
||||
/* Translate a NIR ALU instruction into a ppir ALU node.
 *
 * Returns NULL (after logging) when the NIR opcode has no ppir equivalent,
 * or when node creation fails.
 */
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);
   const int op = nir_to_ppir_opcodes[alu->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[alu->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &alu->dest.dest,
                                               alu->dest.write_mask);
   if (!node)
      return NULL;

   if (alu->dest.saturate)
      node->dest.modifier = ppir_outmod_clamp_fraction;

   /* dot products read a fixed set of source components; every other op
    * reads the components it writes
    */
   unsigned src_mask;
   if (op == ppir_op_dot2)
      src_mask = 0x3;
   else if (op == ppir_op_dot3)
      src_mask = 0x7;
   else if (op == ppir_op_dot4)
      src_mask = 0xf;
   else
      src_mask = node->dest.write_mask;

   const unsigned num_src = nir_op_infos[alu->op].num_inputs;
   node->num_src = num_src;

   for (unsigned s = 0; s < num_src; s++) {
      nir_alu_src *nsrc = &alu->src[s];
      ppir_src *psrc = &node->src[s];

      /* the swizzle must be in place before the source is resolved,
       * because register sources are looked up per swizzled component
       */
      memcpy(psrc->swizzle, nsrc->swizzle, sizeof(psrc->swizzle));
      ppir_node_add_src(block->comp, &node->node, psrc, &nsrc->src, src_mask);

      psrc->absolute = nsrc->abs;
      psrc->negate = nsrc->negate;
   }

   return &node->node;
}
|
||||
|
||||
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
||||
{
|
||||
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
|
||||
unsigned mask = 0;
|
||||
ppir_load_node *lnode;
|
||||
ppir_store_node *snode;
|
||||
nir_const_value *const_offset;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
if (!instr->dest.is_ssa)
|
||||
mask = u_bit_consecutive(0, instr->num_components);
|
||||
|
||||
lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
|
||||
if (!lnode)
|
||||
return NULL;
|
||||
|
||||
lnode->num_components = instr->num_components;
|
||||
lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
|
||||
return &lnode->node;
|
||||
|
||||
case nir_intrinsic_load_uniform:
|
||||
if (!instr->dest.is_ssa)
|
||||
mask = u_bit_consecutive(0, instr->num_components);
|
||||
|
||||
lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
|
||||
if (!lnode)
|
||||
return NULL;
|
||||
|
||||
lnode->num_components = instr->num_components;
|
||||
lnode->index = nir_intrinsic_base(instr);
|
||||
|
||||
const_offset = nir_src_as_const_value(instr->src[0]);
|
||||
assert(const_offset);
|
||||
lnode->index += (uint32_t)const_offset->f32[0];
|
||||
|
||||
return &lnode->node;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
|
||||
if (!snode)
|
||||
return NULL;
|
||||
|
||||
snode->index = nir_intrinsic_base(instr);
|
||||
|
||||
for (int i = 0; i < instr->num_components; i++)
|
||||
snode->src.swizzle[i] = i;
|
||||
|
||||
ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src,
|
||||
u_bit_consecutive(0, instr->num_components));
|
||||
|
||||
return &snode->node;
|
||||
|
||||
default:
|
||||
ppir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Translate a NIR load_const instruction into a ppir const node holding
 * the same 32-bit component values. Returns NULL when node creation fails.
 */
static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *lc = nir_instr_as_load_const(ni);

   ppir_const_node *cnode = ppir_node_create_ssa(block, ppir_op_const, &lc->def);
   if (cnode == NULL)
      return NULL;

   assert(lc->def.bit_size == 32);

   /* copy the raw integer bits of each component */
   int c = 0;
   while (c < lc->def.num_components) {
      cnode->constant.value[c].i = lc->value.i32[c];
      c++;
   }
   cnode->constant.num = lc->def.num_components;

   return &cnode->node;
}
|
||||
|
||||
/* SSA undef instructions are not handled: log the error and fail. */
static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   ppir_error("nir_ssa_undef_instr not support\n");
   return NULL;
}
|
||||
|
||||
/* Translate a NIR texture instruction into a ppir load_texture node.
 *
 * Only plain nir_texop_tex with a 2D, RECT or EXTERNAL sampler and a single
 * coordinate source is accepted. The instruction is validated in full
 * before the node is created, so a rejected instruction never leaves a
 * half-initialised node registered with the compiler.
 *
 * Returns the new node, or NULL (after logging an error) when the
 * instruction is unsupported or node creation fails.
 */
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);

   if (instr->op != nir_texop_tex) {
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      /* this aborts the translation, so report it as an error like the
       * unsupported-texop case above
       */
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   assert(instr->num_srcs == 1);
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (instr->src[i].src_type != nir_tex_src_coord) {
         ppir_error("unknown texture source\n");
         return NULL;
      }
   }

   ppir_load_texture_node *node =
      ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* identity swizzle over the coordinate components; it must be set
    * before the source is resolved
    */
   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   /* every source was verified above to be the coordinate */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      ppir_node_add_src(block->comp, &node->node, &node->src_coords,
                        &instr->src[i].src,
                        u_bit_consecutive(0, instr->coord_components));
   }

   return &node->node;
}
|
||||
|
||||
/* Jump instructions are not handled: log the error and fail. */
static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   ppir_error("nir_jump_instr not support\n");
   return NULL;
}
|
||||
|
||||
/* Emit callbacks indexed by NIR instruction type. The table is sized by
 * nir_instr_type_phi, so only types below phi can be looked up; types that
 * are not listed keep a NULL entry.
 */
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   /* supported instruction kinds */
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,

   /* kinds whose callback only reports an error */
   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
|
||||
|
||||
/* Allocate a zeroed ppir_block under comp with empty node and instruction
 * lists. Returns NULL on allocation failure.
 */
static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *blk = rzalloc(comp, ppir_block);

   if (blk != NULL) {
      list_inithead(&blk->node_list);
      list_inithead(&blk->instr_list);
   }

   return blk;
}
|
||||
|
||||
/* Translate one NIR block into a new ppir block appended to comp.
 *
 * Every instruction is dispatched through ppir_emit_instr[]. The
 * translation fails (returns false) when the block cannot be allocated,
 * when an instruction type has no emit callback, or when a callback returns
 * NULL. Previously a NULL result was silently skipped, which dropped the
 * instruction from the program while still reporting success.
 */
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_block_create(comp);
   if (!block)
      return false;

   list_addtail(&block->list, &comp->block_list);
   block->comp = comp;

   nir_foreach_instr(instr, nblock) {
      /* the table only covers types below phi and may contain NULL
       * entries; never index or call outside of what it provides
       */
      if (instr->type >= nir_instr_type_phi || !ppir_emit_instr[instr->type]) {
         ppir_error("unsupported nir_instr type %d\n", instr->type);
         return false;
      }

      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
      if (!node)
         return false;

      list_addtail(&node->list, &block->node_list);
   }

   return true;
}
|
||||
|
||||
/* "if" control-flow nodes are not handled: log the error and fail. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
{
   (void)comp;
   (void)nif;

   ppir_error("if nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Loop control-flow nodes are not handled: log the error and fail. */
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   (void)comp;
   (void)nloop;

   ppir_error("loop nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Function control-flow nodes are not handled: log the error and fail. */
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   (void)comp;
   (void)nfunc;

   ppir_error("function nir_cf_node not support\n");
   return false;
}
|
||||
|
||||
/* Translate every control-flow node of list by dispatching on its type.
 * Stops and returns false at the first node that fails to translate or
 * has an unknown type.
 */
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!ppir_emit_block(comp, nir_cf_node_as_block(node)))
            return false;
         break;

      case nir_cf_node_if:
         if (!ppir_emit_if(comp, nir_cf_node_as_if(node)))
            return false;
         break;

      case nir_cf_node_loop:
         if (!ppir_emit_loop(comp, nir_cf_node_as_loop(node)))
            return false;
         break;

      case nir_cf_node_function:
         if (!ppir_emit_function(comp, nir_cf_node_as_function(node)))
            return false;
         break;

      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }
   }

   return true;
}
|
||||
|
||||
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
|
||||
{
|
||||
ppir_compiler *comp = rzalloc_size(
|
||||
prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
|
||||
if (!comp)
|
||||
return NULL;
|
||||
|
||||
list_inithead(&comp->block_list);
|
||||
list_inithead(&comp->reg_list);
|
||||
|
||||
comp->var_nodes = (ppir_node **)(comp + 1);
|
||||
comp->reg_base = num_ssa;
|
||||
comp->prog = prog;
|
||||
return comp;
|
||||
}
|
||||
|
||||
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
|
||||
struct ra_regs *ra)
|
||||
{
|
||||
nir_function_impl *func = nir_shader_get_entrypoint(nir);
|
||||
ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
|
||||
if (!comp)
|
||||
return false;
|
||||
|
||||
comp->ra = ra;
|
||||
|
||||
foreach_list_typed(nir_register, reg, node, &func->registers) {
|
||||
ppir_reg *r = rzalloc(comp, ppir_reg);
|
||||
if (!r)
|
||||
return false;
|
||||
|
||||
r->index = reg->index;
|
||||
r->num_components = reg->num_components;
|
||||
r->live_in = INT_MAX;
|
||||
r->live_out = 0;
|
||||
r->is_head = false;
|
||||
list_addtail(&r->list, &comp->reg_list);
|
||||
}
|
||||
|
||||
if (!ppir_emit_cf_list(comp, &func->body))
|
||||
goto err_out0;
|
||||
ppir_node_print_prog(comp);
|
||||
|
||||
if (!ppir_lower_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!ppir_node_to_instr(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!ppir_schedule_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!ppir_regalloc_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!ppir_codegen_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
ralloc_free(comp);
|
||||
return true;
|
||||
|
||||
err_out0:
|
||||
ralloc_free(comp);
|
||||
return false;
|
||||
}
|
||||
|
|
@ -0,0 +1,426 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/bitscan.h"
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
const ppir_op_info ppir_op_infos[] = {
|
||||
[ppir_op_mov] = {
|
||||
.name = "mov",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_mul] = {
|
||||
.name = "mul",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_add] = {
|
||||
.name = "add",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_dot2] = {
|
||||
.name = "dot2",
|
||||
},
|
||||
[ppir_op_dot3] = {
|
||||
.name = "dot3",
|
||||
},
|
||||
[ppir_op_dot4] = {
|
||||
.name = "dot4",
|
||||
},
|
||||
[ppir_op_sum3] = {
|
||||
.name = "sum3",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_sum4] = {
|
||||
.name = "sum4",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_rsqrt] = {
|
||||
.name = "rsqrt",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_log2] = {
|
||||
.name = "log2",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_exp2] = {
|
||||
.name = "exp2",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_sqrt] = {
|
||||
.name = "sqrt",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_sin] = {
|
||||
.name = "sin",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_cos] = {
|
||||
.name = "cos",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_max] = {
|
||||
.name = "max",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_min] = {
|
||||
.name = "min",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_floor] = {
|
||||
.name = "floor",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_fract] = {
|
||||
.name = "fract",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_and] = {
|
||||
.name = "and",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_or] = {
|
||||
.name = "or",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_xor] = {
|
||||
.name = "xor",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_not] = {
|
||||
.name = "not",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_lt] = {
|
||||
.name = "lt",
|
||||
},
|
||||
[ppir_op_le] = {
|
||||
.name = "le",
|
||||
},
|
||||
[ppir_op_gt] = {
|
||||
.name = "gt",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_ge] = {
|
||||
.name = "ge",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_eq] = {
|
||||
.name = "eq",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_ne] = {
|
||||
.name = "ne",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
|
||||
PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_select] = {
|
||||
.name = "select",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_rcp] = {
|
||||
.name = "rcp",
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_load_varying] = {
|
||||
.name = "ld_var",
|
||||
.type = ppir_node_type_load,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_load_coords] = {
|
||||
.name = "ld_coords",
|
||||
.type = ppir_node_type_load,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_load_uniform] = {
|
||||
.name = "ld_uni",
|
||||
.type = ppir_node_type_load,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_load_texture] = {
|
||||
.name = "ld_tex",
|
||||
.type = ppir_node_type_load_texture,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_load_temp] = {
|
||||
.name = "ld_temp",
|
||||
.type = ppir_node_type_load,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_const] = {
|
||||
.name = "const",
|
||||
.type = ppir_node_type_const,
|
||||
},
|
||||
[ppir_op_store_color] = {
|
||||
.name = "st_col",
|
||||
.type = ppir_node_type_store,
|
||||
},
|
||||
[ppir_op_store_temp] = {
|
||||
.name = "st_temp",
|
||||
.type = ppir_node_type_store,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/* Allocate a zeroed node of the concrete type required by op, owned by
 * block.
 *
 * index >= 0 registers the node in comp->var_nodes so later sources can
 * find it: with a non-zero mask it is stored in the register slots of
 * every component in mask, otherwise in the SSA slot index. A negative
 * index creates an unregistered node named "new".
 *
 * Returns the node (as void * so callers can assign it to the concrete
 * node type), or NULL on allocation failure.
 */
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
{
   static const int node_size[] = {
      [ppir_node_type_alu] = sizeof(ppir_alu_node),
      [ppir_node_type_const] = sizeof(ppir_const_node),
      [ppir_node_type_load] = sizeof(ppir_load_node),
      [ppir_node_type_store] = sizeof(ppir_store_node),
      [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
   };

   ppir_compiler *comp = block->comp;
   ppir_node_type type = ppir_op_infos[op].type;

   ppir_node *node = rzalloc_size(block, node_size[type]);
   if (node == NULL)
      return NULL;

   list_inithead(&node->succ_list);
   list_inithead(&node->pred_list);

   if (index < 0) {
      snprintf(node->name, sizeof(node->name), "new");
   } else if (!mask) {
      comp->var_nodes[index] = node;
      snprintf(node->name, sizeof(node->name), "ssa%d", index);
   } else {
      /* reg has 4 slots, one for each component write node */
      while (mask) {
         int c = u_bit_scan(&mask);
         comp->var_nodes[(index << 2) + comp->reg_base + c] = node;
      }
      snprintf(node->name, sizeof(node->name), "reg%d", index);
   }

   node->op = op;
   node->type = type;
   /* every node gets the next value of the compiler's running counter */
   node->index = comp->cur_index++;
   node->block = block;

   return node;
}
|
||||
|
||||
/* Record that succ depends on pred.
 *
 * Nothing is added when the two nodes belong to different blocks or when
 * the dependency already exists. The new ppir_dep is allocated under succ
 * and linked into succ's predecessor list and pred's successor list.
 */
void ppir_node_add_dep(ppir_node *succ, ppir_node *pred)
{
   /* don't add dep for two nodes from different block */
   if (succ->block != pred->block)
      return;

   /* don't add duplicated dep */
   ppir_node_foreach_pred(succ, existing) {
      if (existing->pred == pred)
         return;
   }

   ppir_dep *link = ralloc(succ, ppir_dep);

   link->succ = succ;
   link->pred = pred;
   list_addtail(&link->pred_link, &succ->pred_list);
   list_addtail(&link->succ_link, &pred->succ_list);
}
|
||||
|
||||
/* Unlink dep from both lists it is a member of and free it. */
void ppir_node_remove_dep(ppir_dep *dep)
{
   list_del(&dep->pred_link);
   list_del(&dep->succ_link);

   ralloc_free(dep);
}
|
||||
|
||||
/* Retarget src to the destination of new_child, but only when it currently
 * refers to the destination of old_child.
 */
static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)
{
   ppir_dest *old_dest = ppir_node_get_dest(old_child);

   if (!ppir_node_target_equal(src, old_dest))
      return;

   ppir_node_target_assign(src, ppir_node_get_dest(new_child));
}
|
||||
|
||||
/* Redirect every source of parent that reads old_child so it reads
 * new_child instead. Only ALU and store parents are handled; other node
 * types are left untouched.
 */
void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)
{
   switch (parent->type) {
   case ppir_node_type_alu: {
      ppir_alu_node *alu = ppir_node_to_alu(parent);

      for (int s = 0; s < alu->num_src; s++)
         _ppir_node_replace_child(&alu->src[s], old_child, new_child);
      break;
   }
   case ppir_node_type_store: {
      ppir_store_node *store = ppir_node_to_store(parent);

      _ppir_node_replace_child(&store->src, old_child, new_child);
      break;
   }
   default:
      break;
   }
}
|
||||
|
||||
/* Make new_pred the predecessor of dep: the dep moves from the successor
 * list it is currently on to the tail of new_pred's. The successor side of
 * the dep is not touched.
 */
void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)
{
   dep->pred = new_pred;

   list_del(&dep->succ_link);
   list_addtail(&dep->succ_link, &new_pred->succ_list);
}
|
||||
|
||||
/* Move every successor of src over to dst: each dependency is re-pointed
 * at dst and the successor's sources that read src are retargeted to dst.
 */
void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)
{
   /* _safe iteration: each dep is moved off src's successor list */
   ppir_node_foreach_succ_safe(src, dep) {
      ppir_node *user = dep->succ;

      ppir_node_replace_pred(dep, dst);
      ppir_node_replace_child(user, src, dst);
   }
}
|
||||
|
||||
/* Remove node from the graph: drop all of its successor and predecessor
 * dependencies, unlink it from the node list it is on and free it.
 */
void ppir_node_delete(ppir_node *node)
{
   ppir_node_foreach_succ_safe(node, out_dep) {
      ppir_node_remove_dep(out_dep);
   }

   ppir_node_foreach_pred_safe(node, in_dep) {
      ppir_node_remove_dep(in_dep);
   }

   list_del(&node->list);
   ralloc_free(node);
}
|
||||
|
||||
/* Print node indented by space columns, then recurse into its
 * predecessors with two more columns of indent.
 *
 * A node that was already printed is prefixed with "+" (unless it is a
 * leaf) and its predecessors are not printed again.
 */
static void ppir_node_print_node(ppir_node *node, int space)
{
   int col = 0;
   while (col < space) {
      printf(" ");
      col++;
   }

   const char *seen_mark = "";
   if (node->printed && !ppir_node_is_leaf(node))
      seen_mark = "+";

   printf("%s%s %d %s\n", seen_mark,
          ppir_op_infos[node->op].name, node->index, node->name);

   if (node->printed)
      return;

   ppir_node_foreach_pred(node, dep) {
      ppir_node_print_node(dep->pred, space + 2);
   }

   node->printed = true;
}
|
||||
|
||||
/* Dump the node graph of every block to stdout, starting from each root
 * node. Does nothing unless LIMA_DEBUG_PP is set in lima_debug.
 */
void ppir_node_print_prog(ppir_compiler *comp)
{
   const bool enabled = (lima_debug & LIMA_DEBUG_PP) != 0;
   if (!enabled)
      return;

   /* first pass: clear the printed flag on every node */
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      list_for_each_entry(ppir_node, n, &blk->node_list, list)
         n->printed = false;
   }

   printf("========prog========\n");

   /* second pass: print each tree starting at its root */
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      printf("-------block------\n");

      list_for_each_entry(ppir_node, n, &blk->node_list, list) {
         if (!ppir_node_is_root(n))
            continue;

         ppir_node_print_node(n, 0);
      }
   }

   printf("====================\n");
}
|
|
@ -0,0 +1,401 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
|
||||
/* Create a new instruction in block and insert node into it.
 * Returns false when the instruction cannot be created or the insertion
 * fails.
 */
static bool create_new_instr(ppir_block *block, ppir_node *node)
{
   ppir_instr *instr = ppir_instr_create(block);

   if (unlikely(!instr))
      return false;

   return ppir_instr_insert_node(instr, node) ? true : false;
}
|
||||
|
||||
/* Merge load_coords into the instruction of ldtex and insert a move that
 * takes over ldtex's original destination.
 *
 * load_coords and the coordinate source of ldtex are switched to the
 * discard pipeline register. A new mov node receives a copy of ldtex's
 * destination and all of ldtex's successors; ldtex itself is then
 * retargeted to the sampler pipeline register, which the move reads.
 *
 * The statement order matters: the destination is copied and the
 * successors are moved before ldtex's destination is overwritten.
 *
 * Returns false when a node cannot be inserted into the instruction or
 * the move cannot be created.
 */
static bool insert_to_load_tex(ppir_block *block, ppir_node *load_coords, ppir_node *ldtex)
{
   ppir_dest *tex_dest = ppir_node_get_dest(ldtex);

   /* the coordinates are handed over through the discard pipeline register */
   ppir_load_node *coords = ppir_node_to_load(load_coords);
   coords->dest.type = ppir_target_pipeline;
   coords->dest.pipeline = ppir_pipeline_reg_discard;

   ppir_load_texture_node *tex = ppir_node_to_load_texture(ldtex);
   tex->src_coords.type = ppir_target_pipeline;
   tex->src_coords.pipeline = ppir_pipeline_reg_discard;

   /* Insert load_coords to ldtex instruction */
   if (!ppir_instr_insert_node(ldtex->instr, load_coords))
      return false;

   /* Create move node */
   ppir_node *move = ppir_node_create(block, ppir_op_mov, -1 , 0);
   if (unlikely(!move))
      return false;

   ppir_debug("insert_load_tex: create move %d for %d\n",
              move->index, ldtex->index);

   /* the move inherits the original destination and all users of ldtex */
   ppir_alu_node *mov_alu = ppir_node_to_alu(move);
   mov_alu->dest = *tex_dest;

   ppir_node_replace_all_succ(move, ldtex);

   /* only now retarget ldtex to the sampler pipeline register */
   tex_dest->type = ppir_target_pipeline;
   tex_dest->pipeline = ppir_pipeline_reg_sampler;

   /* the move reads ldtex's new destination with an identity swizzle */
   mov_alu->num_src = 1;
   ppir_node_target_assign(&mov_alu->src[0], tex_dest);
   for (int c = 0; c < 4; c++)
      mov_alu->src->swizzle[c] = c;

   ppir_node_add_dep(move, ldtex);
   /* link the move into the node list adjacent to ldtex */
   list_addtail(&move->list, &ldtex->list);

   if (!ppir_instr_insert_node(ldtex->instr, move))
      return false;

   return true;
}
|
||||
|
||||
/* Place node (a const or uniform/temp load with an SSA destination)
 * directly into the instruction of each of its successors.
 *
 * Step 1: try to insert node into every successor's instruction. All
 *         successors for which that fails are redirected to one shared
 *         mov node, which is put into a new instruction together with
 *         node.
 * Step 2: node itself belongs to the first remaining successor's
 *         instruction; for every other distinct successor instruction a
 *         duplicate of node is created (at most one per instruction).
 *
 * Returns false on allocation/insertion failure.
 */
static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest->type == ppir_target_ssa);

   ppir_node *move = NULL;

   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;
      assert(succ->type == ppir_node_type_alu);

      if (!ppir_instr_insert_node(succ->instr, node)) {
         /* create a move node to insert for failed node */
         if (!move) {
            move = ppir_node_create(block, ppir_op_mov, -1, 0);
            if (unlikely(!move))
               return false;

            ppir_debug("node_to_instr create move %d for %d\n",
                       move->index, node->index);

            ppir_alu_node *alu = ppir_node_to_alu(move);
            alu->dest = *dest;
            alu->num_src = 1;
            ppir_node_target_assign(alu->src, dest);
            for (int i = 0; i < 4; i++)
               alu->src->swizzle[i] = i;
         }

         ppir_node_replace_pred(dep, move);
         ppir_node_replace_child(succ, node, move);
      }
   }

   if (move) {
      if (!create_new_instr(block, move))
         return false;

      MAYBE_UNUSED bool insert_result =
         ppir_instr_insert_node(move->instr, node);
      assert(insert_result);

      ppir_node_add_dep(move, node);
      list_addtail(&move->list, &node->list);
   }

   /* duplicate node for each successor */

   bool first = true;
   struct list_head dup_list;
   list_inithead(&dup_list);

   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;

      if (first) {
         first = false;
         node->instr = succ->instr;
         continue;
      }

      if (succ->instr == node->instr)
         continue;

      /* Reuse a duplicate that already lives in this successor's
       * instruction. The search result is recorded in a variable so
       * that the `continue` below applies to the successor loop rather
       * than to the search loop.
       */
      ppir_node *existing = NULL;
      list_for_each_entry(ppir_node, cand, &dup_list, list) {
         if (succ->instr == cand->instr) {
            existing = cand;
            break;
         }
      }
      if (existing) {
         ppir_node_replace_pred(dep, existing);
         continue;
      }

      ppir_node *dup = ppir_node_create(block, node->op, -1, 0);
      if (unlikely(!dup))
         return false;
      list_addtail(&dup->list, &dup_list);

      ppir_debug("node_to_instr duplicate %s %d from %d\n",
                 ppir_op_infos[dup->op].name, dup->index, node->index);

      ppir_instr *instr = succ->instr;
      dup->instr = instr;
      dup->instr_pos = node->instr_pos;
      ppir_node_replace_pred(dep, dup);

      if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) {
         ppir_load_node *load = ppir_node_to_load(node);
         ppir_load_node *dup_load = ppir_node_to_load(dup);
         dup_load->dest = load->dest;
         dup_load->index = load->index;
         dup_load->num_components = load->num_components;
         /* the slot held the original node; it now holds the duplicate */
         instr->slots[node->instr_pos] = dup;
      }
   }

   list_splicetail(&dup_list, &node->list);

   return true;
}
|
||||
|
||||
/* Assign node to an instruction, then recurse into those predecessors
 * whose successors have all been placed already.
 *
 * Placement depends on the node type:
 *  - alu:          try to pair with a single add-slot successor as the
 *                  matching mul, otherwise create a new instruction.
 *  - load:         uniform/temp loads are inserted into each successor's
 *                  instruction; varying loads get a new instruction;
 *                  load_coords is merged into its texture load.
 *  - load_texture: new instruction.
 *  - const:        inserted into each successor's instruction.
 *  - store:        store_temp gets a new instruction; store_color gets a
 *                  mov placed in a new end instruction, and recursion
 *                  continues from that mov.
 *
 * Returns false on failure or for an unsupported node type.
 */
static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
{
   switch (node->type) {
   case ppir_node_type_alu:
   {
      /* merge pred mul and succ add in the same instr can save a reg
       * by using pipeline reg ^vmul/^fmul */
      ppir_alu_node *alu = ppir_node_to_alu(node);
      if (alu->dest.type == ppir_target_ssa &&
          ppir_node_has_single_succ(node)) {
         ppir_node *succ = ppir_node_first_succ(node);
         if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_VEC_ADD) {
            node->instr_pos = PPIR_INSTR_SLOT_ALU_VEC_MUL;
            /* select instr's condition must be inserted to fmul slot */
            if (succ->op == ppir_op_select &&
                ppir_node_first_pred(succ) == node) {
               assert(alu->dest.ssa.num_components == 1);
               node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL;
            }
            ppir_instr_insert_mul_node(succ, node);
         }
         else if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_SCL_ADD &&
                  alu->dest.ssa.num_components == 1) {
            node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL;
            ppir_instr_insert_mul_node(succ, node);
         }
      }

      /* can't be inserted into any existing instr, create one */
      if (!node->instr && !create_new_instr(block, node))
         return false;

      break;
   }
   case ppir_node_type_load:
      if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) {
         /* merge pred load_uniform/load_temp into succ instr can save a
          * reg by using pipeline reg */
         if (!insert_to_each_succ_instr(block, node))
            return false;

         ppir_load_node *load = ppir_node_to_load(node);
         load->dest.type = ppir_target_pipeline;
         load->dest.pipeline = ppir_pipeline_reg_uniform;
      }
      else if (node->op == ppir_op_load_varying) {
         /* delay the load varying dup to scheduler */
         if (!create_new_instr(block, node))
            return false;
      }
      else if (node->op == ppir_op_load_coords) {
         ppir_node *ldtex = ppir_node_first_succ(node);
         if (!insert_to_load_tex(block, node, ldtex))
            return false;
      }
      else {
         /* not supported yet */
         assert(0);
         return false;
      }
      break;
   case ppir_node_type_load_texture:
      if (!create_new_instr(block, node))
         return false;
      break;
   case ppir_node_type_const:
      if (!insert_to_each_succ_instr(block, node))
         return false;
      break;
   case ppir_node_type_store:
   {
      if (node->op == ppir_op_store_temp) {
         if (!create_new_instr(block, node))
            return false;
         break;
      }

      /* Only the store color node should appear here.
       * Currently we always insert a move node as the end instr.
       * But it should only be done when:
       *   1. store a const node
       *   2. store a load node
       *   3. store a reg assigned in another block like loop/if
       */

      assert(node->op == ppir_op_store_color);

      ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
      if (unlikely(!move))
         return false;

      ppir_debug("node_to_instr create move %d from store %d\n",
                 move->index, node->index);

      ppir_node_foreach_pred_safe(node, dep) {
         ppir_node *pred = dep->pred;
         /* we can't do this in this function except here as this
          * store is the root of this recursion */
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(move, pred);
      }

      ppir_node_add_dep(node, move);
      list_addtail(&move->list, &node->list);

      /* the mov reads what the store used to read ... */
      ppir_alu_node *alu = ppir_node_to_alu(move);
      ppir_store_node *store = ppir_node_to_store(node);
      alu->src[0] = store->src;
      alu->num_src = 1;

      alu->dest.type = ppir_target_ssa;
      alu->dest.ssa.num_components = 4;
      alu->dest.ssa.live_in = INT_MAX;
      alu->dest.ssa.live_out = 0;
      alu->dest.write_mask = 0xf;

      /* ... and the store now reads the mov's SSA result */
      store->src.type = ppir_target_ssa;
      store->src.ssa = &alu->dest.ssa;

      if (!create_new_instr(block, move))
         return false;

      move->instr->is_end = true;
      node->instr = move->instr;

      /* use move for the following recursion */
      node = move;
      break;
   }
   default:
      return false;
   }

   /* we have to make sure the dep not be destroyed (due to
    * succ change) in ppir_do_node_to_instr, otherwise we can't
    * do recursion like this */
   ppir_node_foreach_pred(node, dep) {
      ppir_node *pred = dep->pred;
      bool ready = true;

      /* pred may already be processed by the previous pred
       * (this pred may be both node and previous pred's child) */
      if (pred->instr)
         continue;

      /* insert pred only when all its successors have been inserted */
      ppir_node_foreach_succ(pred, succ_dep) {
         ppir_node *succ = succ_dep->succ;
         if (!succ->instr) {
            ready = false;
            break;
         }
      }

      if (ready) {
         if (!ppir_do_node_to_instr(block, pred))
            return false;
      }
   }

   return true;
}
|
||||
|
||||
/* Run ppir_do_node_to_instr() on every root node (a node without
 * successors) of every block. Stops and returns false on the first
 * failure.
 */
static bool ppir_create_instr_from_node(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      list_for_each_entry(ppir_node, cur, &blk->node_list, list) {
         if (!ppir_node_is_root(cur))
            continue;

         if (!ppir_do_node_to_instr(blk, cur))
            return false;
      }
   }

   return true;
}
|
||||
|
||||
/* Derive instruction dependencies from node dependencies: an instruction
 * depends on every other instruction that holds a predecessor of one of
 * the nodes in its slots.
 */
static void ppir_build_instr_dependency(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, cur, &blk->instr_list, list) {
         for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
            ppir_node *occupant = cur->slots[slot];
            if (!occupant)
               continue;

            ppir_node_foreach_pred(occupant, dep) {
               ppir_node *producer = dep->pred;
               /* skip unplaced preds and preds sharing this instr */
               if (producer->instr && producer->instr != cur)
                  ppir_instr_add_dep(cur, producer->instr);
            }
         }
      }
   }
}
|
||||
|
||||
/* Entry point of the node-to-instruction pass: create instructions from
 * the node graph, then build the dependencies between them. The
 * instruction list and dependency graph are printed via the
 * ppir_instr_print_* helpers after each step.
 *
 * Returns false if instruction creation fails.
 */
bool ppir_node_to_instr(ppir_compiler *comp)
{
   bool created = ppir_create_instr_from_node(comp);

   if (created) {
      ppir_instr_print_list(comp);

      ppir_build_instr_dependency(comp);
      ppir_instr_print_dep(comp);
   }

   return created;
}
|
|
@ -0,0 +1,512 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
* Copyright (c) 2013 Connor Abbott
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIMA_IR_PP_PPIR_H
|
||||
#define LIMA_IR_PP_PPIR_H
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/list.h"
|
||||
|
||||
#include "ir/lima_ir.h"
|
||||
|
||||
/* Operations a ppir node can perform. The enumerators are consecutive
 * starting at 0 and ppir_op_num is the number of operations; the order
 * must not change because ppir_op_infos[] is indexed by these values.
 */
typedef enum {
   /* basic arithmetic */
   ppir_op_mov = 0,
   ppir_op_add,

   /* derivatives */
   ppir_op_ddx,
   ppir_op_ddy,

   ppir_op_mul,
   ppir_op_rcp,

   ppir_op_sin_lut,
   ppir_op_cos_lut,

   ppir_op_sum3,
   ppir_op_sum4,

   ppir_op_normalize2,
   ppir_op_normalize3,
   ppir_op_normalize4,

   ppir_op_select,

   /* trigonometry */
   ppir_op_sin,
   ppir_op_cos,
   ppir_op_tan,
   ppir_op_asin,
   ppir_op_acos,

   ppir_op_atan,
   ppir_op_atan2,
   ppir_op_atan_pt1,
   ppir_op_atan2_pt1,
   ppir_op_atan_pt2,

   /* exponentials and roots */
   ppir_op_exp,
   ppir_op_log,
   ppir_op_exp2,
   ppir_op_log2,
   ppir_op_sqrt,
   ppir_op_rsqrt,

   /* rounding and range */
   ppir_op_sign,
   ppir_op_floor,
   ppir_op_ceil,
   ppir_op_fract,
   ppir_op_mod,
   ppir_op_min,
   ppir_op_max,

   /* dot products */
   ppir_op_dot2,
   ppir_op_dot3,
   ppir_op_dot4,

   /* logic */
   ppir_op_and,
   ppir_op_or,
   ppir_op_xor,

   /* comparisons */
   ppir_op_lt,
   ppir_op_gt,
   ppir_op_le,
   ppir_op_ge,
   ppir_op_eq,
   ppir_op_ne,
   ppir_op_not,

   /* loads */
   ppir_op_load_uniform,
   ppir_op_load_varying,
   ppir_op_load_coords,
   ppir_op_load_texture,
   ppir_op_load_temp,

   /* stores */
   ppir_op_store_temp,
   ppir_op_store_color,

   ppir_op_const,

   /* number of ops, keep last */
   ppir_op_num,
} ppir_op;
|
||||
|
||||
/* Kind of a ppir node; selects which ppir_*_node struct a ppir_node
 * pointer may be cast to.
 */
typedef enum {
   ppir_node_type_alu = 0,       /* ppir_alu_node */
   ppir_node_type_const,         /* ppir_const_node */
   ppir_node_type_load,          /* ppir_load_node */
   ppir_node_type_store,         /* ppir_store_node */
   ppir_node_type_load_texture,  /* ppir_load_texture_node */
} ppir_node_type;
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
ppir_node_type type;
|
||||
int *slots;
|
||||
} ppir_op_info;
|
||||
|
||||
extern const ppir_op_info ppir_op_infos[];
|
||||
|
||||
typedef struct {
|
||||
void *pred, *succ;
|
||||
struct list_head pred_link;
|
||||
struct list_head succ_link;
|
||||
} ppir_dep;
|
||||
|
||||
typedef struct ppir_node {
|
||||
struct list_head list;
|
||||
ppir_op op;
|
||||
ppir_node_type type;
|
||||
int index;
|
||||
char name[16];
|
||||
bool printed;
|
||||
struct ppir_instr *instr;
|
||||
int instr_pos;
|
||||
struct ppir_block *block;
|
||||
|
||||
/* for scheduler */
|
||||
struct list_head succ_list;
|
||||
struct list_head pred_list;
|
||||
} ppir_node;
|
||||
|
||||
/* Pipeline registers. The numeric order matters: the register index
 * helpers compute (pipeline + 12) * 4 from these values.
 */
typedef enum {
   ppir_pipeline_reg_const0 = 0,
   ppir_pipeline_reg_const1,
   ppir_pipeline_reg_sampler,
   ppir_pipeline_reg_uniform,
   ppir_pipeline_reg_vmul,
   ppir_pipeline_reg_fmul,
   ppir_pipeline_reg_discard, /* varying load */
} ppir_pipeline;
|
||||
|
||||
typedef struct ppir_reg {
|
||||
struct list_head list;
|
||||
int index;
|
||||
int num_components;
|
||||
/* whether this reg has to start from the x component
|
||||
* of a full physical reg, this is true for reg used
|
||||
* in load/store instr which has no swizzle field
|
||||
*/
|
||||
bool is_head;
|
||||
/* instr live range */
|
||||
int live_in, live_out;
|
||||
bool spilled;
|
||||
} ppir_reg;
|
||||
|
||||
/* Where a source reads from / a destination writes to; selects the
 * active member of the union in ppir_src and ppir_dest.
 */
typedef enum {
   ppir_target_ssa = 0,
   ppir_target_pipeline,
   ppir_target_register,
} ppir_target;
|
||||
|
||||
typedef struct ppir_src {
|
||||
ppir_target type;
|
||||
|
||||
union {
|
||||
ppir_reg *ssa;
|
||||
ppir_reg *reg;
|
||||
ppir_pipeline pipeline;
|
||||
};
|
||||
|
||||
uint8_t swizzle[4];
|
||||
bool absolute, negate;
|
||||
} ppir_src;
|
||||
|
||||
/* Output modifier applied to a destination. */
typedef enum {
   ppir_outmod_none = 0,
   ppir_outmod_clamp_fraction,
   ppir_outmod_clamp_positive,
   ppir_outmod_round,
} ppir_outmod;
|
||||
|
||||
typedef struct ppir_dest {
|
||||
ppir_target type;
|
||||
|
||||
union {
|
||||
ppir_reg ssa;
|
||||
ppir_reg *reg;
|
||||
ppir_pipeline pipeline;
|
||||
};
|
||||
|
||||
ppir_outmod modifier;
|
||||
unsigned write_mask : 4;
|
||||
} ppir_dest;
|
||||
|
||||
typedef struct {
|
||||
ppir_node node;
|
||||
ppir_dest dest;
|
||||
ppir_src src[3];
|
||||
int num_src;
|
||||
int shift : 3; /* Only used for ppir_op_mul */
|
||||
} ppir_alu_node;
|
||||
|
||||
typedef struct ppir_const {
|
||||
union fi value[4];
|
||||
int num;
|
||||
} ppir_const;
|
||||
|
||||
typedef struct {
|
||||
ppir_node node;
|
||||
ppir_const constant;
|
||||
ppir_dest dest;
|
||||
} ppir_const_node;
|
||||
|
||||
typedef struct {
|
||||
ppir_node node;
|
||||
int index;
|
||||
int num_components;
|
||||
ppir_dest dest;
|
||||
ppir_src src;
|
||||
} ppir_load_node;
|
||||
|
||||
typedef struct {
|
||||
ppir_node node;
|
||||
int index;
|
||||
int num_components;
|
||||
ppir_src src;
|
||||
} ppir_store_node;
|
||||
|
||||
typedef struct {
|
||||
ppir_node node;
|
||||
ppir_dest dest;
|
||||
ppir_src src_coords;
|
||||
int sampler;
|
||||
int sampler_dim;
|
||||
} ppir_load_texture_node;
|
||||
|
||||
/* Slots of one instruction. PPIR_INSTR_SLOT_NUM is the size of
 * ppir_instr::slots; PPIR_INSTR_SLOT_END follows it and therefore has no
 * entry in that array. ALU_START/ALU_END alias the first and last ALU
 * slot.
 */
enum ppir_instr_slot {
   PPIR_INSTR_SLOT_VARYING = 0,
   PPIR_INSTR_SLOT_TEXLD,
   PPIR_INSTR_SLOT_UNIFORM,
   PPIR_INSTR_SLOT_ALU_VEC_MUL,
   PPIR_INSTR_SLOT_ALU_SCL_MUL,
   PPIR_INSTR_SLOT_ALU_VEC_ADD,
   PPIR_INSTR_SLOT_ALU_SCL_ADD,
   PPIR_INSTR_SLOT_ALU_COMBINE,
   PPIR_INSTR_SLOT_STORE_TEMP,
   PPIR_INSTR_SLOT_NUM,
   PPIR_INSTR_SLOT_END,

   /* aliases for the ALU slot range */
   PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL,
   PPIR_INSTR_SLOT_ALU_END = PPIR_INSTR_SLOT_ALU_COMBINE,
};
|
||||
|
||||
typedef struct ppir_instr {
|
||||
struct list_head list;
|
||||
int index;
|
||||
bool printed;
|
||||
int seq; /* command sequence after schedule */
|
||||
|
||||
ppir_node *slots[PPIR_INSTR_SLOT_NUM];
|
||||
ppir_const constant[2];
|
||||
bool is_end;
|
||||
|
||||
/* for scheduler */
|
||||
struct list_head succ_list;
|
||||
struct list_head pred_list;
|
||||
float reg_pressure;
|
||||
int est; /* earliest start time */
|
||||
int parent_index;
|
||||
bool scheduled;
|
||||
} ppir_instr;
|
||||
|
||||
typedef struct ppir_block {
|
||||
struct list_head list;
|
||||
struct list_head node_list;
|
||||
struct list_head instr_list;
|
||||
struct ppir_compiler *comp;
|
||||
|
||||
/* for scheduler */
|
||||
int sched_instr_index;
|
||||
int sched_instr_base;
|
||||
} ppir_block;
|
||||
|
||||
struct ra_regs;
|
||||
struct lima_fs_shader_state;
|
||||
|
||||
typedef struct ppir_compiler {
|
||||
struct list_head block_list;
|
||||
int cur_index;
|
||||
int cur_instr_index;
|
||||
|
||||
struct list_head reg_list;
|
||||
|
||||
/* array for searching ssa/reg node */
|
||||
ppir_node **var_nodes;
|
||||
unsigned reg_base;
|
||||
|
||||
struct ra_regs *ra;
|
||||
struct lima_fs_shader_state *prog;
|
||||
|
||||
/* for scheduler */
|
||||
int sched_instr_base;
|
||||
|
||||
/* for regalloc spilling debug */
|
||||
int force_spilling;
|
||||
} ppir_compiler;
|
||||
|
||||
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask);
|
||||
void ppir_node_add_dep(ppir_node *succ, ppir_node *pred);
|
||||
void ppir_node_remove_dep(ppir_dep *dep);
|
||||
void ppir_node_delete(ppir_node *node);
|
||||
void ppir_node_print_prog(ppir_compiler *comp);
|
||||
void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child);
|
||||
void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src);
|
||||
void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred);
|
||||
|
||||
/* A root node has no successors. */
static inline bool ppir_node_is_root(ppir_node *node)
{
   bool no_succ = list_empty(&node->succ_list);

   return no_succ;
}
|
||||
|
||||
/* A leaf node has no predecessors. */
static inline bool ppir_node_is_leaf(ppir_node *node)
{
   bool no_pred = list_empty(&node->pred_list);

   return no_pred;
}
|
||||
|
||||
/* True when node has exactly one successor edge. */
static inline bool ppir_node_has_single_succ(ppir_node *node)
{
   struct list_head *succs = &node->succ_list;

   return list_is_singular(succs);
}
|
||||
|
||||
/* Return the successor at the head of node's successor list.
 * The caller must make sure the list is not empty.
 */
static inline ppir_node *ppir_node_first_succ(ppir_node *node)
{
   ppir_dep *head = list_first_entry(&node->succ_list, ppir_dep, succ_link);

   return head->succ;
}
|
||||
|
||||
/* True when node has exactly one predecessor edge. */
static inline bool ppir_node_has_single_pred(ppir_node *node)
{
   struct list_head *preds = &node->pred_list;

   return list_is_singular(preds);
}
|
||||
|
||||
/* Return the predecessor at the head of node's predecessor list.
 * The caller must make sure the list is not empty.
 */
static inline ppir_node *ppir_node_first_pred(ppir_node *node)
{
   ppir_dep *head = list_first_entry(&node->pred_list, ppir_dep, pred_link);

   return head->pred;
}
|
||||
|
||||
/* Iterate over the ppir_dep edges of a node. `dep` names the loop
 * variable (a ppir_dep *) declared by the macro. The _safe variants
 * allow the current edge to be unlinked inside the loop body.
 * The node argument is parenthesized so that any pointer expression can
 * be passed.
 */
#define ppir_node_foreach_succ(node, dep) \
   list_for_each_entry(ppir_dep, dep, &(node)->succ_list, succ_link)
#define ppir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(ppir_dep, dep, &(node)->succ_list, succ_link)
#define ppir_node_foreach_pred(node, dep) \
   list_for_each_entry(ppir_dep, dep, &(node)->pred_list, pred_link)
#define ppir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(ppir_dep, dep, &(node)->pred_list, pred_link)
|
||||
|
||||
/* Downcasts from the common ppir_node header to the typed node structs.
 * No checking is done: the caller must know the node's type.
 */
#define ppir_node_to_alu(node)          ((ppir_alu_node *)(node))
#define ppir_node_to_const(node)        ((ppir_const_node *)(node))
#define ppir_node_to_load(node)         ((ppir_load_node *)(node))
#define ppir_node_to_store(node)        ((ppir_store_node *)(node))
#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node))
|
||||
|
||||
/* Return a pointer to the destination of node, or NULL for node types
 * without a destination (e.g. store nodes).
 */
static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
{
   ppir_dest *found = NULL;

   switch (node->type) {
   case ppir_node_type_alu:
      found = &ppir_node_to_alu(node)->dest;
      break;
   case ppir_node_type_load:
      found = &ppir_node_to_load(node)->dest;
      break;
   case ppir_node_type_const:
      found = &ppir_node_to_const(node)->dest;
      break;
   case ppir_node_type_load_texture:
      found = &ppir_node_to_load_texture(node)->dest;
      break;
   default:
      break;
   }

   return found;
}
|
||||
|
||||
/* Make src read what dest writes: copy the target type and the matching
 * target (address of the inline SSA value, register pointer, or pipeline
 * register). Swizzle and modifiers of src are left untouched.
 */
static inline void ppir_node_target_assign(ppir_src *src, ppir_dest *dest)
{
   const ppir_target kind = dest->type;

   src->type = kind;

   if (kind == ppir_target_ssa)
      src->ssa = &dest->ssa;
   else if (kind == ppir_target_register)
      src->reg = dest->reg;
   else if (kind == ppir_target_pipeline)
      src->pipeline = dest->pipeline;
}
|
||||
|
||||
/* True when src reads exactly the target that dest writes: same target
 * type and the same SSA value / register / pipeline register.
 */
static inline bool ppir_node_target_equal(ppir_src *src, ppir_dest *dest)
{
   if (src->type != dest->type)
      return false;

   switch (src->type) {
   case ppir_target_ssa:
      return src->ssa == &dest->ssa;
   case ppir_target_register:
      return src->reg == dest->reg;
   case ppir_target_pipeline:
      return src->pipeline == dest->pipeline;
   default:
      return true;
   }
}
|
||||
|
||||
/* Register index a source reads from. SSA and register sources return
 * the index stored in their ppir_reg. Pipeline sources return
 * (pipeline + 12) * 4, except the discard register which returns 15 * 4.
 * Returns -1 for an unknown target type.
 */
static inline int ppir_target_get_src_reg_index(ppir_src *src)
{
   if (src->type == ppir_target_ssa)
      return src->ssa->index;

   if (src->type == ppir_target_register)
      return src->reg->index;

   if (src->type == ppir_target_pipeline) {
      return src->pipeline == ppir_pipeline_reg_discard ?
             15 * 4 : (src->pipeline + 12) * 4;
   }

   return -1;
}
|
||||
|
||||
/* Register index a destination writes to. SSA and register destinations
 * return the index stored in their ppir_reg. Pipeline destinations
 * return (pipeline + 12) * 4, except the discard register which returns
 * 15 * 4. Returns -1 for an unknown target type.
 */
static inline int ppir_target_get_dest_reg_index(ppir_dest *dest)
{
   if (dest->type == ppir_target_ssa)
      return dest->ssa.index;

   if (dest->type == ppir_target_register)
      return dest->reg->index;

   if (dest->type == ppir_target_pipeline) {
      return dest->pipeline == ppir_pipeline_reg_discard ?
             15 * 4 : (dest->pipeline + 12) * 4;
   }

   return -1;
}
|
||||
|
||||
/* Whether dest holds a scalar:
 *  - SSA:      the value has exactly one component.
 *  - register: at most one bit of the 4-bit write mask is set.
 *  - pipeline: only the fmul pipeline register counts as scalar.
 */
static inline bool ppir_target_is_scaler(ppir_dest *dest)
{
   switch (dest->type) {
   case ppir_target_ssa:
      return dest->ssa.num_components == 1;
   case ppir_target_register: {
      /* clearing the lowest set bit leaves 0 iff no two bits are set */
      unsigned mask = dest->write_mask;
      return (mask & (mask - 1)) == 0;
   }
   case ppir_target_pipeline:
      return dest->pipeline == ppir_pipeline_reg_fmul;
   default:
      return false;
   }
}
|
||||
|
||||
ppir_instr *ppir_instr_create(ppir_block *block);
|
||||
bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node);
|
||||
void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred);
|
||||
void ppir_instr_print_list(ppir_compiler *comp);
|
||||
void ppir_instr_print_dep(ppir_compiler *comp);
|
||||
void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul);
|
||||
|
||||
/* Iterate over the ppir_dep edges of an instruction. `dep` names the
 * loop variable (a ppir_dep *) declared by the macro. The _safe variants
 * allow the current edge to be unlinked inside the loop body.
 * The instr argument is parenthesized so that any pointer expression can
 * be passed.
 */
#define ppir_instr_foreach_succ(instr, dep) \
   list_for_each_entry(ppir_dep, dep, &(instr)->succ_list, succ_link)
#define ppir_instr_foreach_succ_safe(instr, dep) \
   list_for_each_entry_safe(ppir_dep, dep, &(instr)->succ_list, succ_link)
#define ppir_instr_foreach_pred(instr, dep) \
   list_for_each_entry(ppir_dep, dep, &(instr)->pred_list, pred_link)
#define ppir_instr_foreach_pred_safe(instr, dep) \
   list_for_each_entry_safe(ppir_dep, dep, &(instr)->pred_list, pred_link)
|
||||
|
||||
/* A root instruction has no successors. */
static inline bool ppir_instr_is_root(ppir_instr *instr)
{
   bool no_succ = list_empty(&instr->succ_list);

   return no_succ;
}
|
||||
|
||||
/* A leaf instruction has no predecessors. */
static inline bool ppir_instr_is_leaf(ppir_instr *instr)
{
   bool no_pred = list_empty(&instr->pred_list);

   return no_pred;
}
|
||||
|
||||
bool ppir_lower_prog(ppir_compiler *comp);
|
||||
bool ppir_node_to_instr(ppir_compiler *comp);
|
||||
bool ppir_schedule_prog(ppir_compiler *comp);
|
||||
bool ppir_regalloc_prog(ppir_compiler *comp);
|
||||
bool ppir_codegen_prog(ppir_compiler *comp);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,757 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "ppir.h"
|
||||
#include "lima_context.h"
|
||||
|
||||
/* Number of full (4-component) registers handed to the allocator. */
#define PPIR_FULL_REG_NUM 6

/* Number of allocator registers per class: every contiguous run of
 * components inside a full register is its own allocator register. The
 * "head" classes only contain the runs starting at component x.
 */
#define PPIR_VEC1_REG_NUM       (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
#define PPIR_VEC2_REG_NUM       (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
#define PPIR_VEC3_REG_NUM       (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
#define PPIR_VEC4_REG_NUM       PPIR_FULL_REG_NUM       /* xyzw */
#define PPIR_HEAD_VEC1_REG_NUM  PPIR_FULL_REG_NUM       /* x */
#define PPIR_HEAD_VEC2_REG_NUM  PPIR_FULL_REG_NUM       /* xy */
#define PPIR_HEAD_VEC3_REG_NUM  PPIR_FULL_REG_NUM       /* xyz */
#define PPIR_HEAD_VEC4_REG_NUM  PPIR_FULL_REG_NUM       /* xyzw */

/* First allocator register of each class; the classes are laid out back
 * to back and PPIR_REG_COUNT is the total.
 */
#define PPIR_VEC1_REG_BASE       0
#define PPIR_VEC2_REG_BASE       (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
#define PPIR_VEC3_REG_BASE       (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
#define PPIR_VEC4_REG_BASE       (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
#define PPIR_HEAD_VEC1_REG_BASE  (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
#define PPIR_HEAD_VEC2_REG_BASE  (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
#define PPIR_HEAD_VEC3_REG_BASE  (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
#define PPIR_HEAD_VEC4_REG_BASE  (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
#define PPIR_REG_COUNT           (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
|
||||
|
||||
/* Register classes known to the allocator, one per vector width, in a
 * plain and a "head" flavour.
 */
enum ppir_ra_reg_class {
   ppir_ra_reg_class_vec1 = 0,
   ppir_ra_reg_class_vec2,
   ppir_ra_reg_class_vec3,
   ppir_ra_reg_class_vec4,

   /* 4 reg class for load/store instr regs:
    * load/store instr has no swizzle field, so the (virtual) register
    * must be allocated at the beginning of a (physical) register,
    */
   ppir_ra_reg_class_head_vec1,
   ppir_ra_reg_class_head_vec2,
   ppir_ra_reg_class_head_vec3,
   ppir_ra_reg_class_head_vec4,

   /* number of classes, keep last */
   ppir_ra_reg_class_num,
};
|
||||
|
||||
static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
|
||||
[ppir_ra_reg_class_vec1] = PPIR_VEC1_REG_BASE,
|
||||
[ppir_ra_reg_class_vec2] = PPIR_VEC2_REG_BASE,
|
||||
[ppir_ra_reg_class_vec3] = PPIR_VEC3_REG_BASE,
|
||||
[ppir_ra_reg_class_vec4] = PPIR_VEC4_REG_BASE,
|
||||
[ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
|
||||
[ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
|
||||
[ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
|
||||
[ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
|
||||
[ppir_ra_reg_class_num] = PPIR_REG_COUNT,
|
||||
};
|
||||
|
||||
static unsigned int *
|
||||
ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
|
||||
(unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
|
||||
(unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
|
||||
(unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
|
||||
(unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
|
||||
(unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
|
||||
(unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
|
||||
(unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
|
||||
(unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
|
||||
};
|
||||
|
||||
struct ra_regs *ppir_regalloc_init(void *mem_ctx)
|
||||
{
|
||||
struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
|
||||
if (!ret)
|
||||
return NULL;
|
||||
|
||||
/* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
|
||||
static const int class_reg_num[ppir_ra_reg_class_num] = {
|
||||
4, 3, 2, 1, 1, 1, 1, 1,
|
||||
};
|
||||
/* base reg (x, y, z, w) confliction with other regs */
|
||||
for (int h = 0; h < 4; h++) {
|
||||
int base_reg_mask = 1 << h;
|
||||
for (int i = 1; i < ppir_ra_reg_class_num; i++) {
|
||||
int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
|
||||
for (int j = 0; j < class_reg_num[i]; j++) {
|
||||
if (base_reg_mask & (class_reg_base_mask << j)) {
|
||||
for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
|
||||
ra_add_reg_conflict(ret, k * 4 + h,
|
||||
ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* build all other confliction by the base reg confliction */
|
||||
for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
|
||||
ra_make_reg_conflicts_transitive(ret, i);
|
||||
|
||||
for (int i = 0; i < ppir_ra_reg_class_num; i++)
|
||||
ra_alloc_reg_class(ret);
|
||||
|
||||
int reg_index = 0;
|
||||
for (int i = 0; i < ppir_ra_reg_class_num; i++) {
|
||||
while (reg_index < ppir_ra_reg_base[i + 1])
|
||||
ra_class_add_reg(ret, i, reg_index++);
|
||||
}
|
||||
|
||||
ra_set_finalize(ret, ppir_ra_reg_q_values);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Return the register a source reads from, or NULL when the source is
 * neither an SSA value nor a register (e.g. a pipeline register).
 */
static ppir_reg *get_src_reg(ppir_src *src)
{
   if (src->type == ppir_target_ssa)
      return src->ssa;

   if (src->type == ppir_target_register)
      return src->reg;

   return NULL;
}
|
||||
|
||||
/* Append the SSA destination of every placed node to comp->reg_list so
 * that SSA values take part in register allocation.
 *
 * Skipped: store_color nodes, nodes that are not part of any instruction
 * and const nodes. Nodes without a destination, or whose destination is
 * not SSA, add nothing.
 */
static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         if (node->op == ppir_op_store_color)
            continue;

         if (!node->instr || node->op == ppir_op_const)
            continue;

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest) {
            ppir_reg *reg = NULL;

            if (dest->type == ppir_target_ssa) {
               /* the SSA value is stored inline in the dest */
               reg = &dest->ssa;
               list_addtail(&reg->list, &comp->reg_list);
            }
         }
      }
   }
}
|
||||
|
||||
/* Record a read of reg at instruction sequence number seq by pushing
 * live_out forward when needed.  A NULL reg is ignored. */
static void ppir_regalloc_note_read(ppir_reg *reg, int seq)
{
   if (reg && seq > reg->live_out)
      reg->live_out = seq;
}

/* Compute live_in / live_out for every register touched by the program.
 *
 * live_in becomes the smallest instr->seq that writes the register and
 * live_out the largest instr->seq that reads it.  The source register of
 * a store_color node gets live_out = INT_MAX and is returned to the
 * caller (the last one seen, or NULL when there is no such node).
 * Nodes without an instruction and constant nodes are ignored.
 */
static ppir_reg *ppir_regalloc_build_liveness_info(ppir_compiler *comp)
{
   ppir_reg *end_reg = NULL;

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         if (node->op == ppir_op_store_color) {
            ppir_store_node *color = ppir_node_to_store(node);

            end_reg = (color->src.type == ppir_target_ssa) ?
               color->src.ssa : color->src.reg;
            end_reg->live_out = INT_MAX;
            continue;
         }

         if (!node->instr || node->op == ppir_op_const)
            continue;

         /* write side: the destination register starts living here */
         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest) {
            ppir_reg *written = NULL;

            if (dest->type == ppir_target_ssa)
               written = &dest->ssa;
            else if (dest->type == ppir_target_register)
               written = dest->reg;

            if (written && node->instr->seq < written->live_in)
               written->live_in = node->instr->seq;
         }

         /* read side: every register source is alive at least until here */
         switch (node->type) {
         case ppir_node_type_alu: {
            ppir_alu_node *alu = ppir_node_to_alu(node);
            for (int s = 0; s < alu->num_src; s++)
               ppir_regalloc_note_read(get_src_reg(alu->src + s),
                                       node->instr->seq);
            break;
         }
         case ppir_node_type_store:
            ppir_regalloc_note_read(
               get_src_reg(&ppir_node_to_store(node)->src),
               node->instr->seq);
            break;
         case ppir_node_type_load:
            ppir_regalloc_note_read(
               get_src_reg(&ppir_node_to_load(node)->src),
               node->instr->seq);
            break;
         case ppir_node_type_load_texture:
            ppir_regalloc_note_read(
               get_src_reg(&ppir_node_to_load_texture(node)->src_coords),
               node->instr->seq);
            break;
         default:
            break;
         }
      }
   }

   return end_reg;
}
|
||||
|
||||
static int get_phy_reg_index(int reg)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ppir_ra_reg_class_num; i++) {
|
||||
if (reg < ppir_ra_reg_base[i + 1]) {
|
||||
reg -= ppir_ra_reg_base[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < ppir_ra_reg_class_head_vec1)
|
||||
return reg / (4 - i) * 4 + reg % (4 - i);
|
||||
else
|
||||
return reg * 4;
|
||||
}
|
||||
|
||||
/* Dump the allocation result to stdout.
 *
 * One line is printed per instruction: the instruction index followed by
 * one "(node|dest|srcs)" group for every occupied slot, where dest and
 * srcs are the register indices reported by
 * ppir_target_get_dest_reg_index() / ppir_target_get_src_reg_index().
 */
static void ppir_regalloc_print_result(ppir_compiler *comp)
{
   printf("======ppir regalloc result======\n");

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d:", instr->index);

         for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
            ppir_node *node = instr->slots[slot];
            if (node == NULL)
               continue;

            printf(" (%d|", node->index);

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest != NULL)
               printf("%d", ppir_target_get_dest_reg_index(dest));

            printf("|");

            if (node->type == ppir_node_type_alu) {
               ppir_alu_node *alu = ppir_node_to_alu(node);
               for (int s = 0; s < alu->num_src; s++) {
                  /* blank separated source list */
                  if (s != 0)
                     printf(" ");
                  printf("%d", ppir_target_get_src_reg_index(alu->src + s));
               }
            }
            else if (node->type == ppir_node_type_store) {
               ppir_store_node *store = ppir_node_to_store(node);
               printf("%d", ppir_target_get_src_reg_index(&store->src));
            }
            else if (node->type == ppir_node_type_load) {
               ppir_load_node *load = ppir_node_to_load(node);
               /* the source is only printed when num_components is zero */
               if (!load->num_components)
                  printf("%d", ppir_target_get_src_reg_index(&load->src));
            }
            else if (node->type == ppir_node_type_load_texture) {
               ppir_load_texture_node *tex = ppir_node_to_load_texture(node);
               printf("%d", ppir_target_get_src_reg_index(&tex->src_coords));
            }

            printf(")");
         }

         printf("\n");
      }
   }

   printf("--------------------------\n");
}
|
||||
|
||||
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
|
||||
ppir_node *node)
|
||||
{
|
||||
ppir_instr *newinstr = ppir_instr_create(block);
|
||||
if (unlikely(!newinstr))
|
||||
return false;
|
||||
|
||||
list_del(&newinstr->list);
|
||||
list_add(&newinstr->list, &ref->list);
|
||||
|
||||
if (!ppir_instr_insert_node(newinstr, node))
|
||||
return false;
|
||||
|
||||
list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
|
||||
instr->seq++;
|
||||
}
|
||||
newinstr->seq = ref->seq+1;
|
||||
newinstr->scheduled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
|
||||
ppir_node *node)
|
||||
{
|
||||
ppir_instr *newinstr = ppir_instr_create(block);
|
||||
if (unlikely(!newinstr))
|
||||
return false;
|
||||
|
||||
list_del(&newinstr->list);
|
||||
list_addtail(&newinstr->list, &ref->list);
|
||||
|
||||
if (!ppir_instr_insert_node(newinstr, node))
|
||||
return false;
|
||||
|
||||
list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
|
||||
instr->seq++;
|
||||
}
|
||||
newinstr->seq = ref->seq-1;
|
||||
newinstr->scheduled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static ppir_alu_node* ppir_update_spilled_src(ppir_compiler *comp,
|
||||
ppir_block *block,
|
||||
ppir_node *node, ppir_src *src,
|
||||
ppir_alu_node *move_alu)
|
||||
{
|
||||
/* alu nodes may have multiple references to the same value.
|
||||
* try to avoid unnecessary loads for the same alu node by
|
||||
* saving the node resulting from the temporary load */
|
||||
if (move_alu)
|
||||
goto update_src;
|
||||
|
||||
/* alloc new node to load value */
|
||||
ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
|
||||
if (!load_node)
|
||||
return NULL;
|
||||
list_addtail(&load_node->list, &node->list);
|
||||
|
||||
ppir_load_node *load = ppir_node_to_load(load_node);
|
||||
|
||||
load->index = -comp->prog->stack_size; /* index sizes are negative */
|
||||
load->num_components = src->reg->num_components;
|
||||
|
||||
ppir_dest *ld_dest = &load->dest;
|
||||
ld_dest->type = ppir_target_pipeline;
|
||||
ld_dest->pipeline = ppir_pipeline_reg_uniform;
|
||||
ld_dest->write_mask = 0xf;
|
||||
|
||||
create_new_instr_before(block, node->instr, load_node);
|
||||
|
||||
/* Create move node */
|
||||
ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
|
||||
if (unlikely(!move_node))
|
||||
return false;
|
||||
list_addtail(&move_node->list, &node->list);
|
||||
|
||||
move_alu = ppir_node_to_alu(move_node);
|
||||
|
||||
move_alu->num_src = 1;
|
||||
move_alu->src->type = ppir_target_pipeline;
|
||||
move_alu->src->pipeline = ppir_pipeline_reg_uniform;
|
||||
for (int i = 0; i < 4; i++)
|
||||
move_alu->src->swizzle[i] = i;
|
||||
|
||||
ppir_dest *alu_dest = &move_alu->dest;
|
||||
alu_dest->type = ppir_target_ssa;
|
||||
alu_dest->ssa.num_components = 4;
|
||||
alu_dest->ssa.live_in = INT_MAX;
|
||||
alu_dest->ssa.live_out = 0;
|
||||
alu_dest->write_mask = 0xf;
|
||||
|
||||
list_addtail(&alu_dest->ssa.list, &comp->reg_list);
|
||||
|
||||
if (!ppir_instr_insert_node(load_node->instr, move_node))
|
||||
return false;
|
||||
|
||||
/* insert the new node as predecessor */
|
||||
ppir_node_foreach_pred_safe(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(load_node, pred);
|
||||
}
|
||||
ppir_node_add_dep(node, move_node);
|
||||
ppir_node_add_dep(move_node, load_node);
|
||||
|
||||
update_src:
|
||||
/* switch node src to use the new ssa instead */
|
||||
src->type = ppir_target_ssa;
|
||||
src->ssa = &move_alu->dest.ssa;
|
||||
|
||||
return move_alu;
|
||||
}
|
||||
|
||||
/* Allocate a register with the given component count (owned by `comp`
 * through rzalloc), give it an empty live range and append it to
 * comp->reg_list.  Returns NULL when the allocation fails. */
static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
{
   ppir_reg *fresh = rzalloc(comp, ppir_reg);

   if (fresh == NULL)
      return NULL;

   fresh->is_head = false;
   fresh->num_components = num_components;

   /* empty range: live_in above any sequence number, live_out at zero */
   fresh->live_in = INT_MAX;
   fresh->live_out = 0;

   list_addtail(&fresh->list, &comp->reg_list);
   return fresh;
}
|
||||
|
||||
static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
|
||||
ppir_node *node, ppir_dest *dest)
|
||||
{
|
||||
assert(dest != NULL);
|
||||
ppir_reg *reg = NULL;
|
||||
if (dest->type == ppir_target_register) {
|
||||
reg = dest->reg;
|
||||
reg->num_components = 4;
|
||||
reg->spilled = true;
|
||||
}
|
||||
else {
|
||||
reg = create_reg(comp, 4);
|
||||
reg->spilled = true;
|
||||
list_del(&dest->ssa.list);
|
||||
}
|
||||
|
||||
/* alloc new node to load value */
|
||||
ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
|
||||
if (!load_node)
|
||||
return NULL;
|
||||
list_addtail(&load_node->list, &node->list);
|
||||
|
||||
ppir_load_node *load = ppir_node_to_load(load_node);
|
||||
|
||||
load->index = -comp->prog->stack_size; /* index sizes are negative */
|
||||
load->num_components = 4;
|
||||
|
||||
load->dest.type = ppir_target_pipeline;
|
||||
load->dest.pipeline = ppir_pipeline_reg_uniform;
|
||||
load->dest.write_mask = 0xf;
|
||||
|
||||
create_new_instr_before(block, node->instr, load_node);
|
||||
|
||||
/* Create move node */
|
||||
ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
|
||||
if (unlikely(!move_node))
|
||||
return false;
|
||||
list_addtail(&move_node->list, &node->list);
|
||||
|
||||
ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
|
||||
|
||||
move_alu->num_src = 1;
|
||||
move_alu->src->type = ppir_target_pipeline;
|
||||
move_alu->src->pipeline = ppir_pipeline_reg_uniform;
|
||||
for (int i = 0; i < 4; i++)
|
||||
move_alu->src->swizzle[i] = i;
|
||||
|
||||
move_alu->dest.type = ppir_target_register;
|
||||
move_alu->dest.reg = reg;
|
||||
move_alu->dest.write_mask = 0x0f;
|
||||
|
||||
if (!ppir_instr_insert_node(load_node->instr, move_node))
|
||||
return false;
|
||||
|
||||
ppir_node_foreach_pred_safe(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(load_node, pred);
|
||||
}
|
||||
ppir_node_add_dep(node, move_node);
|
||||
ppir_node_add_dep(move_node, load_node);
|
||||
|
||||
dest->type = ppir_target_register;
|
||||
dest->reg = reg;
|
||||
|
||||
/* alloc new node to store value */
|
||||
ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
|
||||
if (!store_node)
|
||||
return false;
|
||||
list_addtail(&store_node->list, &node->list);
|
||||
|
||||
ppir_store_node *store = ppir_node_to_store(store_node);
|
||||
|
||||
store->index = -comp->prog->stack_size; /* index sizes are negative */
|
||||
store->num_components = 4;
|
||||
|
||||
store->src.type = ppir_target_register;
|
||||
store->src.reg = dest->reg;
|
||||
|
||||
/* insert the new node as successor */
|
||||
ppir_node_foreach_succ_safe(node, dep) {
|
||||
ppir_node *succ = dep->succ;
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(succ, store_node);
|
||||
}
|
||||
ppir_node_add_dep(store_node, node);
|
||||
|
||||
create_new_instr_after(block, node->instr, store_node);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Spill register `chosen`: every node writing it is rewritten through
 * ppir_update_spilled_dest() and every source operand reading it through
 * ppir_update_spilled_src().
 *
 * Returns true when all rewrites succeeded and false as soon as one of
 * the helpers reports a failure (the program is then only partially
 * rewritten).
 */
static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {

         ppir_dest *dest = ppir_node_get_dest(node);
         ppir_reg *reg = NULL;
         if (dest) {
            if (dest->type == ppir_target_ssa)
               reg = &dest->ssa;
            else if (dest->type == ppir_target_register)
               reg = dest->reg;

            if (reg == chosen &&
                !ppir_update_spilled_dest(comp, block, node, dest))
               return false;
         }

         switch (node->type) {
         case ppir_node_type_alu:
         {
            /* alu nodes may have multiple references to the same value.
             * try to avoid unnecessary loads for the same alu node by
             * saving the node resulting from the temporary load */
            ppir_alu_node *move_alu = NULL;
            ppir_alu_node *alu = ppir_node_to_alu(node);
            for (int i = 0; i < alu->num_src; i++) {
               reg = get_src_reg(alu->src + i);
               if (reg == chosen) {
                  move_alu = ppir_update_spilled_src(comp, block, node,
                                                     alu->src + i, move_alu);
                  if (!move_alu)
                     return false;
               }
            }
            break;
         }
         case ppir_node_type_store:
         {
            ppir_store_node *store = ppir_node_to_store(node);
            reg = get_src_reg(&store->src);
            if (reg == chosen &&
                !ppir_update_spilled_src(comp, block, node, &store->src, NULL))
               return false;
            break;
         }
         case ppir_node_type_load:
         {
            ppir_load_node *load = ppir_node_to_load(node);
            reg = get_src_reg(&load->src);
            if (reg == chosen &&
                !ppir_update_spilled_src(comp, block, node, &load->src, NULL))
               return false;
            break;
         }
         case ppir_node_type_load_texture:
         {
            ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
            reg = get_src_reg(&load_tex->src_coords);
            if (reg == chosen &&
                !ppir_update_spilled_src(comp, block, node,
                                         &load_tex->src_coords, NULL))
               return false;
            break;
         }
         default:
            break;
         }
      }
   }

   return true;
}
|
||||
|
||||
/* Pick the register to spill next: the one with the largest
 * live_out - live_in span among registers that are not already marked
 * as spilled and whose live_out is not INT_MAX.  On ties the register
 * that comes first in comp->reg_list wins.
 *
 * The chosen register is marked as spilled before it is returned; NULL
 * is returned when no register qualifies.  The graph argument is not
 * used.
 */
static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
                                                 struct ra_graph *g)
{
   ppir_reg *victim = NULL;
   int widest = -1;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      if (reg->spilled || reg->live_out == INT_MAX)
         continue;

      int span = reg->live_out - reg->live_in;
      if (span > widest) {
         widest = span;
         victim = reg;
      }
   }

   if (victim != NULL)
      victim->spilled = true;

   return victim;
}
|
||||
|
||||
/* Give every register in comp->reg_list an empty live range
 * (live_in = INT_MAX, live_out = 0) so that liveness can be rebuilt
 * from scratch. */
static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
{
   list_for_each_entry(ppir_reg, cur, &comp->reg_list, list) {
      cur->live_out = 0;
      cur->live_in = INT_MAX;
   }
}
|
||||
|
||||
/* Debug knob: copied into comp->force_spilling by ppir_regalloc_prog().
 * While that counter is positive, an allocation attempt spills a register
 * even though allocation succeeded.  Defaults to 0 (no forced spilling). */
int lima_ppir_force_spilling = 0;
|
||||
|
||||
/* Run one register allocation attempt.
 *
 * Liveness is rebuilt, an interference graph with one node per entry of
 * comp->reg_list is created, every pair of registers with overlapping
 * live ranges is marked as interfering, and the allocator is run.
 *
 * On success the allocator's result is translated with
 * get_phy_reg_index() into each register's index and true is returned.
 *
 * When allocation fails, or while comp->force_spilling is still
 * positive, one register is chosen and spilled; *spilled is then set to
 * true and false is returned so that the caller retries.  When nothing
 * can be spilled, false is returned with *spilled == false.
 */
static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
{
   *spilled = false;

   ppir_regalloc_reset_liveness_info(comp);
   ppir_reg *end_reg = ppir_regalloc_build_liveness_info(comp);

   struct ra_graph *g = ra_alloc_interference_graph(
      comp->ra, list_length(&comp->reg_list));
   if (!g)
      return false;

   /* -1 means the store_color source was not found in the register list
    * (or there is no store_color node at all) */
   int n = 0, end_reg_index = -1;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
      if (reg->is_head)
         c += 4;
      if (reg == end_reg)
         end_reg_index = n;
      ra_set_node_class(g, n++, c);
   }

   int n1 = 0;
   list_for_each_entry(ppir_reg, reg1, &comp->reg_list, list) {
      int n2 = n1 + 1;
      list_for_each_entry_from(ppir_reg, reg2, reg1->list.next,
                               &comp->reg_list, list) {
         bool interference = false;
         if (reg1->live_in < reg2->live_in) {
            if (reg1->live_out > reg2->live_in)
               interference = true;
         }
         else if (reg1->live_in > reg2->live_in) {
            if (reg2->live_out > reg1->live_in)
               interference = true;
         }
         else
            interference = true;

         if (interference)
            ra_add_node_interference(g, n1, n2);

         n2++;
      }
      n1++;
   }

   /* Pin the store_color source to the first vec4 register, but only
    * when such a register exists; otherwise an unrelated node 0 (or a
    * non-existent node of an empty graph) would be pinned. */
   if (end_reg_index >= 0)
      ra_set_node_reg(g, end_reg_index,
                      ppir_ra_reg_base[ppir_ra_reg_class_vec4]);

   bool ok = ra_allocate(g);
   if (!ok || (comp->force_spilling-- > 0)) {
      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
      if (chosen) {
         /* stack_size will be used to assemble the frame reg in lima_draw.
          * It is also used in the spilling code, as negative indices
          * starting from -1, to create stack addresses. */
         comp->prog->stack_size++;
         ppir_regalloc_spill_reg(comp, chosen);
         /* Ask the outer loop to call back in. */
         *spilled = true;

         ppir_debug("ppir: spilled register\n");
         goto err_out;
      }

      ppir_error("ppir: regalloc fail\n");
      goto err_out;
   }

   n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int reg_index = ra_get_node_reg(g, n++);
      reg->index = get_phy_reg_index(reg_index);
   }

   ralloc_free(g);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_regalloc_print_result(comp);

   return true;

err_out:
   ralloc_free(g);
   return false;
}
|
||||
|
||||
/* Allocate registers for the whole program.
 *
 * The stack size is reset, the forced-spilling counter is seeded from
 * lima_ppir_force_spilling (a debugging aid set from an environment
 * variable, see lima_screen.c) and the SSA registers are collected.
 * Allocation attempts are then repeated for as long as each failed
 * attempt reports that it spilled a register.
 *
 * Returns true once an attempt succeeds, false when an attempt fails
 * without having spilled anything.
 */
bool ppir_regalloc_prog(ppir_compiler *comp)
{
   bool spilled = false;

   comp->prog->stack_size = 0;
   comp->force_spilling = lima_ppir_force_spilling;

   ppir_regalloc_update_reglist_ssa(comp);

   /* this will most likely succeed in the first
    * try, except for very complicated shaders */
   for (;;) {
      if (ppir_regalloc_prog_try(comp, &spilled))
         return true;

      if (!spilled)
         return false;
   }
}
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "ppir.h"
|
||||
|
||||
|
||||
/* Compute the scheduling attributes (est and reg_pressure) of `instr`,
 * recursing first into every predecessor whose reg_pressure is still
 * negative.
 *
 * est becomes at least one more than the largest predecessor est.  An
 * instruction without predecessors gets reg_pressure 0.  Otherwise the
 * predecessors' pressures are sorted in ascending order and
 * reg_pressure is raised to max(sorted[i] + n - (i + 1)) if that is
 * larger, plus a fractional "extra register" term described below.
 */
static void ppir_schedule_calc_sched_info(ppir_instr *instr)
{
   int num_pred = 0;
   float extra_reg = 1.0;

   /* update all children's sched info */
   ppir_instr_foreach_pred(instr, dep) {
      ppir_instr *child = dep->pred;

      if (child->reg_pressure < 0)
         ppir_schedule_calc_sched_info(child);

      if (instr->est < child->est + 1)
         instr->est = child->est + 1;

      float reg_weight = 1.0 - 1.0 / list_length(&child->succ_list);
      if (extra_reg > reg_weight)
         extra_reg = reg_weight;

      num_pred++;
   }

   /* leaf instr */
   if (num_pred == 0) {
      instr->reg_pressure = 0;
      return;
   }

   /* collect the children's pressures, kept sorted ascending by
    * inserting each value at its place */
   int sorted[num_pred];
   int filled = 0;
   ppir_instr_foreach_pred(instr, dep) {
      int value = dep->pred->reg_pressure;
      int at = filled++;

      while (at > 0 && sorted[at - 1] > value) {
         sorted[at] = sorted[at - 1];
         at--;
      }
      sorted[at] = value;
   }

   for (int k = 0; k < num_pred; k++) {
      int pressure = sorted[k] + num_pred - (k + 1);
      if (pressure > instr->reg_pressure)
         instr->reg_pressure = pressure;
   }

   /* If all children of this instr have multiple parents, this instr
    * needs an extra reg to store its result.  For example, it is not
    * fair for a parent to have the same reg pressure as its child when
    * n==1 and the child has more than one successor, because 2 regs
    * are needed in that case.
    *
    * A full reg cannot be added to reg_pressure though, because the
    * last parent of a multi-successor child does not need an extra
    * reg.  For example, an instr with a single child (with multiple
    * successors) should have less reg pressure than an instr with two
    * children (with a single successor each).
    *
    * extra reg = min(all child)(1.0 - 1.0 / num successor)
    */
   instr->reg_pressure += extra_reg;
}
|
||||
|
||||
static void ppir_insert_ready_list(struct list_head *ready_list,
|
||||
ppir_instr *insert_instr)
|
||||
{
|
||||
struct list_head *insert_pos = ready_list;
|
||||
|
||||
list_for_each_entry(ppir_instr, instr, ready_list, list) {
|
||||
if (insert_instr->parent_index < instr->parent_index ||
|
||||
(insert_instr->parent_index == instr->parent_index &&
|
||||
(insert_instr->reg_pressure < instr->reg_pressure ||
|
||||
(insert_instr->reg_pressure == instr->reg_pressure &&
|
||||
(insert_instr->est >= instr->est))))) {
|
||||
insert_pos = &instr->list;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_del(&insert_instr->list);
|
||||
list_addtail(&insert_instr->list, insert_pos);
|
||||
}
|
||||
|
||||
/* Drain ready_list into block->instr_list.
 *
 * Each round takes the first ready instruction, puts it at the head of
 * the block's instruction list, marks it scheduled and assigns its seq
 * from the block's (decreasing) sched_instr_index.  Every predecessor
 * of it gets that index as parent_index and is moved to the ready list
 * once all of its successors have been scheduled.
 */
static void ppir_schedule_ready_list(ppir_block *block,
                                     struct list_head *ready_list)
{
   while (!list_empty(ready_list)) {
      ppir_instr *picked = list_first_entry(ready_list, ppir_instr, list);
      list_del(&picked->list);

      /* schedule the instr to the block instr list */
      list_add(&picked->list, &block->instr_list);
      picked->scheduled = true;
      block->sched_instr_index--;
      picked->seq = block->sched_instr_base + block->sched_instr_index;

      ppir_instr_foreach_pred(picked, pred_dep) {
         ppir_instr *pred = pred_dep->pred;
         pred->parent_index = block->sched_instr_index;

         bool all_succ_done = true;
         ppir_instr_foreach_succ(pred, succ_dep) {
            if (!succ_dep->succ->scheduled) {
               all_succ_done = false;
               break;
            }
         }

         /* all successor have been scheduled */
         if (all_succ_done)
            ppir_insert_ready_list(ready_list, pred);
      }
   }
}
|
||||
|
||||
/* Register sensitive schedule algorithm from paper:
|
||||
* "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
|
||||
* Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
|
||||
*/
|
||||
static void ppir_schedule_block(ppir_block *block)
|
||||
{
|
||||
/* move all instr to instr_list, block->instr_list will
|
||||
* contain schedule result */
|
||||
struct list_head instr_list;
|
||||
list_replace(&block->instr_list, &instr_list);
|
||||
list_inithead(&block->instr_list);
|
||||
|
||||
/* step 2 & 3 */
|
||||
list_for_each_entry(ppir_instr, instr, &instr_list, list) {
|
||||
if (ppir_instr_is_root(instr))
|
||||
ppir_schedule_calc_sched_info(instr);
|
||||
block->sched_instr_index++;
|
||||
}
|
||||
block->sched_instr_base = block->comp->sched_instr_base;
|
||||
block->comp->sched_instr_base += block->sched_instr_index;
|
||||
|
||||
/* step 4 */
|
||||
struct list_head ready_list;
|
||||
list_inithead(&ready_list);
|
||||
|
||||
/* step 5 */
|
||||
list_for_each_entry_safe(ppir_instr, instr, &instr_list, list) {
|
||||
if (ppir_instr_is_root(instr)) {
|
||||
instr->parent_index = INT_MAX;
|
||||
ppir_insert_ready_list(&ready_list, instr);
|
||||
}
|
||||
}
|
||||
|
||||
/* step 6 */
|
||||
ppir_schedule_ready_list(block, &ready_list);
|
||||
}
|
||||
|
||||
/* Schedule every block of the program, in block_list order.
 * Always returns true. */
bool ppir_schedule_prog(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, cur, &comp->block_list, list)
      ppir_schedule_block(cur);

   return true;
}
|
|
@ -0,0 +1,337 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "xf86drm.h"
|
||||
#include "drm-uapi/lima_drm.h"
|
||||
|
||||
#include "util/u_hash_table.h"
|
||||
#include "util/os_time.h"
|
||||
#include "os/os_mman.h"
|
||||
|
||||
#include "state_tracker/drm_driver.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_bo.h"
|
||||
|
||||
/* Convert a pointer-sized hash table key to an unsigned value (via
 * intptr_t); used by the hash and compare callbacks below. */
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
|
||||
|
||||
/* Hash callback for the BO tables: the key converted to unsigned is
 * used directly as the hash value. */
static unsigned handle_hash(void *key)
{
   const unsigned hash = PTR_TO_UINT(key);

   return hash;
}
|
||||
|
||||
/* Compare callback for the BO tables: returns 0 when both keys convert
 * to the same unsigned value and 1 otherwise. */
static int handle_compare(void *key1, void *key2)
{
   const unsigned lhs = PTR_TO_UINT(key1);
   const unsigned rhs = PTR_TO_UINT(key2);

   return lhs != rhs;
}
|
||||
|
||||
bool lima_bo_table_init(struct lima_screen *screen)
|
||||
{
|
||||
screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
|
||||
if (!screen->bo_handles)
|
||||
return false;
|
||||
|
||||
screen->bo_flink_names = util_hash_table_create(handle_hash, handle_compare);
|
||||
if (!screen->bo_flink_names)
|
||||
goto err_out0;
|
||||
|
||||
mtx_init(&screen->bo_table_lock, mtx_plain);
|
||||
return true;
|
||||
|
||||
err_out0:
|
||||
util_hash_table_destroy(screen->bo_handles);
|
||||
return false;
|
||||
}
|
||||
|
||||
void lima_bo_table_fini(struct lima_screen *screen)
|
||||
{
|
||||
mtx_destroy(&screen->bo_table_lock);
|
||||
util_hash_table_destroy(screen->bo_handles);
|
||||
util_hash_table_destroy(screen->bo_flink_names);
|
||||
}
|
||||
|
||||
static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle)
|
||||
{
|
||||
struct drm_gem_close args = {
|
||||
.handle = handle,
|
||||
};
|
||||
|
||||
drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &args);
|
||||
}
|
||||
|
||||
static bool lima_bo_get_info(struct lima_bo *bo)
|
||||
{
|
||||
struct drm_lima_gem_info req = {
|
||||
.handle = bo->handle,
|
||||
};
|
||||
|
||||
if(drmIoctl(bo->screen->fd, DRM_IOCTL_LIMA_GEM_INFO, &req))
|
||||
return false;
|
||||
|
||||
bo->offset = req.offset;
|
||||
bo->va = req.va;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct lima_bo *lima_bo_create(struct lima_screen *screen,
|
||||
uint32_t size, uint32_t flags)
|
||||
{
|
||||
struct lima_bo *bo;
|
||||
struct drm_lima_gem_create req = {
|
||||
.size = size,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
if (!(bo = calloc(1, sizeof(*bo))))
|
||||
return NULL;
|
||||
|
||||
if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GEM_CREATE, &req))
|
||||
goto err_out0;
|
||||
|
||||
bo->screen = screen;
|
||||
bo->size = req.size;
|
||||
bo->handle = req.handle;
|
||||
p_atomic_set(&bo->refcnt, 1);
|
||||
|
||||
if (!lima_bo_get_info(bo))
|
||||
goto err_out1;
|
||||
|
||||
return bo;
|
||||
|
||||
err_out1:
|
||||
lima_close_kms_handle(screen, bo->handle);
|
||||
err_out0:
|
||||
free(bo);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void lima_bo_free(struct lima_bo *bo)
|
||||
{
|
||||
if (!p_atomic_dec_zero(&bo->refcnt))
|
||||
return;
|
||||
|
||||
struct lima_screen *screen = bo->screen;
|
||||
mtx_lock(&screen->bo_table_lock);
|
||||
util_hash_table_remove(screen->bo_handles,
|
||||
(void *)(uintptr_t)bo->handle);
|
||||
if (bo->flink_name)
|
||||
util_hash_table_remove(screen->bo_flink_names,
|
||||
(void *)(uintptr_t)bo->flink_name);
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
|
||||
if (bo->map)
|
||||
lima_bo_unmap(bo);
|
||||
|
||||
lima_close_kms_handle(screen, bo->handle);
|
||||
free(bo);
|
||||
}
|
||||
|
||||
void *lima_bo_map(struct lima_bo *bo)
|
||||
{
|
||||
if (!bo->map) {
|
||||
bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, bo->screen->fd, bo->offset);
|
||||
if (bo->map == MAP_FAILED)
|
||||
bo->map = NULL;
|
||||
}
|
||||
|
||||
return bo->map;
|
||||
}
|
||||
|
||||
void lima_bo_unmap(struct lima_bo *bo)
|
||||
{
|
||||
if (bo->map) {
|
||||
os_munmap(bo->map, bo->size);
|
||||
bo->map = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle)
|
||||
{
|
||||
struct lima_screen *screen = bo->screen;
|
||||
|
||||
switch (handle->type) {
|
||||
case WINSYS_HANDLE_TYPE_SHARED:
|
||||
if (!bo->flink_name) {
|
||||
struct drm_gem_flink flink = {
|
||||
.handle = bo->handle,
|
||||
.name = 0,
|
||||
};
|
||||
if (drmIoctl(screen->fd, DRM_IOCTL_GEM_FLINK, &flink))
|
||||
return false;
|
||||
|
||||
bo->flink_name = flink.name;
|
||||
|
||||
mtx_lock(&screen->bo_table_lock);
|
||||
util_hash_table_set(screen->bo_flink_names,
|
||||
(void *)(uintptr_t)bo->flink_name, bo);
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
}
|
||||
handle->handle = bo->flink_name;
|
||||
return true;
|
||||
|
||||
case WINSYS_HANDLE_TYPE_KMS:
|
||||
mtx_lock(&screen->bo_table_lock);
|
||||
util_hash_table_set(screen->bo_handles,
|
||||
(void *)(uintptr_t)bo->handle, bo);
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
|
||||
handle->handle = bo->handle;
|
||||
return true;
|
||||
|
||||
case WINSYS_HANDLE_TYPE_FD:
|
||||
if (drmPrimeHandleToFD(screen->fd, bo->handle, DRM_CLOEXEC,
|
||||
(int*)&handle->handle))
|
||||
return false;
|
||||
|
||||
mtx_lock(&screen->bo_table_lock);
|
||||
util_hash_table_set(screen->bo_handles,
|
||||
(void *)(uintptr_t)bo->handle, bo);
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct lima_bo *lima_bo_import(struct lima_screen *screen,
|
||||
struct winsys_handle *handle)
|
||||
{
|
||||
struct lima_bo *bo = NULL;
|
||||
struct drm_gem_open req = {0};
|
||||
uint32_t dma_buf_size = 0;
|
||||
unsigned h = handle->handle;
|
||||
|
||||
mtx_lock(&screen->bo_table_lock);
|
||||
|
||||
/* Convert a DMA buf handle to a KMS handle now. */
|
||||
if (handle->type == WINSYS_HANDLE_TYPE_FD) {
|
||||
uint32_t prime_handle;
|
||||
off_t size;
|
||||
|
||||
/* Get a KMS handle. */
|
||||
if (drmPrimeFDToHandle(screen->fd, h, &prime_handle)) {
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Query the buffer size. */
|
||||
size = lseek(h, 0, SEEK_END);
|
||||
if (size == (off_t)-1) {
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
lima_close_kms_handle(screen, prime_handle);
|
||||
return NULL;
|
||||
}
|
||||
lseek(h, 0, SEEK_SET);
|
||||
|
||||
dma_buf_size = size;
|
||||
h = prime_handle;
|
||||
}
|
||||
|
||||
switch (handle->type) {
|
||||
case WINSYS_HANDLE_TYPE_SHARED:
|
||||
bo = util_hash_table_get(screen->bo_flink_names,
|
||||
(void *)(uintptr_t)h);
|
||||
break;
|
||||
case WINSYS_HANDLE_TYPE_KMS:
|
||||
case WINSYS_HANDLE_TYPE_FD:
|
||||
bo = util_hash_table_get(screen->bo_handles,
|
||||
(void *)(uintptr_t)h);
|
||||
break;
|
||||
default:
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (bo) {
|
||||
p_atomic_inc(&bo->refcnt);
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
return bo;
|
||||
}
|
||||
|
||||
if (!(bo = calloc(1, sizeof(*bo)))) {
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
if (handle->type == WINSYS_HANDLE_TYPE_FD)
|
||||
lima_close_kms_handle(screen, h);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bo->screen = screen;
|
||||
p_atomic_set(&bo->refcnt, 1);
|
||||
|
||||
switch (handle->type) {
|
||||
case WINSYS_HANDLE_TYPE_SHARED:
|
||||
req.name = h;
|
||||
if (drmIoctl(screen->fd, DRM_IOCTL_GEM_OPEN, &req)) {
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
free(bo);
|
||||
return NULL;
|
||||
}
|
||||
bo->handle = req.handle;
|
||||
bo->flink_name = h;
|
||||
bo->size = req.size;
|
||||
break;
|
||||
case WINSYS_HANDLE_TYPE_FD:
|
||||
bo->handle = h;
|
||||
bo->size = dma_buf_size;
|
||||
break;
|
||||
default:
|
||||
/* not possible */
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (lima_bo_get_info(bo)) {
|
||||
if (handle->type == WINSYS_HANDLE_TYPE_SHARED)
|
||||
util_hash_table_set(screen->bo_flink_names,
|
||||
(void *)(uintptr_t)bo->flink_name, bo);
|
||||
util_hash_table_set(screen->bo_handles,
|
||||
(void*)(uintptr_t)bo->handle, bo);
|
||||
}
|
||||
else {
|
||||
lima_close_kms_handle(screen, bo->handle);
|
||||
free(bo);
|
||||
bo = NULL;
|
||||
}
|
||||
|
||||
mtx_unlock(&screen->bo_table_lock);
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
bool lima_bo_wait(struct lima_bo *bo, uint32_t op, uint64_t timeout_ns)
|
||||
{
|
||||
int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns);
|
||||
struct drm_lima_gem_wait req = {
|
||||
.handle = bo->handle,
|
||||
.op = op,
|
||||
.timeout_ns = abs_timeout,
|
||||
};
|
||||
|
||||
return drmIoctl(bo->screen->fd, DRM_IOCTL_LIMA_GEM_WAIT, &req) == 0;
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_BO
|
||||
#define H_LIMA_BO
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "util/u_atomic.h"
|
||||
|
||||
/* A buffer object owned by a lima screen. */
struct lima_bo {
   struct lima_screen *screen;   /* screen the BO was created on / imported into */
   int refcnt;                   /* reference count, updated with p_atomic_*() */

   uint32_t size;                /* length passed to mmap/munmap */
   uint32_t handle;              /* GEM handle */
   uint64_t offset;              /* offset passed to mmap on the screen fd */
   uint32_t flink_name;          /* flink name, 0 until one is created or imported */

   void *map;                    /* CPU mapping, NULL while unmapped */
   uint32_t va;                  /* address used by the GPU side — presumably a GPU VA */
};
|
||||
|
||||
bool lima_bo_table_init(struct lima_screen *screen);
|
||||
void lima_bo_table_fini(struct lima_screen *screen);
|
||||
|
||||
struct lima_bo *lima_bo_create(struct lima_screen *screen, uint32_t size,
|
||||
uint32_t flags);
|
||||
void lima_bo_free(struct lima_bo *bo);
|
||||
|
||||
static inline void lima_bo_reference(struct lima_bo *bo)
|
||||
{
|
||||
p_atomic_inc(&bo->refcnt);
|
||||
}
|
||||
|
||||
void *lima_bo_map(struct lima_bo *bo);
|
||||
void lima_bo_unmap(struct lima_bo *bo);
|
||||
|
||||
bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle);
|
||||
struct lima_bo *lima_bo_import(struct lima_screen *screen,
|
||||
struct winsys_handle *handle);
|
||||
|
||||
bool lima_bo_wait(struct lima_bo *bo, uint32_t op, uint64_t timeout_ns);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,262 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_suballoc.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_resource.h"
|
||||
#include "lima_bo.h"
|
||||
#include "lima_submit.h"
|
||||
#include "lima_util.h"
|
||||
#include "lima_fence.h"
|
||||
|
||||
#include <drm-uapi/lima_drm.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
/* Number of PLB buffers each context allocates in lima_context_create();
 * starts out at the compile-time default. */
int lima_ctx_num_plb = LIMA_CTX_PLB_DEF_NUM;
|
||||
|
||||
uint32_t
|
||||
lima_ctx_buff_va(struct lima_context *ctx, enum lima_ctx_buff buff, unsigned submit)
|
||||
{
|
||||
struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff;
|
||||
struct lima_resource *res = lima_resource(cbs->res);
|
||||
|
||||
if (submit & LIMA_CTX_BUFF_SUBMIT_GP)
|
||||
lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ);
|
||||
if (submit & LIMA_CTX_BUFF_SUBMIT_PP)
|
||||
lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_READ);
|
||||
|
||||
return res->bo->va + cbs->offset;
|
||||
}
|
||||
|
||||
void *
|
||||
lima_ctx_buff_map(struct lima_context *ctx, enum lima_ctx_buff buff)
|
||||
{
|
||||
struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff;
|
||||
struct lima_resource *res = lima_resource(cbs->res);
|
||||
|
||||
return lima_bo_map(res->bo) + cbs->offset;
|
||||
}
|
||||
|
||||
void *
|
||||
lima_ctx_buff_alloc(struct lima_context *ctx, enum lima_ctx_buff buff,
|
||||
unsigned size, bool uploader)
|
||||
{
|
||||
struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff;
|
||||
void *ret = NULL;
|
||||
|
||||
cbs->size = align(size, 0x40);
|
||||
|
||||
if (uploader)
|
||||
u_upload_alloc(ctx->uploader, 0, cbs->size, 0x40, &cbs->offset,
|
||||
&cbs->res, &ret);
|
||||
else
|
||||
u_suballocator_alloc(ctx->suballocator, cbs->size, 0x10,
|
||||
&cbs->offset, &cbs->res);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
lima_context_create_drm_ctx(struct lima_screen *screen)
|
||||
{
|
||||
struct drm_lima_ctx_create req = {0};
|
||||
|
||||
int ret = drmIoctl(screen->fd, DRM_IOCTL_LIMA_CTX_CREATE, &req);
|
||||
if (ret)
|
||||
return errno;
|
||||
|
||||
return req.id;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_context_free_drm_ctx(struct lima_screen *screen, int id)
|
||||
{
|
||||
struct drm_lima_ctx_free req = {
|
||||
.id = id,
|
||||
};
|
||||
|
||||
drmIoctl(screen->fd, DRM_IOCTL_LIMA_CTX_FREE, &req);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_context_destroy(struct pipe_context *pctx)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_screen *screen = lima_screen(pctx->screen);
|
||||
|
||||
if (ctx->pp_submit)
|
||||
lima_submit_free(ctx->pp_submit);
|
||||
if (ctx->gp_submit)
|
||||
lima_submit_free(ctx->gp_submit);
|
||||
|
||||
for (int i = 0; i < lima_ctx_buff_num; i++)
|
||||
pipe_resource_reference(&ctx->buffer_state[i].res, NULL);
|
||||
|
||||
lima_state_fini(ctx);
|
||||
|
||||
if (ctx->suballocator)
|
||||
u_suballocator_destroy(ctx->suballocator);
|
||||
|
||||
if (ctx->uploader)
|
||||
u_upload_destroy(ctx->uploader);
|
||||
|
||||
slab_destroy_child(&ctx->transfer_pool);
|
||||
|
||||
for (int i = 0; i < LIMA_CTX_PLB_MAX_NUM; i++) {
|
||||
if (ctx->plb[i])
|
||||
lima_bo_free(ctx->plb[i]);
|
||||
}
|
||||
|
||||
if (ctx->plb_gp_stream)
|
||||
lima_bo_free(ctx->plb_gp_stream);
|
||||
|
||||
if (ctx->plb_pp_stream)
|
||||
assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream));
|
||||
|
||||
lima_context_free_drm_ctx(screen, ctx->id);
|
||||
|
||||
ralloc_free(ctx);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
plb_pp_stream_hash(const void *key)
|
||||
{
|
||||
return _mesa_hash_data(key, sizeof(struct lima_ctx_plb_pp_stream_key));
|
||||
}
|
||||
|
||||
static bool
|
||||
plb_pp_stream_compare(const void *key1, const void *key2)
|
||||
{
|
||||
return memcmp(key1, key2, sizeof(struct lima_ctx_plb_pp_stream_key)) == 0;
|
||||
}
|
||||
|
||||
struct pipe_context *
|
||||
lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
struct lima_context *ctx;
|
||||
|
||||
ctx = rzalloc(screen, struct lima_context);
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
|
||||
ctx->id = lima_context_create_drm_ctx(screen);
|
||||
if (ctx->id < 0) {
|
||||
ralloc_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ctx->base.screen = pscreen;
|
||||
ctx->base.destroy = lima_context_destroy;
|
||||
|
||||
lima_resource_context_init(ctx);
|
||||
lima_fence_context_init(ctx);
|
||||
lima_state_init(ctx);
|
||||
lima_draw_init(ctx);
|
||||
lima_program_init(ctx);
|
||||
lima_query_init(ctx);
|
||||
|
||||
slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
|
||||
|
||||
ctx->uploader = u_upload_create_default(&ctx->base);
|
||||
if (!ctx->uploader)
|
||||
goto err_out;
|
||||
ctx->base.stream_uploader = ctx->uploader;
|
||||
ctx->base.const_uploader = ctx->uploader;
|
||||
|
||||
/* for varying output which need not mmap */
|
||||
ctx->suballocator =
|
||||
u_suballocator_create(&ctx->base, 1024 * 1024, 0,
|
||||
PIPE_USAGE_STREAM, 0, false);
|
||||
if (!ctx->suballocator)
|
||||
goto err_out;
|
||||
|
||||
util_dynarray_init(&ctx->vs_cmd_array, ctx);
|
||||
util_dynarray_init(&ctx->plbu_cmd_array, ctx);
|
||||
|
||||
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI450)
|
||||
ctx->plb_max_blk = 4096;
|
||||
else
|
||||
ctx->plb_max_blk = 512;
|
||||
ctx->plb_size = ctx->plb_max_blk * LIMA_CTX_PLB_BLK_SIZE;
|
||||
ctx->plb_gp_size = ctx->plb_max_blk * 4;
|
||||
|
||||
for (int i = 0; i < lima_ctx_num_plb; i++) {
|
||||
ctx->plb[i] = lima_bo_create(screen, ctx->plb_size, 0);
|
||||
if (!ctx->plb[i])
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
unsigned plb_gp_stream_size =
|
||||
align(ctx->plb_gp_size * lima_ctx_num_plb, LIMA_PAGE_SIZE);
|
||||
ctx->plb_gp_stream =
|
||||
lima_bo_create(screen, plb_gp_stream_size, 0);
|
||||
if (!ctx->plb_gp_stream)
|
||||
goto err_out;
|
||||
lima_bo_map(ctx->plb_gp_stream);
|
||||
|
||||
/* plb gp stream is static for any framebuffer */
|
||||
for (int i = 0; i < lima_ctx_num_plb; i++) {
|
||||
uint32_t *plb_gp_stream = ctx->plb_gp_stream->map + i * ctx->plb_gp_size;
|
||||
for (int j = 0; j < ctx->plb_max_blk; j++)
|
||||
plb_gp_stream[j] = ctx->plb[i]->va + LIMA_CTX_PLB_BLK_SIZE * j;
|
||||
}
|
||||
|
||||
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
|
||||
ctx->plb_pp_stream = _mesa_hash_table_create(
|
||||
ctx, plb_pp_stream_hash, plb_pp_stream_compare);
|
||||
if (!ctx->plb_pp_stream)
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
ctx->gp_submit = lima_submit_create(ctx, LIMA_PIPE_GP);
|
||||
if (!ctx->gp_submit)
|
||||
goto err_out;
|
||||
|
||||
ctx->pp_submit = lima_submit_create(ctx, LIMA_PIPE_PP);
|
||||
if (!ctx->pp_submit)
|
||||
goto err_out;
|
||||
|
||||
return &ctx->base;
|
||||
|
||||
err_out:
|
||||
lima_context_destroy(&ctx->base);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
lima_need_flush(struct lima_context *ctx, struct lima_bo *bo, bool write)
|
||||
{
|
||||
return lima_submit_has_bo(ctx->gp_submit, bo, write) ||
|
||||
lima_submit_has_bo(ctx->pp_submit, bo, write);
|
||||
}
|
|
@ -0,0 +1,294 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_CONTEXT
|
||||
#define H_LIMA_CONTEXT
|
||||
|
||||
#include "util/slab.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
/* Framebuffer state tracked by the context: the bound surfaces plus
 * derived size/tiling values. */
struct lima_context_framebuffer {
   struct pipe_surface *cbuf, *zsbuf;
   int width, height;
   int tiled_w, tiled_h;
   int shift_w, shift_h;
   int block_w, block_h;
   int shift_min;
   int samples;
};
|
||||
|
||||
/* Pending clear state: which buffers to clear and the clear values. */
struct lima_context_clear {
   unsigned buffers;
   uint32_t color_8pc;
   uint32_t depth;
   uint32_t stencil;
   uint64_t color_16pc;
};
|
||||
|
||||
struct lima_depth_stencil_alpha_state {
|
||||
struct pipe_depth_stencil_alpha_state base;
|
||||
};
|
||||
|
||||
/* Compiled fragment shader: the shader code with its size, a stack size,
 * and the BO associated with the shader. */
struct lima_fs_shader_state {
   void *shader;
   int shader_size;
   int stack_size;
   struct lima_bo *bo;
};
|
||||
|
||||
#define LIMA_MAX_VARYING_NUM 13
|
||||
|
||||
/* Description of one varying: component count, per-component size and
 * offset. */
struct lima_varying_info {
   int components;
   int component_size;
   int offset;
};
|
||||
|
||||
struct lima_vs_shader_state {
|
||||
void *shader;
|
||||
int shader_size;
|
||||
int prefetch;
|
||||
|
||||
/* pipe_constant_buffer.size is aligned with some pad bytes,
|
||||
* so record here for the real start place of gpir lowered
|
||||
* uniforms */
|
||||
int uniform_pending_offset;
|
||||
|
||||
void *constant;
|
||||
int constant_size;
|
||||
|
||||
struct lima_varying_info varying[LIMA_MAX_VARYING_NUM];
|
||||
int varying_stride;
|
||||
int num_varying;
|
||||
|
||||
struct lima_bo *bo;
|
||||
};
|
||||
|
||||
struct lima_rasterizer_state {
|
||||
struct pipe_rasterizer_state base;
|
||||
};
|
||||
|
||||
struct lima_blend_state {
|
||||
struct pipe_blend_state base;
|
||||
};
|
||||
|
||||
struct lima_vertex_element_state {
|
||||
struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_elements;
|
||||
};
|
||||
|
||||
struct lima_context_vertex_buffer {
|
||||
struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
|
||||
unsigned count;
|
||||
uint32_t enabled_mask;
|
||||
};
|
||||
|
||||
struct lima_context_viewport_state {
|
||||
struct pipe_viewport_state transform;
|
||||
float x, y, width, height;
|
||||
float near, far;
|
||||
};
|
||||
|
||||
/* One bound constant buffer: data pointer, size, and a dirty flag. */
struct lima_context_constant_buffer {
   const void *buffer;
   uint32_t size;
   bool dirty;
};
|
||||
|
||||
/* Index of a per-context buffer; used to index
 * lima_context::buffer_state. */
enum lima_ctx_buff {
   lima_ctx_buff_sh_varying,
   lima_ctx_buff_sh_gl_pos,
   lima_ctx_buff_gp_varying_info,
   lima_ctx_buff_gp_attribute_info,
   lima_ctx_buff_gp_uniform,
   lima_ctx_buff_gp_vs_cmd,
   lima_ctx_buff_gp_plbu_cmd,
   lima_ctx_buff_pp_plb_rsw,
   lima_ctx_buff_pp_uniform_array,
   lima_ctx_buff_pp_uniform,
   lima_ctx_buff_pp_tex_desc,
   /* number of buffers, must stay last */
   lima_ctx_buff_num,
};
|
||||
|
||||
/* Backing storage of one per-context buffer, as filled in by
 * lima_ctx_buff_alloc(). */
struct lima_ctx_buff_state {
   struct pipe_resource *res;   /* resource holding the buffer */
   unsigned offset;             /* offset of the buffer inside res */
   unsigned size;               /* size after alignment */
};
|
||||
|
||||
struct lima_texture_stateobj {
|
||||
struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
|
||||
unsigned num_textures;
|
||||
struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers;
|
||||
};
|
||||
|
||||
/* Key of the plb_pp_stream hash table.  It is hashed and compared as raw
 * bytes (plb_pp_stream_hash / plb_pp_stream_compare), so the struct must
 * stay free of padding. */
struct lima_ctx_plb_pp_stream_key {
   uint32_t plb_index;
   uint32_t tiled_w;
   uint32_t tiled_h;
};
|
||||
|
||||
struct lima_ctx_plb_pp_stream {
|
||||
struct lima_ctx_plb_pp_stream_key key;
|
||||
uint32_t refcnt;
|
||||
struct lima_bo *bo;
|
||||
uint32_t offset[4];
|
||||
};
|
||||
|
||||
/* Damage tracking: an array of regions, its length, and an alignment flag. */
struct lima_damage_state {
   struct pipe_scissor_state *region;
   unsigned num_region;
   bool aligned;
};
|
||||
|
||||
/* PP stream state: a BO, an offset into it, and eight stream offsets. */
struct lima_pp_stream_state {
   struct lima_bo *bo;
   uint32_t bo_offset;
   uint32_t offset[8];
};
|
||||
|
||||
struct lima_context {
|
||||
struct pipe_context base;
|
||||
|
||||
enum {
|
||||
LIMA_CONTEXT_DIRTY_FRAMEBUFFER = (1 << 0),
|
||||
LIMA_CONTEXT_DIRTY_CLEAR = (1 << 1),
|
||||
LIMA_CONTEXT_DIRTY_SHADER_VERT = (1 << 2),
|
||||
LIMA_CONTEXT_DIRTY_SHADER_FRAG = (1 << 3),
|
||||
LIMA_CONTEXT_DIRTY_VERTEX_ELEM = (1 << 4),
|
||||
LIMA_CONTEXT_DIRTY_VERTEX_BUFF = (1 << 5),
|
||||
LIMA_CONTEXT_DIRTY_VIEWPORT = (1 << 6),
|
||||
LIMA_CONTEXT_DIRTY_SCISSOR = (1 << 7),
|
||||
LIMA_CONTEXT_DIRTY_RASTERIZER = (1 << 8),
|
||||
LIMA_CONTEXT_DIRTY_ZSA = (1 << 9),
|
||||
LIMA_CONTEXT_DIRTY_BLEND_COLOR = (1 << 10),
|
||||
LIMA_CONTEXT_DIRTY_BLEND = (1 << 11),
|
||||
LIMA_CONTEXT_DIRTY_STENCIL_REF = (1 << 12),
|
||||
LIMA_CONTEXT_DIRTY_CONST_BUFF = (1 << 13),
|
||||
LIMA_CONTEXT_DIRTY_TEXTURES = (1 << 14),
|
||||
} dirty;
|
||||
|
||||
struct u_upload_mgr *uploader;
|
||||
struct u_suballocator *suballocator;
|
||||
|
||||
struct slab_child_pool transfer_pool;
|
||||
|
||||
struct lima_context_framebuffer framebuffer;
|
||||
struct lima_context_viewport_state viewport;
|
||||
struct pipe_scissor_state scissor;
|
||||
struct lima_context_clear clear;
|
||||
struct lima_vs_shader_state *vs;
|
||||
struct lima_fs_shader_state *fs;
|
||||
struct lima_vertex_element_state *vertex_elements;
|
||||
struct lima_context_vertex_buffer vertex_buffers;
|
||||
struct lima_rasterizer_state *rasterizer;
|
||||
struct lima_depth_stencil_alpha_state *zsa;
|
||||
struct pipe_blend_color blend_color;
|
||||
struct lima_blend_state *blend;
|
||||
struct pipe_stencil_ref stencil_ref;
|
||||
struct lima_context_constant_buffer const_buffer[PIPE_SHADER_TYPES];
|
||||
struct lima_texture_stateobj tex_stateobj;
|
||||
struct lima_damage_state damage;
|
||||
struct lima_pp_stream_state pp_stream;
|
||||
|
||||
unsigned min_index;
|
||||
unsigned max_index;
|
||||
|
||||
#define LIMA_CTX_PLB_MIN_NUM 1
|
||||
#define LIMA_CTX_PLB_MAX_NUM 4
|
||||
#define LIMA_CTX_PLB_DEF_NUM 2
|
||||
#define LIMA_CTX_PLB_BLK_SIZE 512
|
||||
unsigned plb_max_blk;
|
||||
unsigned plb_size;
|
||||
unsigned plb_gp_size;
|
||||
|
||||
struct lima_bo *plb[LIMA_CTX_PLB_MAX_NUM];
|
||||
struct lima_bo *plb_gp_stream;
|
||||
struct hash_table *plb_pp_stream;
|
||||
uint32_t plb_index;
|
||||
|
||||
struct lima_ctx_buff_state buffer_state[lima_ctx_buff_num];
|
||||
|
||||
struct util_dynarray vs_cmd_array;
|
||||
struct util_dynarray plbu_cmd_array;
|
||||
|
||||
struct lima_submit *gp_submit;
|
||||
struct lima_submit *pp_submit;
|
||||
|
||||
int id;
|
||||
};
|
||||
|
||||
/* Downcast a pipe_context to the lima_context that embeds it as its first
 * member. */
static inline struct lima_context *
lima_context(struct pipe_context *pctx)
{
   struct lima_context *ctx = (struct lima_context *)pctx;

   return ctx;
}
|
||||
|
||||
struct lima_sampler_state {
|
||||
struct pipe_sampler_state base;
|
||||
};
|
||||
|
||||
/* Downcast a pipe_sampler_state to the lima_sampler_state wrapping it. */
static inline struct lima_sampler_state *
lima_sampler_state(struct pipe_sampler_state *psstate)
{
   struct lima_sampler_state *state = (struct lima_sampler_state *)psstate;

   return state;
}
|
||||
|
||||
struct lima_sampler_view {
|
||||
struct pipe_sampler_view base;
|
||||
};
|
||||
|
||||
/* Downcast a pipe_sampler_view to the lima_sampler_view wrapping it. */
static inline struct lima_sampler_view *
lima_sampler_view(struct pipe_sampler_view *psview)
{
   struct lima_sampler_view *view = (struct lima_sampler_view *)psview;

   return view;
}
|
||||
|
||||
#define LIMA_CTX_BUFF_SUBMIT_GP (1 << 0)
|
||||
#define LIMA_CTX_BUFF_SUBMIT_PP (1 << 1)
|
||||
|
||||
uint32_t lima_ctx_buff_va(struct lima_context *ctx, enum lima_ctx_buff buff,
|
||||
unsigned submit);
|
||||
void *lima_ctx_buff_map(struct lima_context *ctx, enum lima_ctx_buff buff);
|
||||
void *lima_ctx_buff_alloc(struct lima_context *ctx, enum lima_ctx_buff buff,
|
||||
unsigned size, bool uploader);
|
||||
|
||||
void lima_state_init(struct lima_context *ctx);
|
||||
void lima_state_fini(struct lima_context *ctx);
|
||||
void lima_draw_init(struct lima_context *ctx);
|
||||
void lima_program_init(struct lima_context *ctx);
|
||||
void lima_query_init(struct lima_context *ctx);
|
||||
|
||||
struct pipe_context *
|
||||
lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
|
||||
|
||||
void lima_flush(struct lima_context *ctx);
|
||||
|
||||
bool lima_need_flush(struct lima_context *ctx, struct lima_bo *bo, bool write);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <libsync.h>
|
||||
|
||||
#include <util/u_memory.h>
|
||||
#include <util/u_inlines.h>
|
||||
|
||||
#include "drm-uapi/lima_drm.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_fence.h"
|
||||
#include "lima_submit.h"
|
||||
|
||||
struct pipe_fence_handle {
|
||||
struct pipe_reference reference;
|
||||
int fd;
|
||||
};
|
||||
|
||||
static void
|
||||
lima_create_fence_fd(struct pipe_context *pctx,
|
||||
struct pipe_fence_handle **fence,
|
||||
int fd, enum pipe_fd_type type)
|
||||
{
|
||||
assert(type == PIPE_FD_TYPE_NATIVE_SYNC);
|
||||
*fence = lima_fence_create(fcntl(fd, F_DUPFD_CLOEXEC, 3));
|
||||
}
|
||||
|
||||
static void
|
||||
lima_fence_server_sync(struct pipe_context *pctx,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
lima_submit_add_in_sync(ctx->gp_submit, fence->fd);
|
||||
}
|
||||
|
||||
void lima_fence_context_init(struct lima_context *ctx)
|
||||
{
|
||||
ctx->base.create_fence_fd = lima_create_fence_fd;
|
||||
ctx->base.fence_server_sync = lima_fence_server_sync;
|
||||
}
|
||||
|
||||
struct pipe_fence_handle *
|
||||
lima_fence_create(int fd)
|
||||
{
|
||||
struct pipe_fence_handle *fence;
|
||||
|
||||
fence = CALLOC_STRUCT(pipe_fence_handle);
|
||||
if (!fence)
|
||||
return NULL;
|
||||
|
||||
pipe_reference_init(&fence->reference, 1);
|
||||
fence->fd = fd;
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
static int
|
||||
lima_fence_get_fd(struct pipe_screen *pscreen,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
return fcntl(fence->fd, F_DUPFD_CLOEXEC, 3);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_fence_destroy(struct pipe_fence_handle *fence)
|
||||
{
|
||||
if (fence->fd >= 0)
|
||||
close(fence->fd);
|
||||
FREE(fence);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_fence_reference(struct pipe_screen *pscreen,
|
||||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
if (pipe_reference(&(*ptr)->reference, &fence->reference))
|
||||
lima_fence_destroy(*ptr);
|
||||
*ptr = fence;
|
||||
}
|
||||
|
||||
static boolean
|
||||
lima_fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx,
|
||||
struct pipe_fence_handle *fence, uint64_t timeout)
|
||||
{
|
||||
return !sync_wait(fence->fd, timeout / 1000000);
|
||||
}
|
||||
|
||||
void
|
||||
lima_fence_screen_init(struct lima_screen *screen)
|
||||
{
|
||||
screen->base.fence_reference = lima_fence_reference;
|
||||
screen->base.fence_finish = lima_fence_finish;
|
||||
screen->base.fence_get_fd = lima_fence_get_fd;
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_FENCE
|
||||
#define H_LIMA_FENCE
|
||||
|
||||
struct pipe_fence_handle;
|
||||
struct lima_context;
|
||||
struct lima_screen;
|
||||
|
||||
struct pipe_fence_handle *lima_fence_create(int fd);
|
||||
void lima_fence_screen_init(struct lima_screen *screen);
|
||||
void lima_fence_context_init(struct lima_context *ctx);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,317 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_program.h"
|
||||
#include "lima_bo.h"
|
||||
#include "ir/lima_ir.h"
|
||||
|
||||
static const nir_shader_compiler_options vs_nir_options = {
|
||||
.lower_ffma = true,
|
||||
.lower_fpow = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_fsqrt = true,
|
||||
.lower_sub = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
/* could be implemented by clamp */
|
||||
.lower_fsat = true,
|
||||
};
|
||||
|
||||
static const nir_shader_compiler_options fs_nir_options = {
|
||||
.lower_fpow = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_sub = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
};
|
||||
|
||||
const void *
|
||||
lima_program_get_compiler_options(enum pipe_shader_type shader)
|
||||
{
|
||||
switch (shader) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
return &vs_nir_options;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
return &fs_nir_options;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Size callback passed to nir_lower_io: reports how many attribute slots
 * the given GLSL type occupies. */
static int
type_size(const struct glsl_type *type)
{
   int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
|
||||
|
||||
static void
|
||||
lima_program_optimize_vs_nir(struct nir_shader *s)
|
||||
{
|
||||
bool progress;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
|
||||
NIR_PASS_V(s, nir_lower_regs_to_ssa);
|
||||
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
|
||||
NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
|
||||
NIR_PASS_V(s, nir_lower_io_to_scalar,
|
||||
nir_var_shader_in|nir_var_shader_out);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, s, nir_lower_alu_to_scalar);
|
||||
NIR_PASS(progress, s, nir_lower_phis_to_scalar);
|
||||
NIR_PASS(progress, s, nir_copy_prop);
|
||||
NIR_PASS(progress, s, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
NIR_PASS(progress, s, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, s, nir_opt_cse);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, s, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
NIR_PASS(progress, s, nir_opt_loop_unroll,
|
||||
nir_var_shader_in |
|
||||
nir_var_shader_out |
|
||||
nir_var_function_temp);
|
||||
} while (progress);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_locals_to_regs);
|
||||
NIR_PASS_V(s, nir_convert_from_ssa, true);
|
||||
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
|
||||
nir_sweep(s);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_program_optimize_fs_nir(struct nir_shader *s)
|
||||
{
|
||||
bool progress;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
|
||||
NIR_PASS_V(s, nir_lower_regs_to_ssa);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_vars_to_ssa);
|
||||
//NIR_PASS(progress, s, nir_lower_alu_to_scalar);
|
||||
NIR_PASS(progress, s, nir_lower_phis_to_scalar);
|
||||
NIR_PASS(progress, s, nir_copy_prop);
|
||||
NIR_PASS(progress, s, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
NIR_PASS(progress, s, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, s, nir_opt_cse);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, s, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
NIR_PASS(progress, s, nir_opt_loop_unroll,
|
||||
nir_var_shader_in |
|
||||
nir_var_shader_out |
|
||||
nir_var_function_temp);
|
||||
} while (progress);
|
||||
|
||||
/* Lower modifiers */
|
||||
NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
|
||||
NIR_PASS_V(s, nir_copy_prop);
|
||||
NIR_PASS_V(s, nir_opt_dce);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_locals_to_regs);
|
||||
NIR_PASS_V(s, nir_convert_from_ssa, true);
|
||||
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
|
||||
|
||||
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
|
||||
NIR_PASS_V(s, nir_lower_vec_to_movs);
|
||||
|
||||
nir_sweep(s);
|
||||
}
|
||||
|
||||
static void *
|
||||
lima_create_fs_state(struct pipe_context *pctx,
|
||||
const struct pipe_shader_state *cso)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pctx->screen);
|
||||
struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
nir_shader *nir;
|
||||
if (cso->type == PIPE_SHADER_IR_NIR)
|
||||
nir = cso->ir.nir;
|
||||
else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
|
||||
nir = tgsi_to_nir(cso->tokens, pctx->screen);
|
||||
}
|
||||
|
||||
lima_program_optimize_fs_nir(nir);
|
||||
|
||||
if (lima_debug & LIMA_DEBUG_PP)
|
||||
nir_print_shader(nir, stdout);
|
||||
|
||||
if (!ppir_compile_nir(so, nir, screen->pp_ra)) {
|
||||
ralloc_free(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->fs = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_fs_shader_state *so = hwcso;
|
||||
|
||||
if (so->bo)
|
||||
lima_bo_free(so->bo);
|
||||
|
||||
ralloc_free(so);
|
||||
}
|
||||
|
||||
bool
|
||||
lima_update_vs_state(struct lima_context *ctx)
|
||||
{
|
||||
struct lima_vs_shader_state *vs = ctx->vs;
|
||||
if (!vs->bo) {
|
||||
struct lima_screen *screen = lima_screen(ctx->base.screen);
|
||||
vs->bo = lima_bo_create(screen, vs->shader_size, 0);
|
||||
if (!vs->bo) {
|
||||
fprintf(stderr, "lima: create vs shader bo fail\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
|
||||
ralloc_free(vs->shader);
|
||||
vs->shader = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
lima_update_fs_state(struct lima_context *ctx)
|
||||
{
|
||||
struct lima_fs_shader_state *fs = ctx->fs;
|
||||
if (!fs->bo) {
|
||||
struct lima_screen *screen = lima_screen(ctx->base.screen);
|
||||
fs->bo = lima_bo_create(screen, fs->shader_size, 0);
|
||||
if (!fs->bo) {
|
||||
fprintf(stderr, "lima: create fs shader bo fail\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
|
||||
ralloc_free(fs->shader);
|
||||
fs->shader = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *
|
||||
lima_create_vs_state(struct pipe_context *pctx,
|
||||
const struct pipe_shader_state *cso)
|
||||
{
|
||||
struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
nir_shader *nir;
|
||||
if (cso->type == PIPE_SHADER_IR_NIR)
|
||||
nir = cso->ir.nir;
|
||||
else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
|
||||
nir = tgsi_to_nir(cso->tokens, pctx->screen);
|
||||
}
|
||||
|
||||
lima_program_optimize_vs_nir(nir);
|
||||
|
||||
if (lima_debug & LIMA_DEBUG_GP)
|
||||
nir_print_shader(nir, stdout);
|
||||
|
||||
if (!gpir_compile_nir(so, nir)) {
|
||||
ralloc_free(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->vs = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_vs_shader_state *so = hwcso;
|
||||
|
||||
if (so->bo)
|
||||
lima_bo_free(so->bo);
|
||||
|
||||
ralloc_free(so);
|
||||
}
|
||||
|
||||
void
|
||||
lima_program_init(struct lima_context *ctx)
|
||||
{
|
||||
ctx->base.create_fs_state = lima_create_fs_state;
|
||||
ctx->base.bind_fs_state = lima_bind_fs_state;
|
||||
ctx->base.delete_fs_state = lima_delete_fs_state;
|
||||
|
||||
ctx->base.create_vs_state = lima_create_vs_state;
|
||||
ctx->base.bind_vs_state = lima_bind_vs_state;
|
||||
ctx->base.delete_vs_state = lima_delete_vs_state;
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_PROGRAM
|
||||
#define H_LIMA_PROGRAM
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
/* Declared at file scope so the prototypes below refer to the same type no
 * matter which headers were included before this one; without it the
 * struct tag would only have prototype scope. */
struct lima_context;

/* Returns the NIR compiler options for the given shader stage, or NULL
 * for stages other than vertex and fragment. */
const void *lima_program_get_compiler_options(enum pipe_shader_type shader);

/* Ensure the bound vertex/fragment shader is available in a BO.
 * Both return false when the BO cannot be set up. */
bool lima_update_vs_state(struct lima_context *ctx);
bool lima_update_fs_state(struct lima_context *ctx);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Stub support for occlusion queries.
|
||||
*
|
||||
* Since we expose support for GL 2.0, we have to expose occlusion queries,
|
||||
* but the spec allows you to expose 0 query counter bits, so we just return 0
|
||||
* as the result of all our queries.
|
||||
*/
|
||||
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "lima_context.h"
|
||||
|
||||
/* Placeholder query object.  None of the query hooks in this file read or
 * write it; it only gives create/destroy something to allocate and free. */
struct lima_query {
   uint8_t pad;
};
|
||||
|
||||
static struct pipe_query *
|
||||
lima_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
|
||||
{
|
||||
struct lima_query *query = calloc(1, sizeof(*query));
|
||||
|
||||
/* Note that struct pipe_query isn't actually defined anywhere. */
|
||||
return (struct pipe_query *)query;
|
||||
}
|
||||
|
||||
/* pipe_context::destroy_query hook: free the placeholder allocated by
 * lima_create_query(). */
static void
lima_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
   free(query);
}
|
||||
|
||||
static boolean
|
||||
lima_begin_query(struct pipe_context *ctx, struct pipe_query *query)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/* pipe_context::end_query hook: nothing is tracked, always succeeds. */
static bool
lima_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   return true;
}
|
||||
|
||||
static boolean
|
||||
lima_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
|
||||
boolean wait, union pipe_query_result *vresult)
|
||||
{
|
||||
uint64_t *result = &vresult->u64;
|
||||
|
||||
*result = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_active_query_state(struct pipe_context *pipe, boolean enable)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
lima_query_init(struct lima_context *pctx)
|
||||
{
|
||||
pctx->base.create_query = lima_create_query;
|
||||
pctx->base.destroy_query = lima_destroy_query;
|
||||
pctx->base.begin_query = lima_begin_query;
|
||||
pctx->base.end_query = lima_end_query;
|
||||
pctx->base.get_query_result = lima_get_query_result;
|
||||
pctx->base.set_active_query_state = lima_set_active_query_state;
|
||||
}
|
||||
|
|
@ -0,0 +1,589 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_drm.h"
|
||||
#include "renderonly/renderonly.h"
|
||||
|
||||
#include "state_tracker/drm_driver.h"
|
||||
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
#include "drm-uapi/lima_drm.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_resource.h"
|
||||
#include "lima_bo.h"
|
||||
#include "lima_util.h"
|
||||
#include "lima_tiling.h"
|
||||
|
||||
static struct pipe_resource *
|
||||
lima_resource_create_scanout(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat,
|
||||
unsigned width, unsigned height)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
struct renderonly_scanout *scanout;
|
||||
struct winsys_handle handle;
|
||||
struct pipe_resource *pres;
|
||||
|
||||
struct pipe_resource scanout_templat = *templat;
|
||||
scanout_templat.width0 = width;
|
||||
scanout_templat.height0 = height;
|
||||
scanout_templat.screen = pscreen;
|
||||
|
||||
scanout = renderonly_scanout_for_resource(&scanout_templat,
|
||||
screen->ro, &handle);
|
||||
if (!scanout)
|
||||
return NULL;
|
||||
|
||||
assert(handle.type == WINSYS_HANDLE_TYPE_FD);
|
||||
pres = pscreen->resource_from_handle(pscreen, templat, &handle,
|
||||
PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE);
|
||||
|
||||
close(handle.handle);
|
||||
if (!pres) {
|
||||
renderonly_scanout_destroy(scanout, screen->ro);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct lima_resource *res = lima_resource(pres);
|
||||
res->scanout = scanout;
|
||||
|
||||
return pres;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
setup_miptree(struct lima_resource *res,
|
||||
unsigned width0, unsigned height0,
|
||||
bool should_align_dimensions)
|
||||
{
|
||||
struct pipe_resource *pres = &res->base;
|
||||
unsigned level;
|
||||
unsigned width = width0;
|
||||
unsigned height = height0;
|
||||
unsigned depth = pres->depth0;
|
||||
uint32_t size = 0;
|
||||
|
||||
for (level = 0; level <= pres->last_level; level++) {
|
||||
uint32_t actual_level_size;
|
||||
uint32_t stride;
|
||||
unsigned aligned_width;
|
||||
unsigned aligned_height;
|
||||
|
||||
if (should_align_dimensions) {
|
||||
aligned_width = align(width, 16);
|
||||
aligned_height = align(height, 16);
|
||||
} else {
|
||||
aligned_width = width;
|
||||
aligned_height = height;
|
||||
}
|
||||
|
||||
stride = util_format_get_stride(pres->format, aligned_width);
|
||||
actual_level_size = stride *
|
||||
util_format_get_nblocksy(pres->format, aligned_height) *
|
||||
pres->array_size * depth;
|
||||
|
||||
res->levels[level].width = aligned_width;
|
||||
res->levels[level].stride = stride;
|
||||
res->levels[level].offset = size;
|
||||
|
||||
/* The start address of each level <= 10 must be 64-aligned
|
||||
* in order to be able to pass the addresses
|
||||
* to the hardware.
|
||||
* The start addresses of level 11 and level 12 are passed
|
||||
* implicitely: they start at an offset of respectively
|
||||
* 0x0400 and 0x0800 from the start address of level 10 */
|
||||
if (level < 10)
|
||||
size += align(actual_level_size, 64);
|
||||
else if (level != pres->last_level)
|
||||
size += 0x0400;
|
||||
else
|
||||
size += actual_level_size; /* Save some memory */
|
||||
|
||||
width = u_minify(width, 1);
|
||||
height = u_minify(height, 1);
|
||||
depth = u_minify(depth, 1);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
lima_resource_create_bo(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat,
|
||||
unsigned width, unsigned height,
|
||||
bool should_align_dimensions)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
struct lima_resource *res;
|
||||
struct pipe_resource *pres;
|
||||
|
||||
res = CALLOC_STRUCT(lima_resource);
|
||||
if (!res)
|
||||
return NULL;
|
||||
|
||||
res->base = *templat;
|
||||
res->base.screen = pscreen;
|
||||
pipe_reference_init(&res->base.reference, 1);
|
||||
|
||||
pres = &res->base;
|
||||
|
||||
uint32_t size = setup_miptree(res, width, height, should_align_dimensions);
|
||||
size = align(size, LIMA_PAGE_SIZE);
|
||||
|
||||
res->bo = lima_bo_create(screen, size, 0);
|
||||
if (!res->bo) {
|
||||
FREE(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pres;
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
_lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat,
|
||||
const uint64_t *modifiers,
|
||||
int count)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
bool should_tile = false;
|
||||
unsigned width, height;
|
||||
bool should_align_dimensions;
|
||||
|
||||
/* VBOs/PBOs are untiled (and 1 height). */
|
||||
if (templat->target == PIPE_BUFFER)
|
||||
should_tile = false;
|
||||
|
||||
if (templat->bind & (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT))
|
||||
should_tile = false;
|
||||
|
||||
/* if linear buffer is not allowed, alloc fail */
|
||||
if (!should_tile && !drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
|
||||
return NULL;
|
||||
|
||||
if (should_tile || (templat->bind & PIPE_BIND_RENDER_TARGET)) {
|
||||
should_align_dimensions = true;
|
||||
width = align(templat->width0, 16);
|
||||
height = align(templat->height0, 16);
|
||||
}
|
||||
else {
|
||||
should_align_dimensions = false;
|
||||
width = templat->width0;
|
||||
height = templat->height0;
|
||||
}
|
||||
|
||||
struct pipe_resource *pres;
|
||||
if (screen->ro && (templat->bind & PIPE_BIND_SCANOUT))
|
||||
pres = lima_resource_create_scanout(pscreen, templat, width, height);
|
||||
else
|
||||
pres = lima_resource_create_bo(pscreen, templat, width, height,
|
||||
should_align_dimensions);
|
||||
|
||||
if (pres) {
|
||||
struct lima_resource *res = lima_resource(pres);
|
||||
res->tiled = should_tile;
|
||||
|
||||
debug_printf("%s: pres=%p width=%u height=%u depth=%u target=%d "
|
||||
"bind=%x usage=%d tile=%d last_level=%d\n", __func__,
|
||||
pres, pres->width0, pres->height0, pres->depth0,
|
||||
pres->target, pres->bind, pres->usage, should_tile, templat->last_level);
|
||||
}
|
||||
return pres;
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
lima_resource_create(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat)
|
||||
{
|
||||
static const uint64_t modifiers[] = {
|
||||
DRM_FORMAT_MOD_LINEAR,
|
||||
};
|
||||
return _lima_resource_create_with_modifiers(pscreen, templat, modifiers, ARRAY_SIZE(modifiers));
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat,
|
||||
const uint64_t *modifiers,
|
||||
int count)
|
||||
{
|
||||
struct pipe_resource tmpl = *templat;
|
||||
|
||||
/* gbm_bo_create_with_modifiers & gbm_surface_create_with_modifiers
|
||||
* don't have usage parameter, but buffer created by these functions
|
||||
* may be used for scanout. So we assume buffer created by this
|
||||
* function always enable scanout if linear modifier is permitted.
|
||||
*/
|
||||
if (drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
|
||||
tmpl.bind |= PIPE_BIND_SCANOUT;
|
||||
|
||||
return _lima_resource_create_with_modifiers(pscreen, &tmpl, modifiers, count);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *pres)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
struct lima_resource *res = lima_resource(pres);
|
||||
|
||||
if (res->bo)
|
||||
lima_bo_free(res->bo);
|
||||
|
||||
if (res->scanout)
|
||||
renderonly_scanout_destroy(res->scanout, screen->ro);
|
||||
|
||||
FREE(res);
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
lima_resource_from_handle(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *templat,
|
||||
struct winsys_handle *handle, unsigned usage)
|
||||
{
|
||||
struct lima_resource *res;
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
|
||||
res = CALLOC_STRUCT(lima_resource);
|
||||
if (!res)
|
||||
return NULL;
|
||||
|
||||
struct pipe_resource *pres = &res->base;
|
||||
*pres = *templat;
|
||||
pres->screen = pscreen;
|
||||
pipe_reference_init(&pres->reference, 1);
|
||||
res->levels[0].offset = 0;
|
||||
res->levels[0].stride = handle->stride;
|
||||
|
||||
res->bo = lima_bo_import(screen, handle);
|
||||
if (!res->bo) {
|
||||
FREE(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* check alignment for the buffer */
|
||||
if (pres->bind & PIPE_BIND_RENDER_TARGET) {
|
||||
unsigned width, height, stride, size;
|
||||
|
||||
width = align(pres->width0, 16);
|
||||
height = align(pres->height0, 16);
|
||||
stride = util_format_get_stride(pres->format, width);
|
||||
size = util_format_get_2d_size(pres->format, stride, height);
|
||||
|
||||
if (res->levels[0].stride != stride || res->bo->size < size) {
|
||||
debug_error("import buffer not properly aligned\n");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
res->levels[0].width = width;
|
||||
}
|
||||
else
|
||||
res->levels[0].width = pres->width0;
|
||||
|
||||
handle->modifier = DRM_FORMAT_MOD_LINEAR;
|
||||
res->tiled = false;
|
||||
|
||||
return pres;
|
||||
|
||||
err_out:
|
||||
lima_resource_destroy(pscreen, pres);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static boolean
|
||||
lima_resource_get_handle(struct pipe_screen *pscreen,
|
||||
struct pipe_context *pctx,
|
||||
struct pipe_resource *pres,
|
||||
struct winsys_handle *handle, unsigned usage)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
struct lima_resource *res = lima_resource(pres);
|
||||
|
||||
handle->modifier = DRM_FORMAT_MOD_LINEAR;
|
||||
|
||||
if (handle->type == WINSYS_HANDLE_TYPE_KMS && screen->ro &&
|
||||
renderonly_get_handle(res->scanout, handle))
|
||||
return TRUE;
|
||||
|
||||
if (!lima_bo_export(res->bo, handle))
|
||||
return FALSE;
|
||||
|
||||
handle->stride = res->levels[0].stride;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void
|
||||
lima_resource_screen_init(struct lima_screen *screen)
|
||||
{
|
||||
screen->base.resource_create = lima_resource_create;
|
||||
screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers;
|
||||
screen->base.resource_from_handle = lima_resource_from_handle;
|
||||
screen->base.resource_destroy = lima_resource_destroy;
|
||||
screen->base.resource_get_handle = lima_resource_get_handle;
|
||||
}
|
||||
|
||||
static struct pipe_surface *
|
||||
lima_surface_create(struct pipe_context *pctx,
|
||||
struct pipe_resource *pres,
|
||||
const struct pipe_surface *surf_tmpl)
|
||||
{
|
||||
struct lima_surface *surf = CALLOC_STRUCT(lima_surface);
|
||||
|
||||
if (!surf)
|
||||
return NULL;
|
||||
|
||||
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
|
||||
|
||||
struct pipe_surface *psurf = &surf->base;
|
||||
unsigned level = surf_tmpl->u.tex.level;
|
||||
|
||||
pipe_reference_init(&psurf->reference, 1);
|
||||
pipe_resource_reference(&psurf->texture, pres);
|
||||
|
||||
psurf->context = pctx;
|
||||
psurf->format = surf_tmpl->format;
|
||||
psurf->width = u_minify(pres->width0, level);
|
||||
psurf->height = u_minify(pres->height0, level);
|
||||
psurf->u.tex.level = level;
|
||||
psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
|
||||
psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
|
||||
|
||||
surf->tiled_w = align(psurf->width, 16) >> 4;
|
||||
surf->tiled_h = align(psurf->height, 16) >> 4;
|
||||
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
if (ctx->plb_pp_stream) {
|
||||
struct lima_ctx_plb_pp_stream_key key = {
|
||||
.tiled_w = surf->tiled_w,
|
||||
.tiled_h = surf->tiled_h,
|
||||
};
|
||||
|
||||
for (int i = 0; i < lima_ctx_num_plb; i++) {
|
||||
key.plb_index = i;
|
||||
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
|
||||
if (entry) {
|
||||
struct lima_ctx_plb_pp_stream *s = entry->data;
|
||||
s->refcnt++;
|
||||
}
|
||||
else {
|
||||
struct lima_ctx_plb_pp_stream *s =
|
||||
ralloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream);
|
||||
s->key.plb_index = i;
|
||||
s->key.tiled_w = surf->tiled_w;
|
||||
s->key.tiled_h = surf->tiled_h;
|
||||
s->refcnt = 1;
|
||||
s->bo = NULL;
|
||||
_mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &surf->base;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
|
||||
{
|
||||
struct lima_surface *surf = lima_surface(psurf);
|
||||
/* psurf->context may be not equal with pctx (i.e. glxinfo) */
|
||||
struct lima_context *ctx = lima_context(psurf->context);
|
||||
|
||||
if (ctx->plb_pp_stream) {
|
||||
struct lima_ctx_plb_pp_stream_key key = {
|
||||
.tiled_w = surf->tiled_w,
|
||||
.tiled_h = surf->tiled_h,
|
||||
};
|
||||
|
||||
for (int i = 0; i < lima_ctx_num_plb; i++) {
|
||||
key.plb_index = i;
|
||||
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
|
||||
struct lima_ctx_plb_pp_stream *s = entry->data;
|
||||
if (--s->refcnt == 0) {
|
||||
if (s->bo)
|
||||
lima_bo_free(s->bo);
|
||||
_mesa_hash_table_remove(ctx->plb_pp_stream, entry);
|
||||
ralloc_free(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pipe_resource_reference(&psurf->texture, NULL);
|
||||
FREE(surf);
|
||||
}
|
||||
|
||||
static void *
|
||||
lima_transfer_map(struct pipe_context *pctx,
|
||||
struct pipe_resource *pres,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
struct pipe_transfer **pptrans)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_resource *res = lima_resource(pres);
|
||||
struct lima_bo *bo = res->bo;
|
||||
struct lima_transfer *trans;
|
||||
struct pipe_transfer *ptrans;
|
||||
|
||||
/* No direct mappings of tiled, since we need to manually
|
||||
* tile/untile.
|
||||
*/
|
||||
if (res->tiled && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
|
||||
return NULL;
|
||||
|
||||
/* use once buffers are made sure to not read/write overlapped
|
||||
* range, so no need to sync */
|
||||
if (pres->usage != PIPE_USAGE_STREAM) {
|
||||
if (usage & PIPE_TRANSFER_READ_WRITE) {
|
||||
if (lima_need_flush(ctx, bo, usage & PIPE_TRANSFER_WRITE))
|
||||
lima_flush(ctx);
|
||||
|
||||
unsigned op = usage & PIPE_TRANSFER_WRITE ?
|
||||
LIMA_GEM_WAIT_WRITE : LIMA_GEM_WAIT_READ;
|
||||
lima_bo_wait(bo, op, PIPE_TIMEOUT_INFINITE);
|
||||
}
|
||||
}
|
||||
|
||||
if (!lima_bo_map(bo))
|
||||
return NULL;
|
||||
|
||||
trans = slab_alloc(&ctx->transfer_pool);
|
||||
if (!trans)
|
||||
return NULL;
|
||||
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
ptrans = &trans->base;
|
||||
|
||||
pipe_resource_reference(&ptrans->resource, pres);
|
||||
ptrans->level = level;
|
||||
ptrans->usage = usage;
|
||||
ptrans->box = *box;
|
||||
|
||||
*pptrans = ptrans;
|
||||
|
||||
if (res->tiled) {
|
||||
ptrans->stride = util_format_get_stride(pres->format, ptrans->box.width);
|
||||
ptrans->layer_stride = ptrans->stride * ptrans->box.height;
|
||||
|
||||
trans->staging = malloc(ptrans->stride * ptrans->box.height * ptrans->box.depth);
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ)
|
||||
lima_load_tiled_image(trans->staging, bo->map + res->levels[level].offset,
|
||||
&ptrans->box,
|
||||
ptrans->stride,
|
||||
res->levels[level].stride,
|
||||
util_format_get_blocksize(pres->format));
|
||||
|
||||
return trans->staging;
|
||||
} else {
|
||||
ptrans->stride = res->levels[level].stride;
|
||||
ptrans->layer_stride = ptrans->stride * box->height;
|
||||
|
||||
return bo->map + res->levels[level].offset +
|
||||
box->z * ptrans->layer_stride +
|
||||
box->y / util_format_get_blockheight(pres->format) * ptrans->stride +
|
||||
box->x / util_format_get_blockwidth(pres->format) *
|
||||
util_format_get_blocksize(pres->format);
|
||||
}
|
||||
}
|
||||
|
||||
/* pipe_context::transfer_flush_region hook: intentionally a no-op. */
static void
lima_transfer_flush_region(struct pipe_context *pctx,
                           struct pipe_transfer *ptrans,
                           const struct pipe_box *box)
{
}
|
||||
|
||||
static void
|
||||
lima_transfer_unmap(struct pipe_context *pctx,
|
||||
struct pipe_transfer *ptrans)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_transfer *trans = lima_transfer(ptrans);
|
||||
struct lima_resource *res = lima_resource(ptrans->resource);
|
||||
struct lima_bo *bo = res->bo;
|
||||
struct pipe_resource *pres;
|
||||
|
||||
if (trans->staging) {
|
||||
pres = &res->base;
|
||||
if (ptrans->usage & PIPE_TRANSFER_WRITE)
|
||||
lima_store_tiled_image(bo->map + res->levels[ptrans->level].offset, trans->staging,
|
||||
&ptrans->box,
|
||||
res->levels[ptrans->level].stride,
|
||||
ptrans->stride,
|
||||
util_format_get_blocksize(pres->format));
|
||||
free(trans->staging);
|
||||
}
|
||||
|
||||
pipe_resource_reference(&ptrans->resource, NULL);
|
||||
slab_free(&ctx->transfer_pool, trans);
|
||||
}
|
||||
|
||||
/* pipe_context::blit hook: not implemented yet, only reports an error. */
static void
lima_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
   debug_error("lima_blit not implemented\n");
}
|
||||
|
||||
/* pipe_context::flush_resource hook: intentionally a no-op. */
static void
lima_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
{
}
|
||||
|
||||
void
|
||||
lima_resource_context_init(struct lima_context *ctx)
|
||||
{
|
||||
ctx->base.create_surface = lima_surface_create;
|
||||
ctx->base.surface_destroy = lima_surface_destroy;
|
||||
|
||||
/* TODO: optimize these functions to read/write data directly
|
||||
* from/to target instead of creating a staging memory for tiled
|
||||
* buffer indirectly
|
||||
*/
|
||||
ctx->base.buffer_subdata = u_default_buffer_subdata;
|
||||
ctx->base.texture_subdata = u_default_texture_subdata;
|
||||
ctx->base.resource_copy_region = util_resource_copy_region;
|
||||
|
||||
ctx->base.blit = lima_blit;
|
||||
|
||||
ctx->base.transfer_map = lima_transfer_map;
|
||||
ctx->base.transfer_flush_region = lima_transfer_flush_region;
|
||||
ctx->base.transfer_unmap = lima_transfer_unmap;
|
||||
|
||||
ctx->base.flush_resource = lima_flush_resource;
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_RESOURCE
|
||||
#define H_LIMA_RESOURCE
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
/* The largest texture is 4096x4096, i.e. level sizes 4096, 2048, ..., 1:
 * 13 mip levels in total.
 */
#define LIMA_MAX_MIP_LEVELS 13
|
||||
|
||||
/* Forward declarations for the pointer-only uses in the prototypes at the
 * end of this header.  Without the lima_context one, the struct named in
 * lima_resource_context_init()'s prototype would only have prototype scope
 * unless lima_context.h happened to be included first.
 */
struct lima_screen;
struct lima_context;
|
||||
|
||||
/* Layout of a single mip level inside a resource's buffer object. */
struct lima_resource_level {
   uint32_t width;    /* presumably the level width in pixels - TODO confirm */
   uint32_t stride;   /* row stride handed to the tiled-store helper on unmap */
   uint32_t offset;   /* added to the BO mapping to reach this level's data */
};
|
||||
|
||||
struct lima_resource {
|
||||
struct pipe_resource base;
|
||||
|
||||
struct renderonly_scanout *scanout;
|
||||
struct lima_bo *bo;
|
||||
bool tiled;
|
||||
|
||||
struct lima_resource_level levels[LIMA_MAX_MIP_LEVELS];
|
||||
};
|
||||
|
||||
struct lima_surface {
|
||||
struct pipe_surface base;
|
||||
int tiled_w, tiled_h;
|
||||
bool reload;
|
||||
};
|
||||
|
||||
struct lima_transfer {
|
||||
struct pipe_transfer base;
|
||||
void *staging;
|
||||
};
|
||||
|
||||
/* Downcast a pipe_resource to the lima_resource that embeds it. */
static inline struct lima_resource *
lima_resource(struct pipe_resource *res)
{
   struct lima_resource *lres = (struct lima_resource *)res;

   return lres;
}
|
||||
|
||||
/* Downcast a pipe_surface to the lima_surface that embeds it. */
static inline struct lima_surface *
lima_surface(struct pipe_surface *surf)
{
   struct lima_surface *lsurf = (struct lima_surface *)surf;

   return lsurf;
}
|
||||
|
||||
/* Downcast a pipe_transfer to the lima_transfer that embeds it. */
static inline struct lima_transfer *
lima_transfer(struct pipe_transfer *trans)
{
   struct lima_transfer *ltrans = (struct lima_transfer *)trans;

   return ltrans;
}
|
||||
|
||||
/* Screen-level resource setup; called from lima_screen_create(). */
void lima_resource_screen_init(struct lima_screen *screen);
|
||||
|
||||
/* Install the resource/transfer/surface entry points on a context. */
void lima_resource_context_init(struct lima_context *ctx);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,546 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_screen.h"
|
||||
#include "renderonly/renderonly.h"
|
||||
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
#include "drm-uapi/lima_drm.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_resource.h"
|
||||
#include "lima_program.h"
|
||||
#include "lima_bo.h"
|
||||
#include "lima_fence.h"
|
||||
#include "ir/lima_ir.h"
|
||||
|
||||
#include "xf86drm.h"
|
||||
|
||||
static void
|
||||
lima_screen_destroy(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
|
||||
if (lima_dump_command_stream) {
|
||||
fclose(lima_dump_command_stream);
|
||||
lima_dump_command_stream = NULL;
|
||||
}
|
||||
|
||||
slab_destroy_parent(&screen->transfer_pool);
|
||||
|
||||
if (screen->ro)
|
||||
free(screen->ro);
|
||||
|
||||
if (screen->gp_buffer)
|
||||
lima_bo_free(screen->gp_buffer);
|
||||
|
||||
if (screen->pp_buffer)
|
||||
lima_bo_free(screen->pp_buffer);
|
||||
|
||||
lima_bo_table_fini(screen);
|
||||
ralloc_free(screen);
|
||||
}
|
||||
|
||||
static const char *
|
||||
lima_screen_get_name(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
|
||||
switch (screen->gpu_type) {
|
||||
case DRM_LIMA_PARAM_GPU_ID_MALI400:
|
||||
return "Mali400";
|
||||
case DRM_LIMA_PARAM_GPU_ID_MALI450:
|
||||
return "Mali450";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* pipe_screen::get_vendor - constant driver vendor string. */
static const char *
lima_screen_get_vendor(struct pipe_screen *pscreen)
{
   static const char vendor[] = "lima";

   return vendor;
}
|
||||
|
||||
/* pipe_screen::get_device_vendor - constant hardware vendor string. */
static const char *
lima_screen_get_device_vendor(struct pipe_screen *pscreen)
{
   static const char device_vendor[] = "ARM";

   return device_vendor;
}
|
||||
|
||||
static int
|
||||
lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
{
|
||||
switch (param) {
|
||||
case PIPE_CAP_NPOT_TEXTURES:
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
case PIPE_CAP_ACCELERATED:
|
||||
case PIPE_CAP_UMA:
|
||||
case PIPE_CAP_NATIVE_FENCE_FD:
|
||||
return 1;
|
||||
|
||||
/* Unimplemented, but for exporting OpenGL 2.0 */
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
case PIPE_CAP_POINT_SPRITE:
|
||||
return 1;
|
||||
|
||||
/* not clear supported */
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
return LIMA_MAX_MIP_LEVELS;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
return 0x13B5;
|
||||
|
||||
case PIPE_CAP_VIDEO_MEMORY:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_PCI_GROUP:
|
||||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return u_pipe_screen_get_param_defaults(pscreen, param);
|
||||
}
|
||||
}
|
||||
|
||||
static float
|
||||
lima_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
||||
{
|
||||
switch (param) {
|
||||
case PIPE_CAPF_MAX_LINE_WIDTH:
|
||||
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
|
||||
case PIPE_CAPF_MAX_POINT_WIDTH:
|
||||
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
|
||||
return 255.0f;
|
||||
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
|
||||
return 16.0f;
|
||||
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
|
||||
return 16.0f;
|
||||
|
||||
default:
|
||||
return 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
get_vertex_shader_param(struct lima_screen *screen,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
switch (param) {
|
||||
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
|
||||
return 16384; /* need investigate */
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_INPUTS:
|
||||
return 16; /* attributes */
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_OUTPUTS:
|
||||
return LIMA_MAX_VARYING_NUM; /* varying */
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return 4096; /* need investigate */
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return 1;
|
||||
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 256; /* need investigate */
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
get_fragment_shader_param(struct lima_screen *screen,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
switch (param) {
|
||||
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
|
||||
return 16384; /* need investigate */
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_INPUTS:
|
||||
return LIMA_MAX_VARYING_NUM - 1; /* varying, minus gl_Position */
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return 4096; /* need investigate */
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return 1;
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
|
||||
return 16; /* need investigate */
|
||||
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 256; /* need investigate */
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
lima_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_type shader,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
|
||||
switch (shader) {
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
return get_fragment_shader_param(screen, param);
|
||||
case PIPE_SHADER_VERTEX:
|
||||
return get_vertex_shader_param(screen, param);
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
lima_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
enum pipe_format format,
|
||||
enum pipe_texture_target target,
|
||||
unsigned sample_count,
|
||||
unsigned storage_sample_count,
|
||||
unsigned usage)
|
||||
{
|
||||
switch (target) {
|
||||
case PIPE_BUFFER:
|
||||
case PIPE_TEXTURE_1D:
|
||||
case PIPE_TEXTURE_2D:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
|
||||
return false;
|
||||
|
||||
/* be able to support 16, now limit to 4 */
|
||||
if (sample_count > 1 && sample_count != 4)
|
||||
return FALSE;
|
||||
|
||||
if (usage & PIPE_BIND_RENDER_TARGET) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_DEPTH_STENCIL) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_VERTEX_BUFFER) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_INDEX_BUFFER) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_I8_UINT:
|
||||
case PIPE_FORMAT_I16_UINT:
|
||||
case PIPE_FORMAT_I32_UINT:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_SAMPLER_VIEW) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_A8B8G8R8_SRGB:
|
||||
case PIPE_FORMAT_B8G8R8A8_SRGB:
|
||||
break;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static const void *
|
||||
lima_screen_get_compiler_options(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir,
|
||||
enum pipe_shader_type shader)
|
||||
{
|
||||
return lima_program_get_compiler_options(shader);
|
||||
}
|
||||
|
||||
static bool
|
||||
lima_screen_query_info(struct lima_screen *screen)
|
||||
{
|
||||
struct drm_lima_get_param param;
|
||||
|
||||
memset(¶m, 0, sizeof(param));
|
||||
param.param = DRM_LIMA_PARAM_GPU_ID;
|
||||
if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GET_PARAM, ¶m))
|
||||
return false;
|
||||
|
||||
switch (param.value) {
|
||||
case DRM_LIMA_PARAM_GPU_ID_MALI400:
|
||||
case DRM_LIMA_PARAM_GPU_ID_MALI450:
|
||||
screen->gpu_type = param.value;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
memset(¶m, 0, sizeof(param));
|
||||
param.param = DRM_LIMA_PARAM_NUM_PP;
|
||||
if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GET_PARAM, ¶m))
|
||||
return false;
|
||||
|
||||
screen->num_pp = param.value;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
|
||||
enum pipe_format format, int max,
|
||||
uint64_t *modifiers,
|
||||
unsigned int *external_only,
|
||||
int *count)
|
||||
{
|
||||
uint64_t available_modifiers[] = {
|
||||
DRM_FORMAT_MOD_LINEAR,
|
||||
};
|
||||
|
||||
if (!modifiers) {
|
||||
*count = ARRAY_SIZE(available_modifiers);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < *count; i++) {
|
||||
modifiers[i] = available_modifiers[i];
|
||||
if (external_only)
|
||||
external_only = false;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct debug_named_value debug_options[] = {
|
||||
{ "gp", LIMA_DEBUG_GP,
|
||||
"print GP shader compiler result of each stage" },
|
||||
{ "pp", LIMA_DEBUG_PP,
|
||||
"print PP shader compiler result of each stage" },
|
||||
{ "dump", LIMA_DEBUG_DUMP,
|
||||
"dump GPU command stream to $PWD/lima.dump" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/* Defines debug_get_option_lima_debug(), which parses LIMA_DEBUG against
 * debug_options (default 0).
 */
DEBUG_GET_ONCE_FLAGS_OPTION(lima_debug, "LIMA_DEBUG", debug_options, 0)

/* Active LIMA_DEBUG_* flags; filled in by lima_screen_parse_env(). */
uint32_t lima_debug;
|
||||
|
||||
static void
|
||||
lima_screen_parse_env(void)
|
||||
{
|
||||
lima_debug = debug_get_option_lima_debug();
|
||||
|
||||
if (lima_debug & LIMA_DEBUG_DUMP) {
|
||||
const char *dump_command = "lima.dump";
|
||||
printf("lima: dump command stream to file %s\n", dump_command);
|
||||
lima_dump_command_stream = fopen(dump_command, "w");
|
||||
if (!lima_dump_command_stream)
|
||||
fprintf(stderr, "lima: fail to open command stream log file %s\n",
|
||||
dump_command);
|
||||
}
|
||||
|
||||
lima_ctx_num_plb = debug_get_num_option("LIMA_CTX_NUM_PLB", LIMA_CTX_PLB_DEF_NUM);
|
||||
if (lima_ctx_num_plb > LIMA_CTX_PLB_MAX_NUM ||
|
||||
lima_ctx_num_plb < LIMA_CTX_PLB_MIN_NUM) {
|
||||
fprintf(stderr, "lima: LIMA_CTX_NUM_PLB %d out of range [%d %d], "
|
||||
"reset to default %d\n", lima_ctx_num_plb, LIMA_CTX_PLB_MIN_NUM,
|
||||
LIMA_CTX_PLB_MAX_NUM, LIMA_CTX_PLB_DEF_NUM);
|
||||
lima_ctx_num_plb = LIMA_CTX_PLB_DEF_NUM;
|
||||
}
|
||||
|
||||
lima_ppir_force_spilling = debug_get_num_option("LIMA_PPIR_FORCE_SPILLING", 0);
|
||||
if (lima_ppir_force_spilling < 0) {
|
||||
fprintf(stderr, "lima: LIMA_PPIR_FORCE_SPILLING %d less than 0, "
|
||||
"reset to default 0\n", lima_ppir_force_spilling);
|
||||
lima_ppir_force_spilling = 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
lima_screen_create(int fd, struct renderonly *ro)
|
||||
{
|
||||
struct lima_screen *screen;
|
||||
|
||||
screen = rzalloc(NULL, struct lima_screen);
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
screen->fd = fd;
|
||||
|
||||
if (!lima_screen_query_info(screen))
|
||||
goto err_out0;
|
||||
|
||||
if (!lima_bo_table_init(screen))
|
||||
goto err_out0;
|
||||
|
||||
screen->pp_ra = ppir_regalloc_init(screen);
|
||||
if (!screen->pp_ra)
|
||||
goto err_out1;
|
||||
|
||||
screen->gp_buffer = lima_bo_create(screen, gp_buffer_size, 0);
|
||||
if (!screen->gp_buffer)
|
||||
goto err_out1;
|
||||
|
||||
screen->pp_buffer = lima_bo_create(screen, pp_buffer_size, 0);
|
||||
if (!screen->pp_buffer)
|
||||
goto err_out2;
|
||||
|
||||
/* fs program for clear buffer?
|
||||
* const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop
|
||||
*/
|
||||
static const uint32_t pp_clear_program[] = {
|
||||
0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000,
|
||||
0x000005f5, 0x00000000, 0x00000000, 0x00000000,
|
||||
};
|
||||
memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_program_offset,
|
||||
pp_clear_program, sizeof(pp_clear_program));
|
||||
|
||||
/* copy texture to framebuffer, used to reload gpu tile buffer
|
||||
* load.v $1 0.xy, texld_2d 0, mov.v0 $0 ^tex_sampler, sync, stop
|
||||
*/
|
||||
static const uint32_t pp_reload_program[] = {
|
||||
0x000005e6, 0xf1003c20, 0x00000000, 0x39001000,
|
||||
0x00000e4e, 0x000007cf, 0x00000000, 0x00000000,
|
||||
};
|
||||
memcpy(lima_bo_map(screen->pp_buffer) + pp_reload_program_offset,
|
||||
pp_reload_program, sizeof(pp_reload_program));
|
||||
|
||||
/* 0/1/2 vertex index for reload/clear draw */
|
||||
static const uint8_t pp_shared_index[] = { 0, 1, 2 };
|
||||
memcpy(lima_bo_map(screen->pp_buffer) + pp_shared_index_offset,
|
||||
pp_shared_index, sizeof(pp_shared_index));
|
||||
|
||||
/* 4096x4096 gl pos used for partial clear */
|
||||
static const float pp_clear_gl_pos[] = {
|
||||
4096, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 4096, 1, 1,
|
||||
};
|
||||
memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_gl_pos_offset,
|
||||
pp_clear_gl_pos, sizeof(pp_clear_gl_pos));
|
||||
|
||||
/* is pp frame render state static? */
|
||||
uint32_t *pp_frame_rsw = lima_bo_map(screen->pp_buffer) + pp_frame_rsw_offset;
|
||||
memset(pp_frame_rsw, 0, 0x40);
|
||||
pp_frame_rsw[8] = 0x0000f008;
|
||||
pp_frame_rsw[9] = screen->pp_buffer->va + pp_clear_program_offset;
|
||||
pp_frame_rsw[13] = 0x00000100;
|
||||
|
||||
if (ro) {
|
||||
screen->ro = renderonly_dup(ro);
|
||||
if (!screen->ro) {
|
||||
fprintf(stderr, "Failed to dup renderonly object\n");
|
||||
goto err_out3;
|
||||
}
|
||||
}
|
||||
|
||||
screen->base.destroy = lima_screen_destroy;
|
||||
screen->base.get_name = lima_screen_get_name;
|
||||
screen->base.get_vendor = lima_screen_get_vendor;
|
||||
screen->base.get_device_vendor = lima_screen_get_device_vendor;
|
||||
screen->base.get_param = lima_screen_get_param;
|
||||
screen->base.get_paramf = lima_screen_get_paramf;
|
||||
screen->base.get_shader_param = lima_screen_get_shader_param;
|
||||
screen->base.context_create = lima_context_create;
|
||||
screen->base.is_format_supported = lima_screen_is_format_supported;
|
||||
screen->base.get_compiler_options = lima_screen_get_compiler_options;
|
||||
screen->base.query_dmabuf_modifiers = lima_screen_query_dmabuf_modifiers;
|
||||
|
||||
lima_resource_screen_init(screen);
|
||||
lima_fence_screen_init(screen);
|
||||
|
||||
slab_create_parent(&screen->transfer_pool, sizeof(struct lima_transfer), 16);
|
||||
|
||||
screen->refcnt = 1;
|
||||
|
||||
lima_screen_parse_env();
|
||||
|
||||
return &screen->base;
|
||||
|
||||
err_out3:
|
||||
lima_bo_free(screen->pp_buffer);
|
||||
err_out2:
|
||||
lima_bo_free(screen->gp_buffer);
|
||||
err_out1:
|
||||
lima_bo_table_fini(screen);
|
||||
err_out0:
|
||||
ralloc_free(screen);
|
||||
return NULL;
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_SCREEN
|
||||
#define H_LIMA_SCREEN
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "util/slab.h"
|
||||
#include "util/list.h"
|
||||
#include "os/os_thread.h"
|
||||
|
||||
#include "pipe/p_screen.h"
|
||||
|
||||
/* Bits of lima_debug, selected through the LIMA_DEBUG environment variable. */
#define LIMA_DEBUG_GP   (1 << 0)   /* "gp"   */
#define LIMA_DEBUG_PP   (1 << 1)   /* "pp"   */
#define LIMA_DEBUG_DUMP (1 << 2)   /* "dump" */

/* Process-wide settings, assigned while the screen is being created. */
extern uint32_t lima_debug;
extern FILE *lima_dump_command_stream;   /* NULL when dumping is off */
extern int lima_ctx_num_plb;
extern int lima_ppir_force_spilling;
|
||||
|
||||
struct ra_regs;
|
||||
|
||||
struct lima_screen {
|
||||
struct pipe_screen base;
|
||||
struct renderonly *ro;
|
||||
|
||||
int refcnt;
|
||||
void *winsys_priv;
|
||||
|
||||
int fd;
|
||||
int gpu_type;
|
||||
int num_pp;
|
||||
|
||||
/* bo table */
|
||||
mtx_t bo_table_lock;
|
||||
struct util_hash_table *bo_handles;
|
||||
struct util_hash_table *bo_flink_names;
|
||||
|
||||
struct slab_parent_pool transfer_pool;
|
||||
|
||||
struct ra_regs *pp_ra;
|
||||
|
||||
struct lima_bo *gp_buffer;
|
||||
#define gp_tile_heap_offset 0x000000
|
||||
#define gp_buffer_size 0x100000
|
||||
|
||||
struct lima_bo *pp_buffer;
|
||||
#define pp_frame_rsw_offset 0x0000
|
||||
#define pp_clear_program_offset 0x0040
|
||||
#define pp_reload_program_offset 0x0080
|
||||
#define pp_shared_index_offset 0x00c0
|
||||
#define pp_clear_gl_pos_offset 0x0100
|
||||
#define pp_stack_offset 0x1000
|
||||
#define pp_stack_pp_size 0x400 /* per pp, up to 8 pp */
|
||||
#define pp_stack_offset_end 0x3000
|
||||
#define pp_buffer_size 0x3000
|
||||
|
||||
};
|
||||
|
||||
/* Downcast a pipe_screen to the lima_screen that embeds it. */
static inline struct lima_screen *
lima_screen(struct pipe_screen *pscreen)
{
   struct lima_screen *ls = (struct lima_screen *)pscreen;

   return ls;
}
|
||||
|
||||
/* Create a screen on DRM fd `fd`; `ro` may be NULL.  Returns NULL on failure. */
struct pipe_screen *lima_screen_create(int fd, struct renderonly *ro);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,506 @@
|
|||
/*
|
||||
* Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
|
||||
* Copyright (c) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_resource.h"
|
||||
|
||||
static void
|
||||
lima_set_framebuffer_state(struct pipe_context *pctx,
|
||||
const struct pipe_framebuffer_state *framebuffer)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
/* submit need framebuffer info, flush before change it */
|
||||
lima_flush(ctx);
|
||||
|
||||
struct lima_context_framebuffer *fb = &ctx->framebuffer;
|
||||
|
||||
fb->samples = framebuffer->samples;
|
||||
|
||||
pipe_surface_reference(&fb->cbuf, framebuffer->cbufs[0]);
|
||||
pipe_surface_reference(&fb->zsbuf, framebuffer->zsbuf);
|
||||
|
||||
/* need align here? */
|
||||
fb->width = framebuffer->width;
|
||||
fb->height = framebuffer->height;
|
||||
|
||||
int width = align(framebuffer->width, 16) >> 4;
|
||||
int height = align(framebuffer->height, 16) >> 4;
|
||||
if (fb->tiled_w != width || fb->tiled_h != height) {
|
||||
fb->tiled_w = width;
|
||||
fb->tiled_h = height;
|
||||
|
||||
fb->shift_h = 0;
|
||||
fb->shift_w = 0;
|
||||
|
||||
int limit = ctx->plb_max_blk;
|
||||
while ((width * height) > limit) {
|
||||
if (width >= height) {
|
||||
width = (width + 1) >> 1;
|
||||
fb->shift_w++;
|
||||
} else {
|
||||
height = (height + 1) >> 1;
|
||||
fb->shift_h++;
|
||||
}
|
||||
}
|
||||
|
||||
fb->block_w = width;
|
||||
fb->block_h = height;
|
||||
|
||||
fb->shift_min = MIN3(fb->shift_w, fb->shift_h, 2);
|
||||
|
||||
debug_printf("fb dim change tiled=%d/%d block=%d/%d shift=%d/%d/%d\n",
|
||||
fb->tiled_w, fb->tiled_h, fb->block_w, fb->block_h,
|
||||
fb->shift_w, fb->shift_h, fb->shift_min);
|
||||
}
|
||||
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
/* pipe_context::set_polygon_stipple - the stipple pattern is ignored. */
static void
lima_set_polygon_stipple(struct pipe_context *pctx,
                         const struct pipe_poly_stipple *stipple)
{
   /* nothing to do */
}
|
||||
|
||||
static void *
|
||||
lima_create_depth_stencil_alpha_state(struct pipe_context *pctx,
|
||||
const struct pipe_depth_stencil_alpha_state *cso)
|
||||
{
|
||||
struct lima_depth_stencil_alpha_state *so;
|
||||
|
||||
so = CALLOC_STRUCT(lima_depth_stencil_alpha_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->zsa = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_ZSA;
|
||||
}
|
||||
|
||||
/* Release a state object made by lima_create_depth_stencil_alpha_state(). */
static void
lima_delete_depth_stencil_alpha_state(struct pipe_context *pctx, void *hwcso)
{
   /* pairs with CALLOC_STRUCT in the create hook */
   FREE(hwcso);
}
|
||||
|
||||
static void *
|
||||
lima_create_rasterizer_state(struct pipe_context *pctx,
|
||||
const struct pipe_rasterizer_state *cso)
|
||||
{
|
||||
struct lima_rasterizer_state *so;
|
||||
|
||||
so = CALLOC_STRUCT(lima_rasterizer_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_rasterizer_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->rasterizer = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_RASTERIZER;
|
||||
}
|
||||
|
||||
/* Release a state object made by lima_create_rasterizer_state(). */
static void
lima_delete_rasterizer_state(struct pipe_context *pctx, void *hwcso)
{
   /* pairs with CALLOC_STRUCT in the create hook */
   FREE(hwcso);
}
|
||||
|
||||
static void *
|
||||
lima_create_blend_state(struct pipe_context *pctx,
|
||||
const struct pipe_blend_state *cso)
|
||||
{
|
||||
struct lima_blend_state *so;
|
||||
|
||||
so = CALLOC_STRUCT(lima_blend_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_blend_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->blend = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_BLEND;
|
||||
}
|
||||
|
||||
/* Release a state object made by lima_create_blend_state(). */
static void
lima_delete_blend_state(struct pipe_context *pctx, void *hwcso)
{
   /* pairs with CALLOC_STRUCT in the create hook */
   FREE(hwcso);
}
|
||||
|
||||
static void *
|
||||
lima_create_vertex_elements_state(struct pipe_context *pctx, unsigned num_elements,
|
||||
const struct pipe_vertex_element *elements)
|
||||
{
|
||||
struct lima_vertex_element_state *so;
|
||||
|
||||
so = CALLOC_STRUCT(lima_vertex_element_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
|
||||
so->num_elements = num_elements;
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_bind_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->vertex_elements = hwcso;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_ELEM;
|
||||
}
|
||||
|
||||
/* Release a state object made by lima_create_vertex_elements_state(). */
static void
lima_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
{
   /* pairs with CALLOC_STRUCT in the create hook */
   FREE(hwcso);
}
|
||||
|
||||
static void
|
||||
lima_set_vertex_buffers(struct pipe_context *pctx,
|
||||
unsigned start_slot, unsigned count,
|
||||
const struct pipe_vertex_buffer *vb)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_context_vertex_buffer *so = &ctx->vertex_buffers;
|
||||
|
||||
util_set_vertex_buffers_mask(so->vb + start_slot, &so->enabled_mask,
|
||||
vb, start_slot, count);
|
||||
so->count = util_last_bit(so->enabled_mask);
|
||||
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_BUFF;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_viewport_states(struct pipe_context *pctx,
|
||||
unsigned start_slot,
|
||||
unsigned num_viewports,
|
||||
const struct pipe_viewport_state *viewport)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
/* reverse calculate the parameter of glViewport */
|
||||
ctx->viewport.x = viewport->translate[0] - viewport->scale[0];
|
||||
ctx->viewport.y = fabsf(viewport->translate[1] - fabsf(viewport->scale[1]));
|
||||
ctx->viewport.width = viewport->scale[0] * 2;
|
||||
ctx->viewport.height = fabsf(viewport->scale[1] * 2);
|
||||
|
||||
/* reverse calculate the parameter of glDepthRange */
|
||||
ctx->viewport.near = viewport->translate[2] - viewport->scale[2];
|
||||
ctx->viewport.far = viewport->translate[2] + viewport->scale[2];
|
||||
|
||||
ctx->viewport.transform = *viewport;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_VIEWPORT;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_scissor_states(struct pipe_context *pctx,
|
||||
unsigned start_slot,
|
||||
unsigned num_scissors,
|
||||
const struct pipe_scissor_state *scissor)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->scissor = *scissor;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_SCISSOR;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_blend_color(struct pipe_context *pctx,
|
||||
const struct pipe_blend_color *blend_color)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->blend_color = *blend_color;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_BLEND_COLOR;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_stencil_ref(struct pipe_context *pctx,
|
||||
const struct pipe_stencil_ref *stencil_ref)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
|
||||
ctx->stencil_ref = *stencil_ref;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_STENCIL_REF;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_constant_buffer(struct pipe_context *pctx,
|
||||
enum pipe_shader_type shader, uint index,
|
||||
const struct pipe_constant_buffer *cb)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_context_constant_buffer *so = ctx->const_buffer + shader;
|
||||
|
||||
assert(index == 0);
|
||||
|
||||
if (unlikely(!cb)) {
|
||||
so->buffer = NULL;
|
||||
so->size = 0;
|
||||
} else {
|
||||
assert(!cb->buffer);
|
||||
|
||||
so->buffer = cb->user_buffer + cb->buffer_offset;
|
||||
so->size = cb->buffer_size;
|
||||
}
|
||||
|
||||
so->dirty = true;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_CONST_BUFF;
|
||||
|
||||
}
|
||||
|
||||
static void *
|
||||
lima_create_sampler_state(struct pipe_context *pctx,
|
||||
const struct pipe_sampler_state *cso)
|
||||
{
|
||||
struct lima_sampler_state *so = CALLOC_STRUCT(lima_sampler_state);
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
memcpy(so, cso, sizeof(*cso));
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
/** Release a sampler state object created by lima_create_sampler_state(). */
static void
lima_sampler_state_delete(struct pipe_context *pctx, void *sstate)
{
   (void)pctx; /* the context is not needed to release the object */

   free(sstate);
}
|
||||
|
||||
static void
|
||||
lima_sampler_states_bind(struct pipe_context *pctx,
|
||||
enum pipe_shader_type shader, unsigned start,
|
||||
unsigned nr, void **hwcso)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
|
||||
unsigned i;
|
||||
unsigned new_nr = 0;
|
||||
|
||||
assert(start == 0);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (hwcso[i])
|
||||
new_nr = i + 1;
|
||||
lima_tex->samplers[i] = hwcso[i];
|
||||
}
|
||||
|
||||
for (; i < lima_tex->num_samplers; i++) {
|
||||
lima_tex->samplers[i] = NULL;
|
||||
}
|
||||
|
||||
lima_tex->num_samplers = new_nr;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES;
|
||||
}
|
||||
|
||||
static struct pipe_sampler_view *
|
||||
lima_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
const struct pipe_sampler_view *cso)
|
||||
{
|
||||
struct lima_sampler_view *so = CALLOC_STRUCT(lima_sampler_view);
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
pipe_reference(NULL, &prsc->reference);
|
||||
so->base.texture = prsc;
|
||||
so->base.reference.count = 1;
|
||||
so->base.context = pctx;
|
||||
|
||||
return &so->base;
|
||||
}
|
||||
|
||||
static void
|
||||
lima_sampler_view_destroy(struct pipe_context *pctx,
|
||||
struct pipe_sampler_view *pview)
|
||||
{
|
||||
struct lima_sampler_view *view = lima_sampler_view(pview);
|
||||
|
||||
pipe_resource_reference(&pview->texture, NULL);
|
||||
|
||||
free(view);
|
||||
}
|
||||
|
||||
static void
|
||||
lima_set_sampler_views(struct pipe_context *pctx,
|
||||
enum pipe_shader_type shader,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_sampler_view **views)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
|
||||
int i;
|
||||
unsigned new_nr = 0;
|
||||
|
||||
assert(start == 0);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (views[i])
|
||||
new_nr = i + 1;
|
||||
pipe_sampler_view_reference(&lima_tex->textures[i], views[i]);
|
||||
}
|
||||
|
||||
for (; i < lima_tex->num_textures; i++) {
|
||||
pipe_sampler_view_reference(&lima_tex->textures[i], NULL);
|
||||
}
|
||||
|
||||
lima_tex->num_textures = new_nr;
|
||||
ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES;
|
||||
}
|
||||
|
||||
static boolean
|
||||
lima_set_damage_region(struct pipe_context *pctx, unsigned num_rects, int *rects)
|
||||
{
|
||||
struct lima_context *ctx = lima_context(pctx);
|
||||
struct lima_damage_state *damage = &ctx->damage;
|
||||
int i;
|
||||
|
||||
if (damage->region)
|
||||
ralloc_free(damage->region);
|
||||
|
||||
if (!num_rects) {
|
||||
damage->region = NULL;
|
||||
damage->num_region = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
damage->region = ralloc_size(ctx, sizeof(*damage->region) * num_rects);
|
||||
if (!damage->region) {
|
||||
damage->num_region = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_rects; i++) {
|
||||
struct pipe_scissor_state *r = damage->region + i;
|
||||
/* region in tile unit */
|
||||
r->minx = rects[i * 4] >> 4;
|
||||
r->miny = rects[i * 4 + 1] >> 4;
|
||||
r->maxx = (rects[i * 4] + rects[i * 4 + 2] + 0xf) >> 4;
|
||||
r->maxy = (rects[i * 4 + 1] + rects[i * 4 + 3] + 0xf) >> 4;
|
||||
}
|
||||
|
||||
/* is region aligned to tiles? */
|
||||
damage->aligned = true;
|
||||
for (i = 0; i < num_rects * 4; i++) {
|
||||
if (rects[i] & 0xf) {
|
||||
damage->aligned = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
damage->num_region = num_rects;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
lima_state_init(struct lima_context *ctx)
|
||||
{
|
||||
ctx->base.set_framebuffer_state = lima_set_framebuffer_state;
|
||||
ctx->base.set_polygon_stipple = lima_set_polygon_stipple;
|
||||
ctx->base.set_viewport_states = lima_set_viewport_states;
|
||||
ctx->base.set_scissor_states = lima_set_scissor_states;
|
||||
ctx->base.set_blend_color = lima_set_blend_color;
|
||||
ctx->base.set_stencil_ref = lima_set_stencil_ref;
|
||||
|
||||
ctx->base.set_vertex_buffers = lima_set_vertex_buffers;
|
||||
ctx->base.set_constant_buffer = lima_set_constant_buffer;
|
||||
|
||||
ctx->base.create_depth_stencil_alpha_state = lima_create_depth_stencil_alpha_state;
|
||||
ctx->base.bind_depth_stencil_alpha_state = lima_bind_depth_stencil_alpha_state;
|
||||
ctx->base.delete_depth_stencil_alpha_state = lima_delete_depth_stencil_alpha_state;
|
||||
|
||||
ctx->base.create_rasterizer_state = lima_create_rasterizer_state;
|
||||
ctx->base.bind_rasterizer_state = lima_bind_rasterizer_state;
|
||||
ctx->base.delete_rasterizer_state = lima_delete_rasterizer_state;
|
||||
|
||||
ctx->base.create_blend_state = lima_create_blend_state;
|
||||
ctx->base.bind_blend_state = lima_bind_blend_state;
|
||||
ctx->base.delete_blend_state = lima_delete_blend_state;
|
||||
|
||||
ctx->base.create_vertex_elements_state = lima_create_vertex_elements_state;
|
||||
ctx->base.bind_vertex_elements_state = lima_bind_vertex_elements_state;
|
||||
ctx->base.delete_vertex_elements_state = lima_delete_vertex_elements_state;
|
||||
|
||||
ctx->base.create_sampler_state = lima_create_sampler_state;
|
||||
ctx->base.delete_sampler_state = lima_sampler_state_delete;
|
||||
ctx->base.bind_sampler_states = lima_sampler_states_bind;
|
||||
|
||||
ctx->base.create_sampler_view = lima_create_sampler_view;
|
||||
ctx->base.sampler_view_destroy = lima_sampler_view_destroy;
|
||||
ctx->base.set_sampler_views = lima_set_sampler_views;
|
||||
}
|
||||
|
||||
void
|
||||
lima_state_fini(struct lima_context *ctx)
|
||||
{
|
||||
struct lima_context_vertex_buffer *so = &ctx->vertex_buffers;
|
||||
|
||||
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, NULL,
|
||||
0, ARRAY_SIZE(so->vb));
|
||||
|
||||
pipe_surface_reference(&ctx->framebuffer.cbuf, NULL);
|
||||
pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL);
|
||||
}
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "xf86drm.h"
|
||||
#include "libsync.h"
|
||||
#include "drm-uapi/lima_drm.h"
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/os_time.h"
|
||||
|
||||
#include "lima_screen.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_submit.h"
|
||||
#include "lima_bo.h"
|
||||
#include "lima_util.h"
|
||||
|
||||
/* Per-pipe job submission state: the DRM sync objects used to order jobs
 * and the list of buffer objects referenced by the job being built. */
struct lima_submit {
|
||||
struct lima_screen *screen; /* provides the DRM fd used for all ioctls */
|
||||
uint32_t pipe; /* copied into drm_lima_gem_submit.pipe */
|
||||
uint32_t ctx; /* copied into drm_lima_gem_submit.ctx (from lima_context id) */
|
||||
|
||||
int in_sync_fd; /* accumulated sync-file fd to wait on, or -1 when none */
|
||||
uint32_t in_sync; /* syncobj handle the sync file is imported into at submit */
|
||||
uint32_t out_sync; /* syncobj handle waited on / exported for job completion */
|
||||
|
||||
struct util_dynarray gem_bos; /* struct drm_lima_gem_submit_bo entries handed to the kernel */
|
||||
struct util_dynarray bos; /* struct lima_bo * entries, one reference held per entry */
|
||||
};
|
||||
|
||||
|
||||
/* Convert a pointer to the 64-bit integer form used in DRM ioctl structs.
 * Goes through uintptr_t so the pointer is never truncated on targets
 * where unsigned long is narrower than a pointer. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
|
||||
|
||||
struct lima_submit *lima_submit_create(struct lima_context *ctx, uint32_t pipe)
|
||||
{
|
||||
struct lima_submit *s;
|
||||
|
||||
s = rzalloc(ctx, struct lima_submit);
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
s->screen = lima_screen(ctx->base.screen);
|
||||
s->pipe = pipe;
|
||||
s->ctx = ctx->id;
|
||||
s->in_sync_fd = -1;
|
||||
|
||||
int err = drmSyncobjCreate(s->screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&s->out_sync);
|
||||
if (err)
|
||||
goto err_out0;
|
||||
|
||||
err = drmSyncobjCreate(s->screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&s->in_sync);
|
||||
if (err)
|
||||
goto err_out1;
|
||||
|
||||
util_dynarray_init(&s->gem_bos, s);
|
||||
|
||||
return s;
|
||||
|
||||
err_out1:
|
||||
drmSyncobjDestroy(s->screen->fd, s->out_sync);
|
||||
err_out0:
|
||||
ralloc_free(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void lima_submit_free(struct lima_submit *submit)
|
||||
{
|
||||
if (submit->in_sync_fd >= 0)
|
||||
close(submit->in_sync_fd);
|
||||
drmSyncobjDestroy(submit->screen->fd, submit->in_sync);
|
||||
drmSyncobjDestroy(submit->screen->fd, submit->out_sync);
|
||||
}
|
||||
|
||||
bool lima_submit_add_bo(struct lima_submit *submit, struct lima_bo *bo, uint32_t flags)
|
||||
{
|
||||
util_dynarray_foreach(&submit->gem_bos, struct drm_lima_gem_submit_bo, gem_bo) {
|
||||
if (bo->handle == gem_bo->handle) {
|
||||
gem_bo->flags |= flags;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
struct drm_lima_gem_submit_bo *submit_bo =
|
||||
util_dynarray_grow(&submit->gem_bos, sizeof(*submit_bo));
|
||||
submit_bo->handle = bo->handle;
|
||||
submit_bo->flags = flags;
|
||||
|
||||
struct lima_bo **jbo = util_dynarray_grow(&submit->bos, sizeof(*jbo));
|
||||
*jbo = bo;
|
||||
|
||||
/* prevent bo from being freed when submit start */
|
||||
lima_bo_reference(bo);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size)
|
||||
{
|
||||
struct drm_lima_gem_submit req = {
|
||||
.ctx = submit->ctx,
|
||||
.pipe = submit->pipe,
|
||||
.nr_bos = submit->gem_bos.size / sizeof(struct drm_lima_gem_submit_bo),
|
||||
.bos = VOID2U64(util_dynarray_begin(&submit->gem_bos)),
|
||||
.frame = VOID2U64(frame),
|
||||
.frame_size = size,
|
||||
};
|
||||
|
||||
if (submit->in_sync_fd >= 0) {
|
||||
int err = drmSyncobjImportSyncFile(submit->screen->fd, submit->in_sync,
|
||||
submit->in_sync_fd);
|
||||
if (err)
|
||||
return false;
|
||||
|
||||
req.in_sync[0] = submit->in_sync;
|
||||
close(submit->in_sync_fd);
|
||||
submit->in_sync_fd = -1;
|
||||
}
|
||||
|
||||
bool ret = drmIoctl(submit->screen->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
|
||||
|
||||
util_dynarray_foreach(&submit->bos, struct lima_bo *, bo) {
|
||||
lima_bo_free(*bo);
|
||||
}
|
||||
|
||||
util_dynarray_clear(&submit->gem_bos);
|
||||
util_dynarray_clear(&submit->bos);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool lima_submit_wait(struct lima_submit *submit, uint64_t timeout_ns)
|
||||
{
|
||||
int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns);
|
||||
|
||||
return !drmSyncobjWait(submit->screen->fd, &submit->out_sync, 1, abs_timeout, 0, NULL);
|
||||
}
|
||||
|
||||
bool lima_submit_has_bo(struct lima_submit *submit, struct lima_bo *bo, bool all)
|
||||
{
|
||||
util_dynarray_foreach(&submit->gem_bos, struct drm_lima_gem_submit_bo, gem_bo) {
|
||||
if (bo->handle == gem_bo->handle) {
|
||||
if (all)
|
||||
return true;
|
||||
else
|
||||
return gem_bo->flags & LIMA_SUBMIT_BO_WRITE;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool lima_submit_add_in_sync(struct lima_submit *submit, int fd)
|
||||
{
|
||||
return !sync_accumulate("lima", &submit->in_sync_fd, fd);
|
||||
}
|
||||
|
||||
bool lima_submit_get_out_sync(struct lima_submit *submit, int *fd)
|
||||
{
|
||||
return !drmSyncobjExportSyncFile(submit->screen->fd, submit->out_sync, fd);
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_SUBMIT
|
||||
#define H_LIMA_SUBMIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct lima_context;
|
||||
struct lima_submit;
|
||||
struct lima_bo;
|
||||
|
||||
/* Create a submit object for one pipe; returns NULL on failure. */
struct lima_submit *lima_submit_create(struct lima_context *ctx, uint32_t pipe);
|
||||
/* Close any pending input fence fd and destroy the submit's sync objects. */
void lima_submit_free(struct lima_submit *submit);
|
||||
/* Add a BO to the pending job, or OR flags into its existing entry. */
bool lima_submit_add_bo(struct lima_submit *submit, struct lima_bo *bo, uint32_t flags);
|
||||
/* Submit the pending job with the given frame data; true when the ioctl succeeds. */
bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size);
|
||||
/* Wait on the submit's out_sync object for up to timeout_ns. */
bool lima_submit_wait(struct lima_submit *submit, uint64_t timeout_ns);
|
||||
/* Is bo listed in the pending job? With all == false only write use counts. */
bool lima_submit_has_bo(struct lima_submit *submit, struct lima_bo *bo, bool all);
|
||||
/* Accumulate a sync-file fd into the pending input fence. */
bool lima_submit_add_in_sync(struct lima_submit *submit, int fd);
|
||||
/* Export the submit's out_sync object as a sync-file fd in *fd. */
bool lima_submit_get_out_sync(struct lima_submit *submit, int *fd);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,278 @@
|
|||
/*
|
||||
* Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
|
||||
* Copyright (c) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_transfer.h"
|
||||
|
||||
#include "lima_bo.h"
|
||||
#include "lima_context.h"
|
||||
#include "lima_screen.h"
|
||||
#include "lima_texture.h"
|
||||
#include "lima_resource.h"
|
||||
#include "lima_submit.h"
|
||||
#include "lima_util.h"
|
||||
|
||||
#include <drm-uapi/lima_drm.h>
|
||||
|
||||
/* Texel format codes placed in the low bits of the value returned by
 * pipe_format_to_lima(). */
#define LIMA_TEXEL_FORMAT_BGR_565 0x0e
|
||||
#define LIMA_TEXEL_FORMAT_RGB_888 0x15
|
||||
#define LIMA_TEXEL_FORMAT_RGBA_8888 0x16
|
||||
#define LIMA_TEXEL_FORMAT_RGBX_8888 0x17
|
||||
|
||||
/* Bytes reserved at the start of the texture descriptor buffer for the
 * list of 32-bit descriptor addresses; the descriptors themselves follow
 * it (see lima_update_textures). */
#define lima_tex_list_size 64
|
||||
|
||||
static uint32_t pipe_format_to_lima(enum pipe_format pformat)
|
||||
{
|
||||
unsigned swap_chans = 0, flag1 = 0, format;
|
||||
|
||||
switch (pformat) {
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
swap_chans = 1;
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
format = LIMA_TEXEL_FORMAT_RGBA_8888;
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
swap_chans = 1;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
format = LIMA_TEXEL_FORMAT_RGBX_8888;
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8_UNORM:
|
||||
swap_chans = 1;
|
||||
format = LIMA_TEXEL_FORMAT_RGB_888;
|
||||
break;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
format = LIMA_TEXEL_FORMAT_BGR_565;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return (swap_chans << 7) | (flag1 << 6) | format;
|
||||
}
|
||||
|
||||
void
|
||||
lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc,
|
||||
struct pipe_resource *prsc,
|
||||
unsigned first_level, unsigned last_level)
|
||||
{
|
||||
unsigned width, height, layout, i;
|
||||
struct lima_resource *lima_res = lima_resource(prsc);
|
||||
|
||||
width = prsc->width0;
|
||||
height = prsc->height0;
|
||||
if (first_level != 0) {
|
||||
width = u_minify(width, first_level);
|
||||
height = u_minify(height, first_level);
|
||||
}
|
||||
|
||||
desc[0] |= pipe_format_to_lima(prsc->format);
|
||||
desc[2] |= (width << 22);
|
||||
desc[3] |= 0x10000 | (height << 3) | (width >> 10);
|
||||
|
||||
if (lima_res->tiled)
|
||||
layout = 3;
|
||||
else {
|
||||
/* for padded linear texture */
|
||||
if (lima_res->levels[first_level].width != width) {
|
||||
desc[0] |= lima_res->levels[first_level].width << 18;
|
||||
desc[2] |= 0x100;
|
||||
}
|
||||
layout = 0;
|
||||
}
|
||||
|
||||
lima_submit_add_bo(ctx->pp_submit, lima_res->bo, LIMA_SUBMIT_BO_READ);
|
||||
|
||||
uint32_t base_va = lima_res->bo->va;
|
||||
|
||||
/* attach level 0 */
|
||||
desc[6] |= (base_va << 24) | (layout << 13);
|
||||
desc[7] |= base_va >> 8;
|
||||
|
||||
/* Attach remaining levels.
|
||||
* Each subsequent mipmap address is specified using the 26 msbs.
|
||||
* These addresses are then packed continuously in memory */
|
||||
unsigned current_desc_index = 7;
|
||||
unsigned current_desc_bit_index = 24;
|
||||
for (i = 1; i < LIMA_MAX_MIP_LEVELS; i++) {
|
||||
if (first_level + i > last_level)
|
||||
break;
|
||||
|
||||
uint32_t address = base_va + lima_res->levels[i].offset;
|
||||
address = (address >> 6);
|
||||
desc[current_desc_index] |= (address << current_desc_bit_index);
|
||||
if (current_desc_bit_index <= 6) {
|
||||
current_desc_bit_index += 26;
|
||||
if (current_desc_bit_index >= 32) {
|
||||
current_desc_bit_index &= 0x1F;
|
||||
current_desc_index++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
desc[current_desc_index + 1] |= (address >> (32 - current_desc_bit_index));
|
||||
current_desc_bit_index = (current_desc_bit_index + 26) & 0x1F;
|
||||
current_desc_index++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sampler,
|
||||
struct lima_sampler_view *texture, void *pdesc)
|
||||
{
|
||||
uint32_t *desc = pdesc;
|
||||
unsigned first_level;
|
||||
unsigned last_level;
|
||||
bool mipmapping;
|
||||
|
||||
memset(desc, 0, lima_tex_desc_size);
|
||||
|
||||
/* 2D texture */
|
||||
desc[1] |= 0x400;
|
||||
|
||||
desc[1] &= ~0xff000000;
|
||||
switch (sampler->base.min_mip_filter) {
|
||||
case PIPE_TEX_MIPFILTER_NEAREST:
|
||||
first_level = texture->base.u.tex.first_level;
|
||||
last_level = texture->base.u.tex.last_level;
|
||||
if (last_level - first_level >= LIMA_MAX_MIP_LEVELS)
|
||||
last_level = first_level + LIMA_MAX_MIP_LEVELS - 1;
|
||||
mipmapping = true;
|
||||
desc[1] |= ((last_level - first_level) << 24);
|
||||
desc[2] &= ~0x0600;
|
||||
break;
|
||||
case PIPE_TEX_MIPFILTER_LINEAR:
|
||||
first_level = texture->base.u.tex.first_level;
|
||||
last_level = texture->base.u.tex.last_level;
|
||||
if (last_level - first_level >= LIMA_MAX_MIP_LEVELS)
|
||||
last_level = first_level + LIMA_MAX_MIP_LEVELS - 1;
|
||||
mipmapping = true;
|
||||
desc[1] |= ((last_level - first_level) << 24);
|
||||
desc[2] |= 0x0600;
|
||||
break;
|
||||
case PIPE_TEX_MIPFILTER_NONE:
|
||||
default:
|
||||
first_level = 0;
|
||||
last_level = 0;
|
||||
mipmapping = false;
|
||||
desc[2] &= ~0x0600;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (sampler->base.mag_img_filter) {
|
||||
case PIPE_TEX_FILTER_LINEAR:
|
||||
desc[2] &= ~0x1000;
|
||||
/* no mipmap, filter_mag = linear */
|
||||
if (!mipmapping)
|
||||
desc[1] |= 0x80000000;
|
||||
break;
|
||||
case PIPE_TEX_FILTER_NEAREST:
|
||||
default:
|
||||
desc[2] |= 0x1000;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (sampler->base.min_img_filter) {
|
||||
break;
|
||||
case PIPE_TEX_FILTER_LINEAR:
|
||||
desc[2] &= ~0x0800;
|
||||
break;
|
||||
case PIPE_TEX_FILTER_NEAREST:
|
||||
default:
|
||||
desc[2] |= 0x0800;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Only clamp, clamp to edge, repeat and mirror repeat are supported */
|
||||
desc[2] &= ~0xe000;
|
||||
switch (sampler->base.wrap_s) {
|
||||
case PIPE_TEX_WRAP_CLAMP:
|
||||
desc[2] |= 0x4000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
|
||||
desc[2] |= 0x2000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_MIRROR_REPEAT:
|
||||
desc[2] |= 0x8000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_REPEAT:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Only clamp, clamp to edge, repeat and mirror repeat are supported */
|
||||
desc[2] &= ~0x070000;
|
||||
switch (sampler->base.wrap_t) {
|
||||
case PIPE_TEX_WRAP_CLAMP:
|
||||
desc[2] |= 0x020000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
|
||||
desc[2] |= 0x010000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_MIRROR_REPEAT:
|
||||
desc[2] |= 0x040000;
|
||||
break;
|
||||
case PIPE_TEX_WRAP_REPEAT:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
lima_texture_desc_set_res(ctx, desc, texture->base.texture,
|
||||
first_level, last_level);
|
||||
}
|
||||
|
||||
void
|
||||
lima_update_textures(struct lima_context *ctx)
|
||||
{
|
||||
struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
|
||||
|
||||
assert (lima_tex->num_samplers <= 16);
|
||||
|
||||
/* Nothing to do - we have no samplers or textures */
|
||||
if (!lima_tex->num_samplers || !lima_tex->num_textures)
|
||||
return;
|
||||
|
||||
unsigned size = lima_tex_list_size + lima_tex->num_samplers * lima_tex_desc_size;
|
||||
uint32_t *descs =
|
||||
lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_tex_desc, size, true);
|
||||
|
||||
for (int i = 0; i < lima_tex->num_samplers; i++) {
|
||||
off_t offset = lima_tex_desc_size * i + lima_tex_list_size;
|
||||
struct lima_sampler_state *sampler = lima_sampler_state(lima_tex->samplers[i]);
|
||||
struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]);
|
||||
|
||||
descs[i] = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc,
|
||||
LIMA_CTX_BUFF_SUBMIT_PP) + offset;
|
||||
lima_update_tex_desc(ctx, sampler, texture, (void *)descs + offset);
|
||||
}
|
||||
|
||||
lima_dump_command_stream_print(
|
||||
descs, size, false, "add textures_desc at va %x\n",
|
||||
lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc, 0));
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_TEXTURE
|
||||
#define H_LIMA_TEXTURE
|
||||
|
||||
/* Size in bytes of one texture descriptor. */
#define lima_tex_desc_size 64
|
||||
|
||||
/* Fill the resource-dependent parts of a texture descriptor (format,
 * dimensions, layout, mip level addresses) for levels first_level through
 * last_level of prsc. */
void lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc,
|
||||
struct pipe_resource *prsc,
|
||||
unsigned first_level, unsigned last_level);
|
||||
/* Rebuild the texture descriptor buffer for the currently bound samplers. */
void lima_update_textures(struct lima_context *ctx);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
|
||||
* Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
* Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "lima_tiling.h"
|
||||
|
||||
/* Texel ordering inside one 16x16 tile: space_filler[y & 15][x & 15] is
 * the index (0..255) of texel (x, y) within its tile. Used by the tiled
 * load/store helpers in this file. */
uint32_t space_filler[16][16] = {
|
||||
{ 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, },
|
||||
{ 3, 2, 7, 6, 19, 18, 23, 22, 67, 66, 71, 70, 83, 82, 87, 86, },
|
||||
{ 12, 13, 8, 9, 28, 29, 24, 25, 76, 77, 72, 73, 92, 93, 88, 89, },
|
||||
{ 15, 14, 11, 10, 31, 30, 27, 26, 79, 78, 75, 74, 95, 94, 91, 90, },
|
||||
{ 48, 49, 52, 53, 32, 33, 36, 37, 112, 113, 116, 117, 96, 97, 100, 101, },
|
||||
{ 51, 50, 55, 54, 35, 34, 39, 38, 115, 114, 119, 118, 99, 98, 103, 102, },
|
||||
{ 60, 61, 56, 57, 44, 45, 40, 41, 124, 125, 120, 121, 108, 109, 104, 105, },
|
||||
{ 63, 62, 59, 58, 47, 46, 43, 42, 127, 126, 123, 122, 111, 110, 107, 106, },
|
||||
{ 192, 193, 196, 197, 208, 209, 212, 213, 128, 129, 132, 133, 144, 145, 148, 149, },
|
||||
{ 195, 194, 199, 198, 211, 210, 215, 214, 131, 130, 135, 134, 147, 146, 151, 150, },
|
||||
{ 204, 205, 200, 201, 220, 221, 216, 217, 140, 141, 136, 137, 156, 157, 152, 153, },
|
||||
{ 207, 206, 203, 202, 223, 222, 219, 218, 143, 142, 139, 138, 159, 158, 155, 154, },
|
||||
{ 240, 241, 244, 245, 224, 225, 228, 229, 176, 177, 180, 181, 160, 161, 164, 165, },
|
||||
{ 243, 242, 247, 246, 227, 226, 231, 230, 179, 178, 183, 182, 163, 162, 167, 166, },
|
||||
{ 252, 253, 248, 249, 236, 237, 232, 233, 188, 189, 184, 185, 172, 173, 168, 169, },
|
||||
{ 255, 254, 251, 250, 239, 238, 235, 234, 191, 190, 187, 186, 175, 174, 171, 170, },
|
||||
};
|
||||
|
||||
static void
|
||||
lima_store_tiled_image_bpp4(void *dst, const void *src,
|
||||
const struct pipe_box *box,
|
||||
uint32_t dst_stride,
|
||||
uint32_t src_stride)
|
||||
{
|
||||
for (int y = box->y, src_y = 0; src_y < box->height; ++y, ++src_y) {
|
||||
int block_y = y & ~0x0f;
|
||||
int rem_y = y & 0x0F;
|
||||
int block_start_s = block_y * dst_stride;
|
||||
int source_start = src_y * src_stride;
|
||||
|
||||
for (int x = box->x, src_x = 0; src_x < box->width; ++x, ++src_x) {
|
||||
int block_x_s = (x >> 4) * 256;
|
||||
int rem_x = x & 0x0F;
|
||||
|
||||
int index = space_filler[rem_y][rem_x];
|
||||
const uint32_t *source = src + source_start + 4 * src_x;
|
||||
uint32_t *dest = dst + block_start_s + 4 * (block_x_s + index);
|
||||
|
||||
*dest = *source;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lima_store_tiled_image_generic(void *dst, const void *src,
|
||||
const struct pipe_box *box,
|
||||
uint32_t dst_stride,
|
||||
uint32_t src_stride,
|
||||
uint32_t bpp)
|
||||
{
|
||||
for (int y = box->y, src_y = 0; src_y < box->height; ++y, ++src_y) {
|
||||
int block_y = y & ~0x0f;
|
||||
int rem_y = y & 0x0F;
|
||||
int block_start_s = block_y * dst_stride;
|
||||
int source_start = src_y * src_stride;
|
||||
|
||||
for (int x = box->x, src_x = 0; src_x < box->width; ++x, ++src_x) {
|
||||
int block_x_s = (x >> 4) * 256;
|
||||
int rem_x = x & 0x0F;
|
||||
|
||||
int index = space_filler[rem_y][rem_x];
|
||||
const uint8_t *src8 = src;
|
||||
const uint8_t *source = &src8[source_start + bpp * src_x];
|
||||
uint8_t *dest = dst + block_start_s + bpp * (block_x_s + index);
|
||||
|
||||
for (int b = 0; b < bpp; ++b)
|
||||
dest[b] = source[b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lima_load_tiled_image_bpp4(void *dst, const void *src,
|
||||
const struct pipe_box *box,
|
||||
uint32_t dst_stride,
|
||||
uint32_t src_stride)
|
||||
{
|
||||
for (int y = box->y, dest_y = 0; dest_y < box->height; ++y, ++dest_y) {
|
||||
int block_y = y & ~0x0f;
|
||||
int rem_y = y & 0x0F;
|
||||
int block_start_s = block_y * src_stride;
|
||||
int dest_start = dest_y * dst_stride;
|
||||
|
||||
for (int x = box->x, dest_x = 0; dest_x < box->width; ++x, ++dest_x) {
|
||||
int block_x_s = (x >> 4) * 256;
|
||||
int rem_x = x & 0x0F;
|
||||
|
||||
int index = space_filler[rem_y][rem_x];
|
||||
uint32_t *dest = dst + dest_start + 4 * dest_x;
|
||||
const uint32_t *source = src + block_start_s + 4 * (block_x_s + index);
|
||||
|
||||
*dest = *source;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lima_load_tiled_image_generic(void *dst, const void *src,
|
||||
const struct pipe_box *box,
|
||||
uint32_t dst_stride,
|
||||
uint32_t src_stride,
|
||||
uint32_t bpp)
|
||||
{
|
||||
for (int y = box->y, dest_y = 0; dest_y < box->height; ++y, ++dest_y) {
|
||||
int block_y = y & ~0x0f;
|
||||
int rem_y = y & 0x0F;
|
||||
int block_start_s = block_y * src_stride;
|
||||
int dest_start = dest_y * dst_stride;
|
||||
|
||||
for (int x = box->x, dest_x = 0; dest_x < box->width; ++x, ++dest_x) {
|
||||
int block_x_s = (x >> 4) * 256;
|
||||
int rem_x = x & 0x0F;
|
||||
|
||||
int index = space_filler[rem_y][rem_x];
|
||||
uint8_t *dst8 = dst;
|
||||
uint8_t *dest = &dst8[dest_start + bpp * dest_x];
|
||||
const uint8_t *source = src + block_start_s + bpp * (block_x_s + index);
|
||||
|
||||
for (int b = 0; b < bpp; ++b)
|
||||
dest[b] = source[b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Write a box of pixels from a linear buffer into a tiled image.
 *
 * 4-byte pixels take the dedicated word-copy path; every other pixel size
 * goes through the byte-wise generic path.
 *
 * dst         tiled destination image
 * src         linear source buffer
 * box         region of the tiled image to write
 * dst_stride  stride of the tiled destination, in bytes
 * src_stride  stride of the linear source, in bytes
 * bpp         size of one pixel in bytes
 */
void
lima_store_tiled_image(void *dst, const void *src,
                       const struct pipe_box *box,
                       uint32_t dst_stride,
                       uint32_t src_stride,
                       uint32_t bpp)
{
   if (bpp == 4) {
      lima_store_tiled_image_bpp4(dst, src, box, dst_stride, src_stride);
      return;
   }

   lima_store_tiled_image_generic(dst, src, box, dst_stride, src_stride, bpp);
}
|
||||
|
||||
/*
 * Read a box of pixels from a tiled image into a linear buffer.
 *
 * 4-byte pixels take the dedicated word-copy path; every other pixel size
 * goes through the byte-wise generic path.
 *
 * dst         linear destination buffer
 * src         tiled source image
 * box         region of the tiled image to read
 * dst_stride  stride of the linear destination, in bytes
 * src_stride  stride of the tiled source, in bytes
 * bpp         size of one pixel in bytes
 */
void
lima_load_tiled_image(void *dst, const void *src,
                      const struct pipe_box *box,
                      uint32_t dst_stride,
                      uint32_t src_stride,
                      uint32_t bpp)
{
   if (bpp == 4) {
      lima_load_tiled_image_bpp4(dst, src, box, dst_stride, src_stride);
      return;
   }

   lima_load_tiled_image_generic(dst, src, box, dst_stride, src_stride, bpp);
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
|
||||
* Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
* Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_TILING
#define H_LIMA_TILING

#include "util/u_box.h"

/*
 * Copy the pixels selected by box out of the tiled image src into the
 * linear buffer dst. Strides are in bytes; bpp is the pixel size in bytes.
 */
void lima_load_tiled_image(void *dst, const void *src,
                           const struct pipe_box *box,
                           uint32_t dst_stride,
                           uint32_t src_stride,
                           uint32_t bpp);

/*
 * Copy pixels from the linear buffer src into the region of the tiled
 * image dst selected by box. Strides are in bytes; bpp is the pixel size
 * in bytes.
 */
void lima_store_tiled_image(void *dst, const void *src,
                            const struct pipe_box *box,
                            uint32_t dst_stride,
                            uint32_t src_stride,
                            uint32_t bpp);

#endif
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <pipe/p_defines.h>
|
||||
|
||||
#include "lima_util.h"
|
||||
|
||||
/* Stream that lima_dump_command_stream_print() writes to; while it is NULL
 * that function prints nothing.
 */
FILE *lima_dump_command_stream = NULL;
|
||||
|
||||
bool lima_get_absolute_timeout(uint64_t *timeout)
|
||||
{
|
||||
struct timespec current;
|
||||
uint64_t current_ns;
|
||||
|
||||
if (*timeout == PIPE_TIMEOUT_INFINITE)
|
||||
return true;
|
||||
|
||||
if (clock_gettime(CLOCK_MONOTONIC, ¤t))
|
||||
return false;
|
||||
|
||||
current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
|
||||
current_ns += current.tv_nsec;
|
||||
*timeout += current_ns;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Print a buffer as a table of 32-bit words, four words per line, each line
 * prefixed with the byte offset of its first word in hex.
 *
 * fp        stream to print to
 * data      buffer to dump
 * size      number of valid bytes in data; nothing but a newline is printed
 *           when it is zero or negative
 * is_float  print each word as a float ("%f") instead of as hex
 *
 * If size is not a multiple of 4, the final word is built from the
 * remaining bytes and zero-padded, so no byte beyond size is ever read.
 */
void lima_dump_blob(FILE *fp, void *data, int size, bool is_float)
{
   const unsigned char *bytes = data;
   int words = 0;

   if (size > 0)
      words = size / 4 + (size % 4 != 0);

   for (int i = 0; i < words; i++) {
      /* Assemble the word bytewise through a union: this avoids both the
       * out-of-bounds read on a trailing partial word and pointer-cast
       * type punning of the caller's buffer.
       */
      union {
         uint32_t u;
         float f;
         unsigned char b[4];
      } word = { .u = 0 };
      int offset = i * 4;
      int avail = size - offset < 4 ? size - offset : 4;

      for (int b = 0; b < avail; b++)
         word.b[b] = bytes[offset + b];

      if (i % 4 == 0) {
         if (i)
            fprintf(fp, "\n");
         fprintf(fp, "%04x:", (unsigned)offset);
      }

      if (is_float)
         fprintf(fp, " %f", word.f);
      else
         fprintf(fp, " 0x%08x", (unsigned)word.u);
   }
   fprintf(fp, "\n");
}
|
||||
|
||||
void
|
||||
lima_dump_command_stream_print(void *data, int size, bool is_float,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
if (lima_dump_command_stream) {
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
vfprintf(lima_dump_command_stream, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
lima_dump_blob(lima_dump_command_stream, data, size, is_float);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2019 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef H_LIMA_UTIL
#define H_LIMA_UTIL

#include <stdio.h>   /* FILE, used by lima_dump_blob() below */
#include <stdint.h>
#include <stdbool.h>

#define LIMA_PAGE_SIZE 4096

/*
 * Convert the relative timeout in *timeout (nanoseconds) into an absolute
 * CLOCK_MONOTONIC deadline, in place. PIPE_TIMEOUT_INFINITE is left as is.
 * Returns false if the clock could not be read.
 */
bool lima_get_absolute_timeout(uint64_t *timeout);

/*
 * Print size bytes of data to fp as 32-bit words, four per line with a hex
 * offset prefix; words are printed as floats when is_float is set, as hex
 * otherwise.
 */
void lima_dump_blob(FILE *fp, void *data, int size, bool is_float);

/*
 * Print a printf-style heading and then a lima_dump_blob() dump of data to
 * the command stream dump file, if one is set.
 */
void lima_dump_command_stream_print(void *data, int size, bool is_float,
                                    const char *fmt, ...);

#endif
|
|
@ -0,0 +1,89 @@
|
|||
# Copyright © 2018 Lima Project
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# All source and header files of the lima gallium driver: the GP and PP
# shader compiler back ends under ir/, followed by the driver proper.
# The list is compiled into the 'lima' static library.
files_lima = files(
|
||||
'ir/gp/gpir.h',
|
||||
'ir/gp/nir.c',
|
||||
'ir/gp/node.c',
|
||||
'ir/gp/lower.c',
|
||||
'ir/gp/scheduler.c',
|
||||
'ir/gp/instr.c',
|
||||
'ir/gp/codegen.h',
|
||||
'ir/gp/codegen.c',
|
||||
'ir/gp/reduce_scheduler.c',
|
||||
'ir/gp/value_regalloc.c',
|
||||
'ir/gp/physical_regalloc.c',
|
||||
'ir/gp/disasm.c',
|
||||
|
||||
'ir/pp/ppir.h',
|
||||
'ir/pp/nir.c',
|
||||
'ir/pp/node.c',
|
||||
'ir/pp/lower.c',
|
||||
'ir/pp/scheduler.c',
|
||||
'ir/pp/instr.c',
|
||||
'ir/pp/regalloc.c',
|
||||
'ir/pp/codegen.h',
|
||||
'ir/pp/codegen.c',
|
||||
'ir/pp/node_to_instr.c',
|
||||
'ir/pp/disasm.c',
|
||||
|
||||
'ir/lima_nir_lower_uniform_to_scalar.c',
|
||||
|
||||
'ir/lima_ir.h',
|
||||
|
||||
'lima_screen.c',
|
||||
'lima_screen.h',
|
||||
'lima_context.c',
|
||||
'lima_context.h',
|
||||
'lima_resource.c',
|
||||
'lima_resource.h',
|
||||
'lima_state.c',
|
||||
'lima_draw.c',
|
||||
'lima_program.c',
|
||||
'lima_query.c',
|
||||
'lima_bo.c',
|
||||
'lima_bo.h',
|
||||
'lima_submit.c',
|
||||
'lima_submit.h',
|
||||
'lima_util.c',
|
||||
'lima_util.h',
|
||||
'lima_texture.c',
|
||||
'lima_texture.h',
|
||||
'lima_fence.c',
|
||||
'lima_fence.h',
|
||||
'lima_tiling.c',
|
||||
'lima_tiling.h',
|
||||
|
||||
)
|
||||
|
||||
# Static library holding the driver itself, built from files_lima against
# libdrm and the NIR headers.
liblima = static_library(
|
||||
'lima',
|
||||
files_lima,
|
||||
include_directories : [
|
||||
inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers,
|
||||
],
|
||||
dependencies : [dep_libdrm, idep_nir_headers],
|
||||
)
|
||||
|
||||
# Dependency object for targets that want the lima driver: defines
# GALLIUM_LIMA and links the driver and its DRM winsys library plus NIR.
driver_lima = declare_dependency(
|
||||
compile_args : '-DGALLIUM_LIMA',
|
||||
link_with : [liblima, liblimawinsys],
|
||||
dependencies : idep_nir,
|
||||
)
|
|
@ -143,6 +143,12 @@ if with_gallium_virgl
|
|||
else
|
||||
driver_virgl = declare_dependency()
|
||||
endif
|
||||
if with_gallium_lima
|
||||
subdir('winsys/lima/drm')
|
||||
subdir('drivers/lima')
|
||||
else
|
||||
driver_lima = declare_dependency()
|
||||
endif
|
||||
if with_gallium_opencl
|
||||
# TODO: this isn't really clover specific, but ATM clover is the only
|
||||
# consumer
|
||||
|
|
|
@ -58,7 +58,7 @@ libgallium_dri = shared_library(
|
|||
driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
|
||||
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
||||
driver_tegra, driver_i915, driver_svga, driver_virgl,
|
||||
driver_swr, driver_panfrost, driver_iris
|
||||
driver_swr, driver_panfrost, driver_iris, driver_lima
|
||||
],
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
|
@ -93,7 +93,8 @@ foreach d : [[with_gallium_kmsro, [
|
|||
[with_gallium_r300, 'r300_dri.so'],
|
||||
[with_gallium_r600, 'r600_dri.so'],
|
||||
[with_gallium_svga, 'vmwgfx_dri.so'],
|
||||
[with_gallium_virgl, 'virtio_gpu_dri.so']]
|
||||
[with_gallium_virgl, 'virtio_gpu_dri.so'],
|
||||
[with_gallium_lima, 'lima_dri.so']]
|
||||
if d[0]
|
||||
gallium_dri_drivers += d[1]
|
||||
endif
|
||||
|
|
|
@ -105,3 +105,6 @@ DEFINE_LOADER_DRM_ENTRYPOINT(st7586)
|
|||
DEFINE_LOADER_DRM_ENTRYPOINT(st7735r)
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_LIMA)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(lima)
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright © 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __LIMA_DRM_PUBLIC_H__
#define __LIMA_DRM_PUBLIC_H__

#include <stdbool.h>

/* Only pointers to these are used here, so forward declarations suffice. */
struct pipe_screen;
struct renderonly;

/*
 * Return a lima screen for the DRM device behind drmFD. The descriptor is
 * duplicated internally; screens are shared per device and reference
 * counted. Returns NULL on failure.
 */
struct pipe_screen *lima_drm_screen_create(int drmFD);

/*
 * Create a lima screen on a duplicate of ro->gpu_fd for use in a
 * render-only setup.
 */
struct pipe_screen *lima_drm_screen_create_renderonly(struct renderonly *ro);

#endif /* __LIMA_DRM_PUBLIC_H__ */
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Copyright © 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "c11/threads.h"
|
||||
#include "util/u_hash_table.h"
|
||||
#include "util/u_pointer.h"
|
||||
#include "renderonly/renderonly.h"
|
||||
|
||||
#include "lima_drm_public.h"
|
||||
|
||||
#include "lima/lima_screen.h"
|
||||
|
||||
/* Maps a DRM fd to the pipe_screen created for it; keys are hashed and
 * compared by the fstat() identity of the fd (see hash_fd / compare_fd).
 * Created lazily by lima_drm_screen_create().
 */
static struct util_hash_table *fd_tab = NULL;
|
||||
/* Held around every access to fd_tab and to a screen's refcnt in this file. */
static mtx_t lima_screen_mutex = _MTX_INITIALIZER_NP;
|
||||
|
||||
static void
|
||||
lima_drm_screen_destroy(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct lima_screen *screen = lima_screen(pscreen);
|
||||
boolean destroy;
|
||||
int fd = screen->fd;
|
||||
|
||||
mtx_lock(&lima_screen_mutex);
|
||||
destroy = --screen->refcnt == 0;
|
||||
if (destroy)
|
||||
util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
|
||||
mtx_unlock(&lima_screen_mutex);
|
||||
|
||||
if (destroy) {
|
||||
pscreen->destroy = screen->winsys_priv;
|
||||
pscreen->destroy(pscreen);
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hash callback for fd_tab.
 *
 * The key is a file descriptor stored in a pointer. The hash is derived
 * from the st_dev, st_ino and st_rdev values fstat() reports for it, so it
 * does not depend on the descriptor number itself. If fstat() fails, the
 * descriptor number is used as the hash instead of reading an
 * uninitialised struct stat.
 */
static unsigned hash_fd(void *key)
{
   int fd = pointer_to_intptr(key);
   struct stat st;

   if (fstat(fd, &st) != 0)
      return (unsigned)fd;

   return st.st_dev ^ st.st_ino ^ st.st_rdev;
}
|
||||
|
||||
/*
 * Key comparison callback for fd_tab.
 *
 * Both keys are file descriptors stored in pointers. Returns 0 when
 * fstat() reports the same st_dev, st_ino and st_rdev for both, and
 * non-zero otherwise. If either fstat() call fails, the descriptor numbers
 * themselves are compared instead of reading an uninitialised struct stat.
 */
static int compare_fd(void *key1, void *key2)
{
   int fd1 = pointer_to_intptr(key1);
   int fd2 = pointer_to_intptr(key2);
   struct stat stat1, stat2;

   if (fstat(fd1, &stat1) != 0 || fstat(fd2, &stat2) != 0)
      return fd1 != fd2;

   return stat1.st_dev != stat2.st_dev ||
          stat1.st_ino != stat2.st_ino ||
          stat1.st_rdev != stat2.st_rdev;
}
|
||||
|
||||
struct pipe_screen *
|
||||
lima_drm_screen_create(int fd)
|
||||
{
|
||||
struct pipe_screen *pscreen = NULL;
|
||||
|
||||
mtx_lock(&lima_screen_mutex);
|
||||
if (!fd_tab) {
|
||||
fd_tab = util_hash_table_create(hash_fd, compare_fd);
|
||||
if (!fd_tab)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
|
||||
if (pscreen) {
|
||||
lima_screen(pscreen)->refcnt++;
|
||||
} else {
|
||||
int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
|
||||
|
||||
pscreen = lima_screen_create(dup_fd, NULL);
|
||||
if (pscreen) {
|
||||
util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen);
|
||||
|
||||
/* Bit of a hack, to avoid circular linkage dependency,
|
||||
* ie. pipe driver having to call in to winsys, we
|
||||
* override the pipe drivers screen->destroy():
|
||||
*/
|
||||
lima_screen(pscreen)->winsys_priv = pscreen->destroy;
|
||||
pscreen->destroy = lima_drm_screen_destroy;
|
||||
}
|
||||
}
|
||||
|
||||
unlock:
|
||||
mtx_unlock(&lima_screen_mutex);
|
||||
return pscreen;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
lima_drm_screen_create_renderonly(struct renderonly *ro)
|
||||
{
|
||||
return lima_screen_create(fcntl(ro->gpu_fd, F_DUPFD_CLOEXEC, 3), ro);
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright © 2018 Lima Project
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# Static library with the lima DRM winsys (screen creation and per-device
# screen sharing), built from lima_drm_winsys.c.
liblimawinsys = static_library(
|
||||
'limawinsys',
|
||||
files('lima_drm_winsys.c'),
|
||||
include_directories : [
|
||||
inc_src, inc_include,
|
||||
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
|
||||
],
|
||||
)
|
Loading…
Reference in New Issue