diff --git a/CODEOWNERS b/CODEOWNERS index 55a5aa5e77e..1eedf461878 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -109,6 +109,11 @@ meson.build @dbaker @eric # Freedreno /src/gallium/drivers/freedreno/ @robclark +# Imagination +/include/drm-uapi/pvr_drm.h @CreativeCylon @frankbinns @rajnesh-kanwal +/src/imagination/ @CreativeCylon @frankbinns @rajnesh-kanwal +/src/imagination/rogue/ @simon-perretta-img + # Intel /include/drm-uapi/i915_drm.h @kwg @llandwerlin @jekstrand @idr /include/pci_ids/i*_pci_ids.h @kwg @llandwerlin @jekstrand @idr diff --git a/meson.build b/meson.build index 24165943a14..764381cdc0b 100644 --- a/meson.build +++ b/meson.build @@ -272,6 +272,8 @@ with_swrast_vk = _vulkan_drivers.contains('swrast') with_virtio_vk = _vulkan_drivers.contains('virtio-experimental') with_freedreno_kgsl = get_option('freedreno-kgsl') with_broadcom_vk = _vulkan_drivers.contains('broadcom') +with_imagination_vk = _vulkan_drivers.contains('imagination-experimental') +with_imagination_srv = get_option('imagination-srv') with_any_vk = _vulkan_drivers.length() != 0 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk diff --git a/meson_options.txt b/meson_options.txt index e5df4735daa..9fe70b11c28 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -192,7 +192,7 @@ option( 'vulkan-drivers', type : 'array', value : ['auto'], - choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'panfrost', 'swrast', 'virtio-experimental'], + choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'panfrost', 'swrast', 'virtio-experimental'], description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) option( @@ -201,6 +201,12 @@ option( value : false, description : 'use kgsl backend for freedreno vulkan driver', ) +option( + 'imagination-srv', + type : 'boolean', + value : false, + description : 'Enable Services backend for Imagination Technologies vulkan driver', +) option( 'shader-cache', type : 'combo', diff --git a/src/imagination/.clang-format b/src/imagination/.clang-format new file mode 100644 index 00000000000..db00dca2cde --- /dev/null +++ b/src/imagination/.clang-format @@ -0,0 +1,247 @@ +# Copyright © 2022 Imagination Technologies Ltd. + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +Language: Cpp +Standard: c++11 + +UseCRLF: false +ColumnLimit: 80 + +DeriveLineEnding: false +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false + +DisableFormat: false + +######## +# Tabs # +######## +UseTab: Never +TabWidth: 3 + +ConstructorInitializerIndentWidth: 6 +ContinuationIndentWidth: 3 + +IndentWidth: 3 +#IndentCaseBlocks: true # Requires clang-11 +IndentCaseLabels: false +#IndentExternBlock: NoIndent # Requires clang-11 +IndentGotoLabels: false +IndentPPDirectives: AfterHash +IndentWrappedFunctionNames: false +AccessModifierOffset: -4 # -IndentWidth + +NamespaceIndentation: None + +########## +# Braces # +########## +AlignAfterOpenBracket: Align +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +BinPackArguments: false +BinPackParameters: false + +Cpp11BracedListStyle: false + +######################## +# Whitespace Alignment # +######################## +AlignConsecutiveAssignments: false +#AlignConsecutiveBitFields: false # Requires clang-11 +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: false +AlignTrailingComments: false + +AlignEscapedNewlines: Left + +#AlignOperands: Align # Requires clang-11 +#BitFieldColonSpacing: Both # Requires clang-12 + +PointerAlignment: Right +#SpaceAroundPointerQualifiers: Both # Requires clang-12 + +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +#SpaceBeforeParens: ControlStatementsExceptForEachMacros # Requires clang-11 +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpacesBeforeTrailingComments: 2 + +############################ +# Multi-line constructions # +############################ +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +#AllowShortEnumsOnASingleLine: false # Requires clang-11 +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false + +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes + +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false +# BeforeLambdaBody: false # Requires clang-11 + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyNamespace: true + SplitEmptyRecord: true + +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: true + +BreakConstructorInitializers: AfterColon +BreakInheritanceList: AfterColon + +BreakStringLiterals: false + +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true + +#InsertTrailingCommas: Wrapped # Requires clang-11 + +KeepEmptyLinesAtTheStartOfBlocks: false +MaxEmptyLinesToKeep: 1 + +SortUsingDeclarations: true + +############ +# Includes # +############ +# TODO: Temporary config +IncludeBlocks: Preserve 
+SortIncludes: false +# TODO: This requires additional work to clean up headers & includes first +#IncludeBlocks: Regroup +#SortIncludes: true +#IncludeIsMainRegex: '(_test)?$' +##IncludeIsMainSourceRegex: +#IncludeCategories: +# - Regex: '^"' +# Priority: 1 + +############ +# Comments # +############ +FixNamespaceComments: false + +############# +# Penalties # +############# +# Taken from torvalds/kernel:.clang-format +PenaltyBreakAssignment: 10 +PenaltyBreakBeforeFirstCallParameter: 30 +PenaltyBreakComment: 10 +PenaltyBreakFirstLessLess: 0 +PenaltyBreakString: 10 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 100 +PenaltyReturnTypeOnItsOwnLine: 60 + +####################### +# User-defined macros # +####################### +CommentPragmas: '^ IWYU pragma:' + +MacroBlockBegin: '' +MacroBlockEnd: '' + +#AttributeMacros: [] # Requires clang-12 + +ForEachMacros: [ + 'foreach_instr', + 'foreach_instr_safe', + 'hash_table_foreach', + 'LIST_FOR_EACH_ENTRY', + 'LIST_FOR_EACH_ENTRY_FROM', + 'LIST_FOR_EACH_ENTRY_FROM_REV', + 'LIST_FOR_EACH_ENTRY_SAFE', + 'LIST_FOR_EACH_ENTRY_SAFE_REV', + 'list_for_each_entry', + 'list_for_each_entry_from', + 'list_for_each_entry_from_rev', + 'list_for_each_entry_from_safe', + 'list_for_each_entry_rev', + 'list_for_each_entry_safe', + 'list_for_each_entry_safe_rev', + 'list_pair_for_each_entry', + 'pvr_csb_emit', + 'pvr_csb_emit_merge', + 'pvr_csb_pack', + 'nir_foreach_block', + 'nir_foreach_block_safe', + 'nir_foreach_function', + 'nir_foreach_instr', + 'nir_foreach_instr_safe', + 'nir_foreach_shader_in_variable', + 'nir_foreach_shader_out_variable', + 'nir_foreach_use', + 'nir_foreach_use_safe', + 'nir_foreach_variable_with_modes', + 'u_vector_foreach', + 'util_dynarray_foreach', + 'vk_foreach_struct', + 'vk_foreach_struct_const', +# FIXME: vk_outarray_append doesn't fit here, remove +# it when a better solution exists for it. + 'vk_outarray_append' +] + +NamespaceMacros: [ +] + +StatementMacros: [ +] + +TypenameMacros: [ +] + +#WhitespaceSensitiveMacros: [] # Requires clang-11 diff --git a/src/imagination/.dir-locals.el b/src/imagination/.dir-locals.el new file mode 100644 index 00000000000..05e2a13126e --- /dev/null +++ b/src/imagination/.dir-locals.el @@ -0,0 +1,9 @@ +((nil . ((show-trailing-whitespace . t))) + (prog-mode + (indent-tabs-mode . nil) + (tab-width . 3) + (c-basic-offset . 3) + (c-file-style . "linux") + (fill-column . 80) + ) + ) diff --git a/src/imagination/.editorconfig b/src/imagination/.editorconfig new file mode 100644 index 00000000000..d98c43ace72 --- /dev/null +++ b/src/imagination/.editorconfig @@ -0,0 +1,2 @@ +[*.{c,h,cpp,hpp,cc,hh}] +max_line_length = 80 diff --git a/src/imagination/common/meson.build b/src/imagination/common/meson.build new file mode 100644 index 00000000000..6948c8da014 --- /dev/null +++ b/src/imagination/common/meson.build @@ -0,0 +1,33 @@ +# Copyright © 2022 Imagination Technologies Ltd. + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +libpowervr_common = static_library( + 'powervr_common', + [ + 'pvr_device_info.c', + ], + include_directories : [ + inc_include, + inc_src, + ], + c_args : [no_override_init_args], + gnu_symbol_visibility : 'hidden', +) diff --git a/src/imagination/common/pvr_device_info.c b/src/imagination/common/pvr_device_info.c new file mode 100644 index 00000000000..342dba46e3d --- /dev/null +++ b/src/imagination/common/pvr_device_info.c @@ -0,0 +1,294 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* TODO: This file is currently hand-maintained. However, the intention is to + * auto-generate it in the future based on the hwdefs. 
+ */ + +#include "assert.h" +#include "errno.h" +#include "pvr_device_info.h" + +const struct pvr_device_ident pvr_device_ident_4_V_2_51 = { + .device_id = 0x6250, + .series_name = "Rogue", + .public_name = "GX6250", +}; + +const struct pvr_device_features pvr_device_features_4_V_2_51 = { + .has_astc = true, + .has_cluster_grouping = true, + .has_common_store_size_in_dwords = true, + .has_compute = true, + .has_compute_morton_capable = true, + .has_compute_overlap = true, + .has_eight_output_registers = true, + .has_gs_rta_support = true, + .has_isp_max_tiles_in_flight = true, + .has_isp_samples_per_pixel = true, + .has_max_multisample = true, + .has_max_partitions = true, + .has_max_usc_tasks = true, + .has_num_clusters = true, + .has_num_raster_pipes = true, + .has_num_user_clip_planes = true, + .has_robust_buffer_access = true, + .has_slc_cache_line_size_bits = true, + .has_slc_mcu_cache_controls = true, + .has_tile_size_x = true, + .has_tile_size_y = true, + .has_tpu_array_textures = true, + .has_tpu_extended_integer_lookup = true, + .has_tpu_image_state_v2 = true, + .has_usc_f16sop_u8 = true, + .has_usc_min_output_registers_per_pix = true, + .has_uvs_banks = true, + .has_uvs_pba_entries = true, + .has_uvs_vtx_entries = true, + .has_vdm_cam_size = true, + .has_xt_top_infrastructure = true, + .has_zls_subtile = true, + + .common_store_size_in_dwords = 1280U * 4U * 4U, + .isp_max_tiles_in_flight = 4U, + .isp_samples_per_pixel = 2U, + .max_multisample = 8U, + .max_partitions = 8U, + .max_usc_tasks = 56U, + .num_clusters = 2U, + .num_raster_pipes = 1U, + .num_user_clip_planes = 8U, + .slc_cache_line_size_bits = 512U, + .tile_size_x = 32U, + .tile_size_y = 32U, + .usc_min_output_registers_per_pix = 2U, + .uvs_banks = 8U, + .uvs_pba_entries = 320U, + .uvs_vtx_entries = 288U, + .vdm_cam_size = 256U, +}; + +const struct pvr_device_enhancements pvr_device_enhancements_4_40_2_51 = { + .has_ern35421 = true, + .has_ern38020 = true, + .has_ern38748 = true, + .has_ern42307 = true, +}; + +const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = { + .has_brn44079 = true, + .has_brn47727 = true, + .has_brn48492 = true, + .has_brn48545 = true, + .has_brn49032 = true, + .has_brn51210 = true, + .has_brn51764 = true, + .has_brn52354 = true, + .has_brn52942 = true, + .has_brn56279 = true, + .has_brn58839 = true, + .has_brn62269 = true, + .has_brn66011 = true, + .has_brn70165 = true, +}; + +const struct pvr_device_ident pvr_device_ident_33_V_11_3 = { + .device_id = 0x33011003, + .series_name = "A-Series", + .public_name = "AXE-1-16M", +}; + +const struct pvr_device_features pvr_device_features_33_V_11_3 = { + .has_common_store_size_in_dwords = true, + .has_compute = true, + .has_isp_max_tiles_in_flight = true, + .has_isp_samples_per_pixel = true, + .has_max_multisample = true, + .has_max_partitions = true, + .has_max_usc_tasks = true, + .has_num_clusters = true, + .has_num_raster_pipes = true, + .has_num_user_clip_planes = true, + .has_roguexe = true, + .has_screen_size8K = true, + .has_simple_internal_parameter_format = true, + .has_simple_internal_parameter_format_v2 = true, + .has_simple_parameter_format_version = true, + .has_slc_cache_line_size_bits = true, + .has_tile_size_x = true, + .has_tile_size_y = true, + .has_tile_size_16x16 = true, + .has_tpu_extended_integer_lookup = true, + .has_tpu_image_state_v2 = true, + .has_usc_f16sop_u8 = true, + .has_usc_min_output_registers_per_pix = true, + .has_usc_pixel_partition_mask = true, + .has_uvs_banks = true, + .has_uvs_pba_entries = true, + 
.has_uvs_vtx_entries = true, + .has_vdm_cam_size = true, + + .common_store_size_in_dwords = 512U * 4U * 4U, + .isp_max_tiles_in_flight = 1U, + .isp_samples_per_pixel = 1U, + .max_multisample = 4U, + .max_partitions = 4U, + .max_usc_tasks = 24U, + .num_clusters = 1U, + .num_raster_pipes = 1U, + .num_user_clip_planes = 8U, + .simple_parameter_format_version = 2U, + .slc_cache_line_size_bits = 512U, + .tile_size_x = 16U, + .tile_size_y = 16U, + .usc_min_output_registers_per_pix = 1U, + .uvs_banks = 2U, + .uvs_pba_entries = 320U, + .uvs_vtx_entries = 288U, + .vdm_cam_size = 32U, + + .has_s8xe = true, +}; + +const struct pvr_device_enhancements pvr_device_enhancements_33_15_11_3 = { + .has_ern35421 = true, + .has_ern38748 = true, + .has_ern42307 = true, + .has_ern45493 = true, +}; + +const struct pvr_device_quirks pvr_device_quirks_33_15_11_3 = { + .has_brn70165 = true, +}; + +const struct pvr_device_ident pvr_device_ident_36_V_104_796 = { + .device_id = 0x36104796, + .series_name = "B-Series", + .public_name = "BXS-4-64", +}; + +const struct pvr_device_features pvr_device_features_36_V_104_796 = { + .has_astc = true, + .has_common_store_size_in_dwords = true, + .has_compute = true, + .has_compute_overlap = true, + .has_gpu_multicore_support = true, + .has_gs_rta_support = true, + .has_isp_max_tiles_in_flight = true, + .has_isp_samples_per_pixel = true, + .has_max_multisample = true, + .has_max_partitions = true, + .has_max_usc_tasks = true, + .has_num_clusters = true, + .has_num_raster_pipes = true, + .has_num_user_clip_planes = true, + .has_paired_tiles = true, + .has_pds_ddmadt = true, + .has_roguexe = true, + .has_screen_size8K = true, + .has_simple_internal_parameter_format = true, + .has_simple_internal_parameter_format_v2 = true, + .has_simple_parameter_format_version = true, + .has_slc_cache_line_size_bits = true, + .has_tile_size_x = true, + .has_tile_size_y = true, + .has_tile_size_16x16 = true, + .has_tpu_extended_integer_lookup = true, + .has_tpu_image_state_v2 = true, + .has_usc_f16sop_u8 = true, + .has_usc_min_output_registers_per_pix = true, + .has_usc_pixel_partition_mask = true, + .has_uvs_banks = true, + .has_uvs_pba_entries = true, + .has_uvs_vtx_entries = true, + .has_vdm_cam_size = true, + .has_xpu_max_slaves = true, + + .common_store_size_in_dwords = 1344U * 4U * 4U, + .isp_max_tiles_in_flight = 6U, + .isp_samples_per_pixel = 4U, + .max_multisample = 4U, + .max_partitions = 16U, + .max_usc_tasks = 156U, + .num_clusters = 1U, + .num_raster_pipes = 1U, + .num_user_clip_planes = 8U, + .simple_parameter_format_version = 2U, + .slc_cache_line_size_bits = 512U, + .tile_size_x = 16U, + .tile_size_y = 16U, + .usc_min_output_registers_per_pix = 2U, + .uvs_banks = 8U, + .uvs_pba_entries = 160U, + .uvs_vtx_entries = 144U, + .vdm_cam_size = 64U, + .xpu_max_slaves = 3U, + + .has_s8xe = true, +}; + +const struct pvr_device_enhancements pvr_device_enhancements_36_53_104_796 = { + .has_ern35421 = true, + .has_ern38748 = true, + .has_ern42307 = true, + .has_ern45493 = true, +}; + +const struct pvr_device_quirks pvr_device_quirks_36_53_104_796 = { + .has_brn44079 = true, + .has_brn70165 = true, +}; + +/** + * Initialize PowerVR device information. + * + * \param info Device info structure to initialize. + * \param bvnc Packed BVNC. + * \return + * * 0 on success, or + * * -%ENODEV if the device is not supported. 
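+ *
+ * A brief usage sketch (illustrative only; the BVNC is the GX6250 entry
+ * defined above, and handle_unsupported_device() is a hypothetical
+ * caller-side helper):
+ * \code
+ *    struct pvr_device_info info;
+ *    if (pvr_device_info_init(&info, PVR_BVNC_PACK(4, 40, 2, 51)) < 0)
+ *       handle_unsupported_device();
+ * \endcode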
 */
+int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc)
+{
+#define CASE_PACKED_BVNC_DEVICE_INFO(_b, _v, _n, _c)                       \
+   case PVR_BVNC_PACK(_b, _v, _n, _c):                                     \
+      info->ident = pvr_device_ident_##_b##_V_##_n##_##_c;                 \
+      info->ident.b = _b;                                                  \
+      info->ident.n = _n;                                                  \
+      info->ident.v = _v;                                                  \
+      info->ident.c = _c;                                                  \
+      info->features = pvr_device_features_##_b##_V_##_n##_##_c;           \
+      info->enhancements =                                                 \
+         pvr_device_enhancements_##_b##_##_v##_##_n##_##_c;                \
+      info->quirks = pvr_device_quirks_##_b##_##_v##_##_n##_##_c;          \
+      return 0
+
+   switch (bvnc) {
+      CASE_PACKED_BVNC_DEVICE_INFO(4, 40, 2, 51);
+   }
+
+#undef CASE_PACKED_BVNC_DEVICE_INFO
+
+   assert(!"Unsupported Device");
+
+   return -ENODEV;
+}
diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h
new file mode 100644
index 00000000000..c192d58d0a5
--- /dev/null
+++ b/src/imagination/common/pvr_device_info.h
@@ -0,0 +1,381 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DEVICE_INFO_H
+#define PVR_DEVICE_INFO_H
+
+/* TODO: This file is currently hand-maintained. However, the intention is to
+ * auto-generate it in the future based on the hwdefs.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+#include "util/macros.h"
+
+#define PVR_BVNC_PACK_SHIFT_B 48
+#define PVR_BVNC_PACK_SHIFT_V 32
+#define PVR_BVNC_PACK_SHIFT_N 16
+#define PVR_BVNC_PACK_SHIFT_C 0
+
+#define PVR_BVNC_PACK_MASK_B UINT64_C(0xFFFF000000000000)
+#define PVR_BVNC_PACK_MASK_V UINT64_C(0x0000FFFF00000000)
+#define PVR_BVNC_PACK_MASK_N UINT64_C(0x00000000FFFF0000)
+#define PVR_BVNC_PACK_MASK_C UINT64_C(0x000000000000FFFF)
+
+/**
+ * Packs B, V, N and C values into a 64-bit unsigned integer.
+ *
+ * The packed layout is as follows:
+ *
+ * \verbatim
+ * +--------+--------+--------+-------+
+ * | 63..48 | 47..32 | 31..16 | 15..0 |
+ * +========+========+========+=======+
+ * |    B   |    V   |    N   |   C   |
+ * +--------+--------+--------+-------+
+ * \endverbatim
+ *
+ * #pvr_get_packed_bvnc() should be used instead of this macro when a
+ * #pvr_device_info is available in order to ensure proper type checking.
+ *
+ * \param b Branch ID.
+ * \param v Version ID.
+ * \param n Number of scalable units.
+ * \param c Config ID.
+ * \return Packed BVNC.
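+ *
+ * A minimal usage sketch (the 4.40.2.51 BVNC is the GX6250 listed in
+ * pvr_device_info.c; any packed value round-trips the same way):
+ * \code
+ *    uint64_t bvnc = PVR_BVNC_PACK(4, 40, 2, 51);
+ *    assert(PVR_BVNC_UNPACK_B(bvnc) == 4);
+ *    assert(PVR_BVNC_UNPACK_C(bvnc) == 51);
+ * \endcode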
+ * + * \sa #pvr_get_packed_bvnc(), #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), + * #PVR_BVNC_UNPACK_N() and #PVR_BVNC_UNPACK_C() + */ +#define PVR_BVNC_PACK(b, v, n, c) \ + ((((uint64_t)(b) << PVR_BVNC_PACK_SHIFT_B) & PVR_BVNC_PACK_MASK_B) | \ + (((uint64_t)(v) << PVR_BVNC_PACK_SHIFT_V) & PVR_BVNC_PACK_MASK_V) | \ + (((uint64_t)(n) << PVR_BVNC_PACK_SHIFT_N) & PVR_BVNC_PACK_MASK_N) | \ + (((uint64_t)(c) << PVR_BVNC_PACK_SHIFT_C) & PVR_BVNC_PACK_MASK_C)) + +/** + * Unpacks B value (branch ID) from packed BVNC. + * + * \param bvnc Packed BVNC. + * \return Branch ID. + * + * \sa #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(), + * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK() + */ +#define PVR_BVNC_UNPACK_B(bvnc) \ + ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_B) >> PVR_BVNC_PACK_SHIFT_B)) + +/** + * Unpacks V value (version ID) from packed BVNC. + * + * \param bvnc Packed BVNC. + * \return Version ID. + * + * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(), + * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK() + */ +#define PVR_BVNC_UNPACK_V(bvnc) \ + ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_V) >> PVR_BVNC_PACK_SHIFT_V)) + +/** + * Unpacks N value (number of scalable units) from packed BVNC. + * + * \param bvnc Packed BVNC. + * \return Number of scalable units. + * + * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_C(), + * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK() + */ +#define PVR_BVNC_UNPACK_N(bvnc) \ + ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_N) >> PVR_BVNC_PACK_SHIFT_N)) + +/** + * Unpacks C value (config ID) from packed BVNC. + * + * \param bvnc Packed BVNC. + * \return Config ID. + * + * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(), + * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK() + */ +#define PVR_BVNC_UNPACK_C(bvnc) \ + ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_C) >> PVR_BVNC_PACK_SHIFT_C)) + +/** + * Tests whether a physical device has a given feature. + * + * Feature names are derived from those found in #pvr_device_features by + * dropping the 'has_' prefix, which is applied by this macro. + * + * \param dev_info #pvr_device_info object associated with the target physical + * device. + * \param feature Device feature name. + * + * \return + * * true if the named feature is present in the hardware. + * * false if the named feature is not present in the hardware. + * + * \sa #PVR_FEATURE_VALUE() and #PVR_GET_FEATURE_VALUE() + */ +#define PVR_HAS_FEATURE(dev_info, feature) ((dev_info)->features.has_##feature) + +/** + * Gets a physical device feature value if feature is supported. + * + * Feature names are derived from those found in #pvr_device_features by + * dropping the 'has_' prefix. + * + * This macro should be used in preference to #PVR_GET_FEATURE_VALUE() as it has + * proper error handling. + * + * \param dev_info #pvr_device_info object associated with the target physical + * device. + * \param feature Feature name. + * \param value_out Feature value. + * + * \return + * * 0 on success, or + * * -%EINVAL if the named feature is not present in the hardware. + * + * \sa #PVR_HAS_FEATURE() and #PVR_GET_FEATURE_VALUE() + */ +#define PVR_FEATURE_VALUE(dev_info, feature, value_out) \ + ({ \ + const struct pvr_device_info *__dev_info = dev_info; \ + int __ret = -EINVAL; \ + if (__dev_info->features.has_##feature) { \ + *(value_out) = __dev_info->features.feature; \ + __ret = 0; \ + } \ + __ret; \ + }) + +/** + * Gets a physical device feature value if supported, but otherwise returns a + * default value. 
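+ *
+ * For example, a sketch that reads the tile width and falls back to an
+ * illustrative default of 16 when the feature bit is absent (tile_size_x is
+ * one of the values in #pvr_device_features):
+ * \code
+ *    uint32_t tile_w = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16U);
+ * \endcode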
+ * + * Feature names are derived from those found in #pvr_device_features by + * dropping the 'has_' prefix. + * + * #PVR_FEATURE_VALUE() should be used in preference to this macro when errors + * can be returned by the caller. This macro is intended for cases where errors + * can't be returned. + * + * \param dev_info #pvr_device_info object associated with the target + * physical device. + * \param feature Feature name. + * \param default_value Default feature value. + * + * \return Feature value. + * + * \sa #PVR_HAS_FEATURE() and #PVR_FEATURE_VALUE() + */ +#define PVR_GET_FEATURE_VALUE(dev_info, feature, default_value) \ + ({ \ + const struct pvr_device_info *__dev_info = dev_info; \ + __typeof__(default_value) __ret = default_value; \ + if (__dev_info->features.has_##feature) { \ + __ret = __dev_info->features.feature; \ + } else { \ + mesa_logw("Missing " #feature \ + " feature (defaulting to: " #default_value ")"); \ + assert(0); \ + } \ + __ret; \ + }) + +/** + * Tests whether a physical device has a given enhancement. + * + * Enhancement numbers are derived from those found in #pvr_device_enhancements + * by dropping the 'has_ern' prefix, which is applied by this macro. + * + * \param dev_info #pvr_device_info object associated with the target physical + * device. + * \param number Enhancement number. + * + * \return + * * true if the enhancement is present in the hardware. + * * false if the enhancement is not present in the hardware. + */ +#define PVR_HAS_ERN(dev_info, number) ((dev_info)->enhancements.has_ern##number) + +/** + * Tests whether a physical device has a given quirk. + * + * Quirk numbers are derived from those found in #pvr_device_quirks by + * dropping the 'has_brn' prefix, which is applied by this macro. + * + * \param dev_info #pvr_device_info object associated with the target physical + * device. + * \param number Quirk number. + * + * \return + * * true if the quirk is present in the hardware. + * * false if the quirk is not present in the hardware. 
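+ *
+ * A usage sketch (BRN 44079 appears in #pvr_device_quirks below;
+ * apply_brn44079_workaround() is a hypothetical caller-side helper):
+ * \code
+ *    if (PVR_HAS_QUIRK(dev_info, 44079))
+ *       apply_brn44079_workaround();
+ * \endcode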
+ */ +#define PVR_HAS_QUIRK(dev_info, number) ((dev_info)->quirks.has_brn##number) + +struct pvr_device_ident { + uint16_t b, v, n, c; + uint32_t device_id; + const char *series_name; + const char *public_name; +}; + +struct pvr_device_features { + bool has_astc : 1; + bool has_cluster_grouping : 1; + bool has_common_store_size_in_dwords : 1; + bool has_compute : 1; + bool has_compute_morton_capable : 1; + bool has_compute_overlap : 1; + bool has_eight_output_registers : 1; + bool has_gpu_multicore_support : 1; + bool has_gs_rta_support : 1; + bool has_isp_max_tiles_in_flight : 1; + bool has_isp_samples_per_pixel : 1; + bool has_max_multisample : 1; + bool has_max_partitions : 1; + bool has_max_usc_tasks : 1; + bool has_num_clusters : 1; + bool has_num_raster_pipes : 1; + bool has_num_user_clip_planes : 1; + bool has_paired_tiles : 1; + bool has_pds_ddmadt : 1; + bool has_robust_buffer_access : 1; + bool has_roguexe : 1; + bool has_screen_size8K : 1; + bool has_simple_internal_parameter_format : 1; + bool has_simple_internal_parameter_format_v2 : 1; + bool has_simple_parameter_format_version : 1; + bool has_slc_cache_line_size_bits : 1; + bool has_slc_mcu_cache_controls : 1; + bool has_tile_size_x : 1; + bool has_tile_size_y : 1; + bool has_tile_size_16x16 : 1; + bool has_tpu_array_textures : 1; + bool has_tpu_extended_integer_lookup : 1; + bool has_tpu_image_state_v2 : 1; + bool has_usc_f16sop_u8 : 1; + bool has_usc_min_output_registers_per_pix : 1; + bool has_usc_pixel_partition_mask : 1; + bool has_uvs_banks : 1; + bool has_uvs_pba_entries : 1; + bool has_uvs_vtx_entries : 1; + bool has_vdm_cam_size : 1; + bool has_xpu_max_slaves : 1; + bool has_xt_top_infrastructure : 1; + bool has_zls_subtile : 1; + + uint32_t common_store_size_in_dwords; + uint32_t isp_max_tiles_in_flight; + uint32_t isp_samples_per_pixel; + uint32_t max_multisample; + uint32_t max_partitions; + uint32_t max_usc_tasks; + uint32_t num_clusters; + uint32_t num_raster_pipes; + uint32_t num_user_clip_planes; + uint32_t simple_parameter_format_version; + uint32_t slc_cache_line_size_bits; + uint32_t tile_size_x; + uint32_t tile_size_y; + uint32_t usc_min_output_registers_per_pix; + uint32_t uvs_banks; + uint32_t uvs_pba_entries; + uint32_t uvs_vtx_entries; + uint32_t vdm_cam_size; + uint32_t xpu_max_slaves; + + /* Derived features. */ + bool has_s8xe : 1; +}; + +struct pvr_device_enhancements { + bool has_ern35421 : 1; + bool has_ern38020 : 1; + bool has_ern38748 : 1; + bool has_ern42307 : 1; + bool has_ern45493 : 1; +}; + +struct pvr_device_quirks { + bool has_brn44079 : 1; + bool has_brn47727 : 1; + bool has_brn48492 : 1; + bool has_brn48545 : 1; + bool has_brn49032 : 1; + bool has_brn51210 : 1; + bool has_brn51764 : 1; + bool has_brn52354 : 1; + bool has_brn52942 : 1; + bool has_brn56279 : 1; + bool has_brn58839 : 1; + bool has_brn62269 : 1; + bool has_brn66011 : 1; + bool has_brn70165 : 1; +}; + +struct pvr_device_info { + struct pvr_device_ident ident; + struct pvr_device_features features; + struct pvr_device_enhancements enhancements; + struct pvr_device_quirks quirks; +}; + +/** + * Packs B, V, N and C values into a 64-bit unsigned integer. 
+ * + * The packed layout is as follows: + * + * \verbatim + * +--------+--------+--------+-------+ + * | 63..48 | 47..32 | 31..16 | 15..0 | + * +========+========+========+=======+ + * | B | V | N | C | + * +--------+--------+--------+-------+ + * \endverbatim + * + * This should be used in preference to #PVR_BVNC_PACK() when a + * #pvr_device_info is available in order to ensure proper type checking. + * + * \param dev_info Device information. + * \return Packed BVNC. + */ +static ALWAYS_INLINE uint64_t +pvr_get_packed_bvnc(const struct pvr_device_info *dev_info) +{ + return PVR_BVNC_PACK(dev_info->ident.b, + dev_info->ident.v, + dev_info->ident.n, + dev_info->ident.c); +} + +int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc); + +#endif /* PVR_DEVICE_INFO_H */ diff --git a/src/imagination/csbgen/gen_pack_header.py b/src/imagination/csbgen/gen_pack_header.py new file mode 100644 index 00000000000..e6df6c82b75 --- /dev/null +++ b/src/imagination/csbgen/gen_pack_header.py @@ -0,0 +1,934 @@ +# encoding=utf-8 + +# Copyright © 2022 Imagination Technologies Ltd. + +# based on anv driver gen_pack_header.py which is: +# Copyright © 2016 Intel Corporation + +# based on v3dv driver gen_pack_header.py which is: +# Copyright (C) 2016 Broadcom + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import argparse +import ast +import xml.parsers.expat +import re +import sys +import copy +import os +import textwrap + +license = """/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */""" + +pack_header = """%(license)s + +/* Enums, structures and pack functions for %(platform)s. + * + * This file has been generated, do not hand edit. + */ + +#ifndef %(guard)s +#define %(guard)s + +#include "csbgen/pvr_packet_helpers.h" + +""" + +def safe_name(name): + if not name[0].isalpha(): + name = '_' + name + + return name + +def num_from_str(num_str): + if num_str.lower().startswith('0x'): + return int(num_str, base=16) + + if num_str.startswith('0') and len(num_str) > 1: + raise ValueError('Octal numbers not allowed') + + return int(num_str) + +class Node: + def __init__(self, parent, name, name_is_safe = False): + self.parent = parent + if name_is_safe: + self.name = name + else: + self.name = safe_name(name) + + @property + def full_name(self): + if self.name[0] == '_': + return self.parent.prefix + self.name.upper() + + return self.parent.prefix + "_" + self.name.upper() + + @property + def prefix(self): + return self.parent.prefix + +class Csbgen(Node): + def __init__(self, name, prefix, filename): + super().__init__(None, name.upper()) + self.prefix_field = safe_name(prefix.upper()) + self.filename = filename + + self._defines = [] + self._enums = {} + self._structs = {} + + @property + def full_name(self): + return self.name + "_" + self.prefix_field + + @property + def prefix(self): + return self.full_name + + def add(self, element): + if isinstance(element, Enum): + if element.name in self._enums: + raise RuntimeError('Enum redefined. Enum: %s' % element.name) + + self._enums[element.name] = element + elif isinstance(element, Struct): + if element.name in self._structs: + raise RuntimeError('Struct redefined. Struct: %s' % element.name) + + self._structs[element.name] = element + elif isinstance(element, Define): + define_names = map(lambda d: d.full_name, self._defines) + if element.full_name in define_names: + raise RuntimeError('Define redefined. Define: %s' % element.full_name) + + self._defines.append(element) + else: + raise RuntimeError('Element "%s" cannot be nested in csbgen.' % + type(element).__name__) + + def _gen_guard(self): + return os.path.basename(self.filename).replace('.xml', '_h').upper() + + def emit(self): + print(pack_header % {'license': license, + 'platform': self.name, + 'guard': self._gen_guard()}) + + for define in self._defines: + define.emit(self) + + print() + + for enum in self._enums.values(): + enum.emit(self) + + for struct in self._structs.values(): + struct.emit(self) + + print('#endif /* %s */' % self._gen_guard()) + + def is_known_struct(self, struct_name): + return struct_name in self._structs.keys() + + def is_known_enum(self, enum_name): + return enum_name in self._enums.keys() + + def get_enum(self, enum_name): + return self._enums[enum_name] + +class Enum(Node): + def __init__(self, parent, name): + super().__init__(parent, name) + + self._values = {} + + self.parent.add(self) + + # We override prefix so that the values will contain the enum's name too. + @property + def prefix(self): + return self.full_name + + def get_value(self, value_name): + return self._values[value_name] + + def add(self, element): + if not isinstance(element, Value): + raise RuntimeError('Element cannot be nested in enum. 
' + + 'Element Type: %s, Enum: %s' % + (type(element).__name__, self.full_name)) + + if element.name in self._values: + raise RuntimeError('Value is being redefined. Value: "%s"' % element.name) + + self._values[element.name] = element + + def emit(self, root): + # This check is invalid if tags other than Value can be nested within an enum. + if not self._values.values(): + raise RuntimeError('Enum definition is empty. Enum: "%s"' % self.full_name) + + print('enum %s {' % self.full_name) + for value in self._values.values(): + value.emit() + print('};\n') + +class Value(Node): + def __init__(self, parent, name, value): + super().__init__(parent, name) + + self.value = int(value) + + self.parent.add(self) + + def emit(self): + print(' %-36s = %6d,' % (self.full_name, self.value)) + +class Struct(Node): + def __init__(self, parent, name, length): + super().__init__(parent, name) + + self.length = int(length) + self.size = self.length * 32 + + if self.length <= 0: + raise ValueError('Struct length must be greater than 0. ' + + 'Struct: "%s".' % self.full_name) + + self._children = {} + + self.parent.add(self) + + @property + def fields(self): + # TODO: Should we cache? See TODO in equivalent Condition getter. + + fields = [] + for child in self._children.values(): + if isinstance(child, Condition): + fields += child.fields + else: + fields.append(child) + + return fields + + @property + def prefix(self): + return self.full_name + + def add(self, element): + # We don't support conditions and field having the same name. + if isinstance(element, Field): + if element.name in self._children.keys(): + raise ValueError('Field is being redefined. ' + + 'Field: "%s", Struct: "%s"' % + (element.name, self.full_name)) + + self._children[element.name] = element + + elif isinstance(element, Condition): + # We only save ifs, and ignore the rest. The rest will be linked to + # the if condition so we just need to call emit() on the if and the + # rest will also be emitted. + if element.type == 'if': + self._children[element.name] = element + else: + if element.name not in self._children.keys(): + raise RuntimeError('Unknown condition: "%s"' % element.name) + + else: + raise RuntimeError('Element cannot be nested in struct. ' + + 'Element Type: %s, Struct: %s' % + (type(element).__name__, self.full_name)) + + def _emit_header(self, root): + fields = filter(lambda f: hasattr(f, 'default'), self.fields) + + default_fields = [] + for field in fields: + if field.is_builtin_type: + default_fields.append(" .%-35s = %6d" % + (field.name, field.default)) + else: + if not root.is_known_enum(field.type): + # Default values should not apply to structures + raise RuntimeError('Unknown type. Field: "%s" Type: "%s"' % + (field.name, field.type)) + + enum = root.get_enum(field.type) + + try: + value = enum.get_value(field.default) + except KeyError: + raise ValueError('Unknown enum value. 
' +
+                                     'Value: "%s", Enum: "%s", Field: "%s"' %
+                                     (field.default, enum.full_name, field.name))
+
+                default_fields.append("   .%-35s = %s" %
+                                      (field.name, value.full_name))
+
+        print('#define %-40s\\' % (self.full_name + '_header'))
+        print(", \\\n".join(default_fields))
+        print('')
+
+    def _emit_helper_macros(self, root):
+        fields_with_defines = filter(lambda f: f.defines, self.fields)
+
+        for field in fields_with_defines:
+            print("/* Helper macros for %s */" % (field.name))
+
+            for define in field.defines:
+                define.emit(root)
+
+            print()
+
+    def _emit_pack_function(self, root):
+        print(textwrap.dedent("""\
+            static inline __attribute__((always_inline)) void
+            %s_pack(__attribute__((unused)) void * restrict dst,
+            %s__attribute__((unused)) const struct %s * restrict values)
+            {""") % (self.full_name, ' ' * len(self.full_name), self.full_name))
+
+        group = Group(0, 1, self.size, self.fields)
+        (dwords, length) = group.collect_dwords_and_length()
+        if length:
+            # Cast dst to make header C++ friendly
+            print("   uint32_t * restrict dw = (uint32_t * restrict) dst;")
+
+            group.emit_pack_function(root, dwords, length)
+
+        print("}\n")
+
+    def emit(self, root):
+        print('#define %-33s %6d' % (self.full_name + "_length", self.length))
+
+        self._emit_header(root)
+
+        self._emit_helper_macros(root)
+
+        print("struct %s {" % self.full_name)
+        for child in self._children.values():
+            child.emit(root)
+        print("};\n")
+
+        self._emit_pack_function(root)
+
+class Field(Node):
+    def __init__(self, parent, name, start, end, type, default=None, shift=None):
+        super().__init__(parent, name)
+
+        self.start = int(start)
+        self.end = int(end)
+        self.type = type
+
+        self._defines = {}
+
+        self.parent.add(self)
+
+        if self.start > self.end:
+            raise ValueError('Start cannot be after end. '
+                             + 'Start: %d, End: %d, Field: "%s"' %
+                             (self.start, self.end, self.name))
+
+        if self.type == 'bool' and self.end != self.start:
+            raise ValueError('Bool field can only be 1 bit long. '
+                             + 'Field "%s"' % self.name)
+
+        if default is not None:
+            if not self.is_builtin_type:
+                # Assuming it's an enum type.
+                self.default = safe_name(default)
+            else:
+                self.default = num_from_str(default)
+
+        if shift is not None:
+            if self.type != 'address':
+                raise RuntimeError('Only address fields can have a shift '
+                                   + 'attribute. Field: "%s"' % self.name)
+
+            self.shift = int(shift)
+
+            Define(self, "ALIGNMENT", 2 ** self.shift)
+        else:
+            if self.type == 'address':
+                raise RuntimeError('Field of address type '
+                                   + 'requires a shift attribute. Field "%s"' %
+                                   self.name)
+
+    @property
+    def defines(self):
+        return self._defines.values()
+
+    # We override prefix so that the defines will contain the field's name too.
+    @property
+    def prefix(self):
+        return self.full_name
+
+    @property
+    def is_builtin_type(self):
+        builtins = {'address', 'bool', 'float', 'mbo', 'offset', 'int', 'uint'}
+        return self.type in builtins
+
+    def _get_c_type(self, root):
+        if self.type == 'address':
+            return '__pvr_address_type'
+        elif self.type == 'bool':
+            return 'bool'
+        elif self.type == 'float':
+            return 'float'
+        elif self.type == 'offset':
+            return 'uint64_t'
+        elif self.type == 'int':
+            return 'int32_t'
+        elif self.type == 'uint':
+            if self.end - self.start < 32:
+                return 'uint32_t'
+            elif self.end - self.start < 64:
+                return 'uint64_t'
+
+            raise RuntimeError('No known C type found to hold %d bit sized value. '
+                               + 'Field: "%s"' %
+                               (self.end - self.start, self.name))
+        elif root.is_known_struct(self.type):
+            return 'struct ' + self.type
+        elif root.is_known_enum(self.type):
+            return 'enum ' + root.get_enum(self.type).full_name
+        raise RuntimeError('Unknown type. Type: "%s", Field: "%s"' %
+                           (self.type, self.name))
+
+    def add(self, element):
+        if self.type == 'mbo':
+            raise RuntimeError('No element can be nested in an mbo field. '
+                               + 'Element Type: %s, Field: %s' %
+                               (type(element).__name__, self.name))
+
+        if isinstance(element, Define):
+            if element.name in self._defines:
+                raise RuntimeError('Duplicate define. Define: "%s"' %
+                                   element.name)
+
+            self._defines[element.name] = element
+        else:
+            raise RuntimeError('Element cannot be nested in a field. '
+                               + 'Element Type: %s, Field: %s' %
+                               (type(element).__name__, self.name))
+
+    def emit(self, root):
+        if self.type == 'mbo':
+            return
+
+        print("   %-36s %s;" % (self._get_c_type(root), self.name))
+
+class Define(Node):
+    def __init__(self, parent, name, value):
+        super().__init__(parent, name)
+
+        self.value = value
+
+        self.parent.add(self)
+
+    def emit(self, root):
+        print("#define %-40s %d" % (self.full_name, self.value))
+
+class Condition(Node):
+    def __init__(self, parent, name, type):
+        super().__init__(parent, name, name_is_safe=True)
+
+        self.type = type
+        if not Condition._is_valid_type(self.type):
+            raise RuntimeError('Unknown type: "%s"' % self.type)
+
+        self._children = {}
+
+        # This is the link to the next branch for the if statement so either
+        # elif, else, or endif. They themselves will also have a link to the
+        # next branch up until endif which terminates the chain.
+        self._child_branch = None
+
+        self.parent.add(self)
+
+    @property
+    def fields(self):
+        # TODO: Should we use some kind of state to indicate that all of the
+        # child nodes have been added and then cache the fields in here on the
+        # first call so that we don't have to traverse them again on each call?
+        # The state could be changed either when we reach the endif and pop from
+        # the context, or when we start emitting.
+
+        fields = []
+
+        for child in self._children.values():
+            if isinstance(child, Condition):
+                fields += child.fields
+            else:
+                fields.append(child)
+
+        if self._child_branch is not None:
+            fields += self._child_branch.fields
+
+        return fields
+
+    @staticmethod
+    def _is_valid_type(type):
+        types = {'if', 'elif', 'else', 'endif'}
+        return type in types
+
+    def _is_compatible_child_branch(self, branch):
+        types = ['if', 'elif', 'else', 'endif']
+        idx = types.index(self.type)
+        return (branch.type in types[idx + 1:] or
+                self.type == 'elif' and branch.type == 'elif')
+
+    def _add_branch(self, branch):
+        if branch.type == 'elif' and branch.name == self.name:
+            raise RuntimeError('Elif branch cannot have same check as previous branch. '
+                               + 'Check: "%s"' % (branch.name))
+
+        if not self._is_compatible_child_branch(branch):
+            raise RuntimeError('Invalid branch. Check: "%s", Type: "%s"' %
+                               (branch.name, branch.type))
+
+        self._child_branch = branch
+
+    # Returns the name of the if condition. This is used for elif branches since
+    # they have a different name than the if condition thus we have to traverse
+    # the chain of branches.
+    # This is used to discriminate nested if conditions from branches since
+    # branches like 'endif' and 'else' will have the same name as the 'if' (the
+    # elif is an exception) while nested conditions will have different names.
+    #
+    # TODO: Redo this to improve speed? Would caching this be helpful? We could
+    # just save the name of the if instead of having to walk towards it whenever
+    # a new condition is being added.
+    def _top_branch_name(self):
+        if self.type == 'if':
+            return self.name
+
+        return self.parent._top_branch_name()
+
+    def add(self, element):
+        if isinstance(element, Field):
+            if element.name in self._children.keys():
+                raise ValueError('Duplicate field. Field: "%s"' % element.name)
+
+            self._children[element.name] = element
+        elif isinstance(element, Condition):
+            if element.type == 'elif' or self._top_branch_name() == element.name:
+                self._add_branch(element)
+            else:
+                if element.type != 'if':
+                    raise RuntimeError('Branch of an unopened if condition. '
+                                       + 'Check: "%s", Type: "%s".' %
+                                       (element.name, element.type))
+
+                # This is a nested condition and we made sure that the name
+                # doesn't match _top_branch_name() so we can recognize the else
+                # and endif.
+                # We recognized the elif by its type however its name differs
+                # from the if condition thus when we add an if condition with
+                # the same name as the elif nested in it, the _top_branch_name()
+                # check doesn't hold true as the name matched the elif and not
+                # the if statement which the elif was a branch of, thus the
+                # nested if condition is not recognized as an invalid branch of
+                # the outer if statement.
+                # Sample:
+                #   <condition type="if" check="A">
+                #     <condition type="elif" check="B">
+                #       <condition type="if" check="B">
+                #       <condition type="endif" check="B">
+                #     <condition type="endif" check="A">
+                #
+                # We fix this by checking the if condition name against its
+                # parent.
+                if element.name == self.name:
+                    raise RuntimeError('Invalid if condition. Check: "%s"' %
+                                       element.name)
+
+                self._children[element.name] = element
+        else:
+            raise RuntimeError('Element cannot be nested in a condition. '
+                               + 'Element Type: %s, Check: %s' %
+                               (type(element).__name__, self.name))
+
+    def emit(self, root):
+        if self.type == "if":
+            print("/* if %s is supported use: */" % (self.name))
+        elif self.type == "elif":
+            print("/* else if %s is supported use: */" % (self.name))
+        elif self.type == "else":
+            print("/* else %s is not-supported use: */" % (self.name))
+        elif self.type == "endif":
+            print("/* endif %s */" % (self.name))
+            return
+        else:
+            raise RuntimeError('Unknown condition type. Implementation error.')
+
+        for child in self._children.values():
+            child.emit(root)
+
+        self._child_branch.emit(root)
+
+class Group(object):
+    def __init__(self, start, count, size, fields):
+        self.start = start
+        self.count = count
+        self.size = size
+        self.fields = fields
+
+    class DWord:
+        def __init__(self):
+            self.size = 32
+            self.fields = []
+            self.addresses = []
+
+    def collect_dwords(self, dwords, start, dim):
+        for field in self.fields:
+            index = (start + field.start) // 32
+            if index not in dwords:
+                dwords[index] = self.DWord()
+
+            clone = copy.copy(field)
+            clone.start = clone.start + start
+            clone.end = clone.end + start
+            clone.dim = dim
+            dwords[index].fields.append(clone)
+
+            if field.type == "address":
+                # assert dwords[index].address == None
+                dwords[index].addresses.append(clone)
+
+            # Coalesce all the dwords covered by this field. The two cases we
+            # handle are where multiple fields are in a 64 bit word (typically
+            # an address and a few bits) or where a single struct field
+            # completely covers multiple dwords.
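+            #
+            # (For example, an address field spanning bits 8..39 of the group
+            # lands in dwords 0 and 1: both dict slots end up aliasing a single
+            # DWord object whose size is widened to 64, so it is later emitted
+            # as one 64-bit store.)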
+ while index < (start + field.end) // 32: + if index + 1 in dwords and \ + not dwords[index] == dwords[index + 1]: + dwords[index].fields.extend(dwords[index + 1].fields) + dwords[index].addresses.extend(dwords[index + 1].addresses) + dwords[index].size = 64 + dwords[index + 1] = dwords[index] + index = index + 1 + + def collect_dwords_and_length(self): + dwords = {} + self.collect_dwords(dwords, 0, "") + + # Determine number of dwords in this group. If we have a size, use + # that, since that'll account for MBZ dwords at the end of a group + # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword + # index we've seen plus one. + if self.size > 0: + length = self.size // 32 + elif dwords: + length = max(dwords.keys()) + 1 + else: + length = 0 + + return (dwords, length) + + def emit_pack_function(self, root, dwords, length): + for index in range(length): + # Handle MBZ dwords + if index not in dwords: + print("") + print(" dw[%d] = 0;" % index) + continue + + # For 64 bit dwords, we aliased the two dword entries in the dword + # dict it occupies. Now that we're emitting the pack function, + # skip the duplicate entries. + dw = dwords[index] + if index > 0 and index - 1 in dwords and dw == dwords[index - 1]: + continue + + # Special case: only one field and it's a struct at the beginning + # of the dword. In this case we pack directly into the + # destination. This is the only way we handle embedded structs + # larger than 32 bits. + if len(dw.fields) == 1: + field = dw.fields[0] + name = field.name + field.dim + if root.is_known_struct(field.type) and field.start % 32 == 0: + print("") + print(" %s_pack(data, &dw[%d], &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), + index, name)) + continue + + # Pack any fields of struct type first so we have integer values + # to the dword for those fields. 
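+            #
+            # (Each such field is packed into a uint32_t temporary named
+            # v<dword>_<field> below, which is then OR'd into the dword like
+            # any other integer field.)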
+ field_index = 0 + for field in dw.fields: + if isinstance(field, Field) and root.is_known_struct(field.type): + name = field.name + field.dim + print("") + print(" uint32_t v%d_%d;" % (index, field_index)) + print(" %s_pack(data, &v%d_%d, &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), + index, field_index, name)) + field_index = field_index + 1 + + print("") + dword_start = index * 32 + address_count = len(dw.addresses); + + if dw.size == 32 and not dw.addresses: + v = None + print(" dw[%d] =" % index) + elif len(dw.fields) > address_count: + v = "v%d" % index + print(" const uint%d_t %s =" % (dw.size, v)) + else: + v = "0" + + field_index = 0 + non_address_fields = [] + for field in dw.fields: + if field.type != "mbo": + name = field.name + field.dim + + if field.type == "mbo": + non_address_fields.append("__pvr_mbo(%d, %d)" % + (field.start - dword_start, + field.end - dword_start)) + elif field.type == "address": + pass + elif field.type == "uint": + non_address_fields.append("__pvr_uint(values->%s, %d, %d)" % + (name, field.start - dword_start, + field.end - dword_start)) + elif root.is_known_enum(field.type): + non_address_fields.append("__pvr_uint(values->%s, %d, %d)" % + (name, field.start - dword_start, + field.end - dword_start)) + elif field.type == "int": + non_address_fields.append("__pvr_sint(values->%s, %d, %d)" % + (name, field.start - dword_start, + field.end - dword_start)) + elif field.type == "bool": + non_address_fields.append("__pvr_uint(values->%s, %d, %d)" % + (name, field.start - dword_start, + field.end - dword_start)) + elif field.type == "float": + non_address_fields.append("__pvr_float(values->%s)" % name) + elif field.type == "offset": + non_address_fields.append( + "__pvr_offset(values->%s,"" %d, %d)" % + (name, field.start - dword_start, + field.end - dword_start)) + elif field.is_struct_type(): + non_address_fields.append("__pvr_uint(v%d_%d, %d, %d)" % + (index, field_index, + field.start - dword_start, + field.end - dword_start)) + field_index = field_index + 1 + else: + non_address_fields.append("/* unhandled field %s," + " type %s */\n" % + (name, field.type)) + + if non_address_fields: + print(" |\n".join(" " + f for f in non_address_fields) + + ";") + + if dw.size == 32: + for i in range(address_count): + print(" dw[%d] = __pvr_address(" + "values->%s, %d, %d, %d) | %s;" % + (index, dw.addresses[i].name + field.dim, + dw.addresses[i].shift, dw.addresses[i].start - dword_start, + dw.addresses[i].end - dword_start, v)) + continue + + v_accumulated_addr = "" + for i in range(address_count): + v_address = "v%d_address" % i + v_accumulated_addr += "v%d_address" % i + print(" const uint64_t %s =\n " + " __pvr_address(values->%s, %d, %d, %d);" % + (v_address, dw.addresses[i].name + field.dim, dw.addresses[i].shift, + dw.addresses[i].start - dword_start, + dw.addresses[i].end - dword_start)) + if i < (address_count - 1): + v_accumulated_addr += " |\n " + + if dw.addresses: + if len(dw.fields) > address_count: + print(" dw[%d] = %s | %s;" % (index, v_accumulated_addr, v)) + print(" dw[%d] = (%s >> 32) | (%s >> 32);" % + (index + 1, v_accumulated_addr, v)) + continue + else: + v = v_accumulated_addr + + print(" dw[%d] = %s;" % (index, v)) + print(" dw[%d] = %s >> 32;" % (index + 1, v)) + +class Parser(object): + def __init__(self): + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.start_element + self.parser.EndElementHandler = self.end_element + + self.context = [] + + def start_element(self, name, 
+        if name != "csbgen":
+            parent = self.context[-1]
+
+        if name == "csbgen":
+            if self.context:
+                raise RuntimeError('Can only have 1 csbgen block and it has '
+                                   'to contain all of the other elements.')
+
+            csbgen = Csbgen(attrs["name"], attrs["prefix"], self.filename)
+            self.context.append(csbgen)
+
+        elif name == "struct":
+            struct = Struct(parent, attrs["name"], attrs["length"])
+            self.context.append(struct)
+
+        elif name == "field":
+            default = None
+            if "default" in attrs.keys():
+                default = attrs["default"]
+
+            shift = None
+            if "shift" in attrs.keys():
+                shift = attrs["shift"]
+
+            field = Field(parent,
+                          name=attrs["name"],
+                          start=int(attrs["start"]),
+                          end=int(attrs["end"]),
+                          type=attrs["type"],
+                          default=default,
+                          shift=shift)
+            self.context.append(field)
+
+        elif name == "enum":
+            enum = Enum(parent, attrs["name"])
+            self.context.append(enum)
+
+        elif name == "value":
+            value = Value(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(value)
+
+        elif name == "define":
+            define = Define(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(define)
+
+        elif name == "condition":
+            condition = Condition(parent, name=attrs["check"], type=attrs["type"])
+
+            # Starting with the if statement, we push it onto the context. For
+            # each branch that follows (elif and else) we assign the top of
+            # the stack as its parent, pop(), and push the new condition. So
+            # per branch we end up having [..., struct, condition]. We don't
+            # push an endif since it's not supposed to have any children and
+            # it's supposed to close the whole if statement.
+
+            if condition.type != 'if':
+                # Remove the parent condition from the context. We were peeking
+                # before, now we pop().
+                self.context.pop()
+
+            if condition.type == 'endif':
+                if not isinstance(parent, Condition):
+                    raise RuntimeError('Cannot close unopened or already '
+                                       'closed condition. Condition: "%s"' %
+                                       condition.name)
+            else:
+                self.context.append(condition)
+
+        else:
+            raise RuntimeError('Unknown tag: "%s"' % name)
+
+    def end_element(self, name):
+        if name == 'condition':
+            element = self.context[-1]
+            if not isinstance(element, Condition) and \
+               not isinstance(element, Struct):
+                raise RuntimeError("Expected condition or struct tag to be closed.")
+
+            return
+
+        element = self.context.pop()
+
+        if name == "struct":
+            if not isinstance(element, Struct):
+                raise RuntimeError("Expected struct tag to be closed.")
+        elif name == "field":
+            if not isinstance(element, Field):
+                raise RuntimeError("Expected field tag to be closed.")
+        elif name == "enum":
+            if not isinstance(element, Enum):
+                raise RuntimeError("Expected enum tag to be closed.")
+        elif name == "value":
+            if not isinstance(element, Value):
+                raise RuntimeError("Expected value tag to be closed.")
+        elif name == "define":
+            if not isinstance(element, Define):
+                raise RuntimeError("Expected define tag to be closed.")
+        elif name == "csbgen":
+            if not isinstance(element, Csbgen):
+                raise RuntimeError("Expected csbgen tag to be closed. "
+                                   "Some tags may not have been closed.")
+
+            element.emit()
+        else:
+            raise RuntimeError('Unknown closing element: "%s"' % name)
+
+    def parse(self, filename):
+        self.filename = filename
+        with open(filename, "rb") as file:
+            self.parser.ParseFile(file)
+
+
+if len(sys.argv) < 2:
+    print("No input xml file specified")
+    sys.exit(1)
+
+input_file = sys.argv[1]
+
+p = Parser()
+p.parse(input_file)
diff --git a/src/imagination/csbgen/meson.build b/src/imagination/csbgen/meson.build
new file mode 100644
index 00000000000..ea6a8ff26ad
--- /dev/null
+++ b/src/imagination/csbgen/meson.build
@@ -0,0 +1,46 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+pvr_xml_files = [
+  'rogue_cdm.xml',
+  'rogue_cr.xml',
+  'rogue_ipf.xml',
+  'rogue_lls.xml',
+  'rogue_pbestate.xml',
+  'rogue_pds.xml',
+  'rogue_ppp.xml',
+  'rogue_texstate.xml',
+  'rogue_vdm.xml',
+]
+
+pvr_xml_pack = []
+foreach f : pvr_xml_files
+  _name = '@0@.h'.format(f.split('.')[0])
+  pvr_xml_pack += custom_target(
+    _name,
+    input : ['gen_pack_header.py', f],
+    output : _name,
+    command : [prog_python, '@INPUT@'],
+    capture : true,
+  )
+endforeach
+
+dep_csbgen = declare_dependency(sources : [pvr_xml_pack])
diff --git a/src/imagination/csbgen/pvr_packet_helpers.h b/src/imagination/csbgen/pvr_packet_helpers.h
new file mode 100644
index 00000000000..9cef13265a6
--- /dev/null
+++ b/src/imagination/csbgen/pvr_packet_helpers.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PACKET_HELPERS_H
+#define PVR_PACKET_HELPERS_H
+
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef __pvr_validate_value
+#   define __pvr_validate_value(x)
+#endif
+
+#ifdef NDEBUG
+#   define NDEBUG_UNUSED __attribute__((unused))
+#else
+#   define NDEBUG_UNUSED
+#endif
+
+#ifndef __pvr_address_type
+#   error #define __pvr_address_type before including this file
+#endif
+
+#ifndef __pvr_get_address
+#   error #define __pvr_get_address before including this file
+#endif
+
+union __pvr_value {
+   float f;
+   uint32_t dw;
+};
+
+static inline __attribute__((always_inline)) uint64_t __pvr_mbo(uint32_t start,
+                                                                uint32_t end)
+{
+   return (~0ull >> (64 - (end - start + 1))) << start;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_uint(uint64_t v, uint32_t start, NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   const int width = end - start + 1;
+   if (width < 64) {
+      const uint64_t max = (1ull << width) - 1;
+      assert(v <= max);
+   }
+#endif
+
+   return v << start;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sint(int64_t v, uint32_t start, uint32_t end)
+{
+   const int width = end - start + 1;
+
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   if (width < 64) {
+      const int64_t max = (1ll << (width - 1)) - 1;
+      const int64_t min = -(1ll << (width - 1));
+      assert(min <= v && v <= max);
+   }
+#endif
+
+   const uint64_t mask = ~0ull >> (64 - width);
+
+   return (v & mask) << start;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_offset(uint64_t v,
+             NDEBUG_UNUSED uint32_t start,
+             NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+#ifndef NDEBUG
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   assert((v & ~mask) == 0);
+#endif
+
+   return v;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_address(__pvr_address_type address,
+              uint32_t shift,
+              uint32_t start,
+              uint32_t end)
+{
+   uint64_t addr_u64 = __pvr_get_address(address);
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   return ((addr_u64 >> shift) << start) & mask;
+}
+
+static inline __attribute__((always_inline)) uint32_t __pvr_float(float v)
+{
+   __pvr_validate_value(v);
+   return ((union __pvr_value){ .f = (v) }).dw;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start)) - 1) / factor;
+   const float min = -(1 << (end - start)) / factor;
+   assert(min <= v && v <= max);
+#endif
+
+   const int64_t int_val = llroundf(v * factor);
+   const uint64_t mask = ~0ull >> (64 - (end - start + 1));
+
+   return (int_val & mask) << start;
+}
+
+static inline __attribute__((always_inline)) uint64_t
+__pvr_ufixed(float v,
+             uint32_t start,
+             NDEBUG_UNUSED uint32_t end,
+             uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start + 1)) - 1) / factor;
+   const float min = 0.0f;
+   assert(min <= v && v <= max);
+#endif
+
+   const uint64_t uint_val = llroundf(v * factor);
+
+   return uint_val << start;
+}
+
+#undef NDEBUG_UNUSED
+
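+/* Example usage (illustrative values, not part of the generated headers):
+ *
+ *    __pvr_mbo(0, 1)      == 0x3   - bits 0..1 must be one
+ *    __pvr_uint(5, 4, 7)  == 0x50  - 5 packed into bits 4..7
+ *    __pvr_sint(-2, 0, 3) == 0xe   - -2 packed into bits 0..3
+ *
+ * The generated *_pack() functions OR terms like these together to form each
+ * dword of a control stream word.
+ */
+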
+#endif /* PVR_PACKET_HELPERS_H */
diff --git a/src/imagination/csbgen/rogue_cdm.xml b/src/imagination/csbgen/rogue_cdm.xml
new file mode 100644
index 00000000000..804652b2491
--- /dev/null
+++ b/src/imagination/csbgen/rogue_cdm.xml
@@ -0,0 +1,132 @@
+<!-- rogue_cdm.xml: CDM control stream definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_cr.xml b/src/imagination/csbgen/rogue_cr.xml
new file mode 100644
index 00000000000..a3f6903175e
--- /dev/null
+++ b/src/imagination/csbgen/rogue_cr.xml
@@ -0,0 +1,631 @@
+<!-- rogue_cr.xml: control register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_hwdefs.h b/src/imagination/csbgen/rogue_hwdefs.h
new file mode 100644
index 00000000000..75a3a472d4c
--- /dev/null
+++ b/src/imagination/csbgen/rogue_hwdefs.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_HWDEFS_H
+#define ROGUE_HWDEFS_H
+
+#include "rogue_cdm.h"
+#include "rogue_cr.h"
+#include "rogue_ipf.h"
+#include "rogue_lls.h"
+#include "rogue_pbestate.h"
+#include "rogue_pds.h"
+#include "rogue_ppp.h"
+#include "rogue_texstate.h"
+#include "rogue_vdm.h"
+
+#endif /* ROGUE_HWDEFS_H */
diff --git a/src/imagination/csbgen/rogue_ipf.xml b/src/imagination/csbgen/rogue_ipf.xml
new file mode 100644
index 00000000000..aa96c7b3933
--- /dev/null
+++ b/src/imagination/csbgen/rogue_ipf.xml
@@ -0,0 +1,40 @@
+<!-- rogue_ipf.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_lls.xml b/src/imagination/csbgen/rogue_lls.xml
new file mode 100644
index 00000000000..2da0af2b147
--- /dev/null
+++ b/src/imagination/csbgen/rogue_lls.xml
@@ -0,0 +1,49 @@
+<!-- rogue_lls.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_pbestate.xml b/src/imagination/csbgen/rogue_pbestate.xml
new file mode 100644
index 00000000000..29346d249dc
--- /dev/null
+++ b/src/imagination/csbgen/rogue_pbestate.xml
@@ -0,0 +1,300 @@
+<!-- rogue_pbestate.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_pds.xml b/src/imagination/csbgen/rogue_pds.xml
new file mode 100644
index 00000000000..e3b673dd850
--- /dev/null
+++ b/src/imagination/csbgen/rogue_pds.xml
@@ -0,0 +1,116 @@
+<!-- rogue_pds.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_ppp.xml b/src/imagination/csbgen/rogue_ppp.xml
new file mode 100644
index 00000000000..f3a8878df78
--- /dev/null
+++ b/src/imagination/csbgen/rogue_ppp.xml
@@ -0,0 +1,329 @@
+<!-- rogue_ppp.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_texstate.xml b/src/imagination/csbgen/rogue_texstate.xml
new file mode 100644
index 00000000000..f11eab48455
--- /dev/null
+++ b/src/imagination/csbgen/rogue_texstate.xml
@@ -0,0 +1,334 @@
+<!-- rogue_texstate.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/csbgen/rogue_vdm.xml b/src/imagination/csbgen/rogue_vdm.xml
new file mode 100644
index 00000000000..a0b2376783d
--- /dev/null
+++ b/src/imagination/csbgen/rogue_vdm.xml
@@ -0,0 +1,256 @@
+<!-- rogue_vdm.xml: register definitions (XML content not recovered) -->
diff --git a/src/imagination/include/hwdef/rogue_hw_defs.h b/src/imagination/include/hwdef/rogue_hw_defs.h
new file mode 100644
index 00000000000..1ee1eb5ee34
--- /dev/null
+++ b/src/imagination/include/hwdef/rogue_hw_defs.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgxdefs.h and should only contain object-like macros.
+ * Any function-like macros or inline functions should instead appear in
+ * rogue_hw_utils.h.
+ */
+
+#ifndef ROGUE_HW_DEFS_H
+#define ROGUE_HW_DEFS_H
+
+#include <stdint.h>
+
+#include "util/macros.h"
+
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT 12U
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE \
+   BITFIELD_BIT(ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT)
+
+/* ISP triangle merging constants. */
+/* tan(15) (0x3E8930A3) */
+#define ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR 0.267949f
+/* tan(60) (0x3FDDB3D7) */
+#define ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR 1.732051f
+#define ROGUE_ISP_MERGE_SCALE_FACTOR 16.0f
+
+#define ROGUE_MAX_INSTR_BYTES 32U
+
+/* MList entry stride in bytes. */
+#define ROGUE_MLIST_ENTRY_STRIDE 4U
+
+/* VCE & TE share virtual space and Alist. */
+#define ROGUE_NUM_PM_ADDRESS_SPACES 2U
+
+/* PM maximum addressable limit (as determined by the size field of the
+ * PM_*_FSTACK registers).
+ */
+#define ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE UINT64_C(0x400000000)
+
+/* Vheap entry size in bytes. */
+#define ROGUE_PM_VHEAP_ENTRY_SIZE 4U
+
+#define ROGUE_RTC_SIZE_IN_BYTES 256U
+
+#define ROGUE_NUM_VCE 1U
+
+#define ROGUE_NUM_TEAC 1U
+
+#define ROGUE_NUM_TE 1U
+
+/* Tail pointer size in bytes.
*/ +#define ROGUE_TAIL_POINTER_SIZE 8U + +/* Tail pointer cache line size. */ +#define ROGUE_TE_TPC_CACHE_LINE_SIZE 64U + +#define ROGUE_MAX_VERTEX_SHARED_REGISTERS 1024U + +#define ROGUE_MAX_PIXEL_SHARED_REGISTERS 1024U + +/* Number of CR_PDS_BGRND values that need setting up. */ +#define ROGUE_NUM_CR_PDS_BGRND_WORDS 3U + +/* Number of PBESTATE_REG_WORD values that need setting up. */ +#define ROGUE_NUM_PBESTATE_REG_WORDS 3U + +/* Number of PBESTATE_STATE_WORD values that need setting up. */ +#define ROGUE_NUM_PBESTATE_STATE_WORDS 2U + +/* Number of TEXSTATE_IMAGE_WORD values that need setting up. */ +#define ROGUE_NUM_TEXSTATE_IMAGE_WORDS 2U + +#define ROGUE_MAX_RENDER_TARGETS 2048U + +/* 12 dwords reserved for shared register management. The first dword is the + * number of shared register blocks to reload. Should be a multiple of 4 dwords, + * size in bytes. + */ +#define ROGUE_LLS_SHARED_REGS_RESERVE_SIZE 48U + +#define ROGUE_USC_TASK_PROGRAM_SIZE 512U + +#define ROGUE_CSRM_LINE_SIZE_IN_DWORDS (64U * 4U * 4U) + +/* The maximum amount of local memory which can be allocated by a single kernel + * (in dwords/32-bit registers). + * + * ROGUE_CDMCTRL_USC_COMMON_SIZE_UNIT_SIZE is in bytes so we divide by four. + */ +#define ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS \ + ((ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE * \ + ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_MAX_SIZE) >> \ + 2) + +#define ROGUE_MAX_INSTANCES_PER_TASK \ + (ROGUE_CDMCTRL_KERNEL8_MAX_INSTANCES_MAX_SIZE + 1U) + +/* Optimal number for packing work groups into a slot. */ +#define ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK 8U + +/* The maximum number of pixel task instances which might be running overlapped + * with compute. Once we have 8 pixel task instances we have a complete set and + * task will be able to run and allocations will be freed. + */ +#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U + +#endif /* ROGUE_HW_DEFS_H */ diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h new file mode 100644 index 00000000000..fc1e4e6fd59 --- /dev/null +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -0,0 +1,379 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is based on rgxdefs.h and should only contain function-like macros + * and inline functions. Any object-like macros should instead appear in + * rogue_hw_defs.h. 
+ */
+
+#ifndef ROGUE_HW_UTILS_H
+#define ROGUE_HW_UTILS_H
+
+#include <stdint.h>
+
+#include "pvr_winsys.h"
+
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_cdm.h"
+#include "csbgen/rogue_lls.h"
+
+#undef __pvr_get_address
+#undef __pvr_address_type
+
+#include "rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+
+static inline void
+rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
+                                  uint32_t samples,
+                                  uint32_t *const x_out,
+                                  uint32_t *const y_out)
+{
+   const uint32_t tile_size_x =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
+   const uint32_t tile_size_y =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
+   const uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
+
+#if !defined(NDEBUG)
+   switch (samples_per_pixel) {
+   case 1:
+   case 2:
+   case 4:
+      break;
+   default:
+      assert(!"Unsupported ISP samples per pixel");
+   }
+#endif
+
+   *x_out = tile_size_x;
+   *y_out = tile_size_y;
+
+   switch (samples) {
+   default:
+      assert(!"Unsupported number of samples");
+      FALLTHROUGH;
+   case 1:
+      break;
+   case 2:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *y_out *= 2;
+
+      break;
+   case 4:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *x_out *= 2;
+
+      if (samples_per_pixel == 2)
+         *y_out *= 2;
+
+      break;
+   case 8:
+      *y_out *= 2;
+      break;
+   }
+}
+
+static inline uint64_t
+rogue_get_min_free_list_size(const struct pvr_device_info *dev_info)
+{
+   uint64_t min_num_pages;
+
+   if (PVR_HAS_FEATURE(dev_info, roguexe)) {
+      if (PVR_HAS_QUIRK(dev_info, 66011))
+         min_num_pages = 40U;
+      else
+         min_num_pages = 25U;
+   } else {
+      min_num_pages = 50U;
+   }
+
+   return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+}
+
+static inline uint32_t
+rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
+{
+   /* Default value based on the minimum value found in all existing cores. */
+   uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
+
+   /* FIXME: Where does the 9 come from? */
+   return max_usc_tasks - 9;
+}
+
+static inline uint32_t
+rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
+{
+   if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
+      return 8U;
+
+   return 4U;
+}
+
+static inline void
+rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
+                            uint32_t *const x_out,
+                            uint32_t *const y_out)
+{
+   uint32_t version;
+
+   if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
+      version = 0;
+
+   if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
+       version == 2) {
+      *x_out = 4;
+      *y_out = 4;
+   } else {
+      *x_out = 1;
+      *y_out = 1;
+   }
+}
+
+static inline uint32_t
+rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t num_macrotiles_x;
+   uint32_t num_macrotiles_y;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+      return 0;
+
+   rogue_get_num_macrotiles_xy(dev_info, &num_macrotiles_x, &num_macrotiles_y);
+
+   return num_macrotiles_x * num_macrotiles_y * 8U;
+}
+
+/* To get the number of required Bernado/Phantom(s), divide the number of
+ * clusters by 4 and round up.
+ */
+static inline uint32_t
+rogue_get_num_phantoms(const struct pvr_device_info *dev_info)
+{
+   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
+}
+
+/* Region header size in bytes.
*/ +static inline uint32_t +rogue_get_region_header_size(const struct pvr_device_info *dev_info) +{ + uint32_t version; + + if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version)) + version = 0; + + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && + version == 2) { + return 6; + } + + return 5; +} + +/* Return the total reserved size of partition in dwords. */ +static inline uint32_t +rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info) +{ + uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0); + uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0); + uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); + + if (tile_size_x == 16 && tile_size_y == 16) { + return tile_size_x * tile_size_y * max_partitions * + PVR_GET_FEATURE_VALUE(dev_info, + usc_min_output_registers_per_pix, + 0); + } + + return max_partitions * 1024U; +} + +static inline uint32_t +rogue_get_render_size_max(const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) + if (!PVR_HAS_FEATURE(dev_info, screen_size8K)) + return 4096U; + + return 8192U; +} + +#define rogue_get_render_size_max_x(dev_info) \ + rogue_get_render_size_max(dev_info) + +#define rogue_get_render_size_max_y(dev_info) \ + rogue_get_render_size_max(dev_info) + +static inline uint32_t +rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info) +{ + return PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U) / 8U; +} + +static inline uint32_t pvr_get_max_user_vertex_output_components( + const struct pvr_device_info *dev_info) +{ + const uint32_t uvs_pba_entries = + PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U); + const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U); + + if (uvs_banks <= 8U && uvs_pba_entries == 160U) + return 64U; + + return 128U; +} + +static inline uint32_t +rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info) +{ + uint32_t common_store_size_in_dwords = + PVR_GET_FEATURE_VALUE(dev_info, + common_store_size_in_dwords, + 512U * 4U * 4U); + uint32_t reserved_shared_size = + common_store_size_in_dwords - (256U * 4U) - + rogue_get_total_reserved_partition_size(dev_info); + + if (PVR_HAS_QUIRK(dev_info, 44079)) { + uint32_t common_store_split_point = (768U * 4U * 4U); + + return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size); + } + + return reserved_shared_size; +} + +static inline uint32_t +rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, compute)) + return 2U * 1024U; + + return 0U; +} + +static inline uint32_t +rogue_get_max_coeffs(const struct pvr_device_info *dev_info) +{ + uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS; + uint32_t pending_allocation_shared_regs = 2U * 1024U; + uint32_t pending_allocation_coeff_regs = 0U; + uint32_t num_phantoms = rogue_get_num_phantoms(dev_info); + uint32_t tiles_in_flight = + PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); + uint32_t max_coeff_pixel_portion = + DIV_ROUND_UP(tiles_in_flight, num_phantoms); + + max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS; + + /* Compute tasks on cores with BRN48492 and without compute overlap may lock + * up without two additional lines of coeffs. 
+ */ + if (PVR_HAS_QUIRK(dev_info, 48492) && + !PVR_HAS_FEATURE(dev_info, compute_overlap)) { + pending_allocation_coeff_regs = 2U * 1024U; + } + + if (PVR_HAS_ERN(dev_info, 38748)) + pending_allocation_shared_regs = 0U; + + if (PVR_HAS_ERN(dev_info, 38020)) { + max_coeff_additional_portion += + rogue_max_compute_shared_registers(dev_info); + } + + return rogue_get_reserved_shared_size(dev_info) + + pending_allocation_coeff_regs - + (max_coeff_pixel_portion + max_coeff_additional_portion + + pending_allocation_shared_regs); +} + +static inline uint32_t +rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + const uint32_t max_num_cores = + PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + const uint32_t cdm_context_resume_buffer_stride = + ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size); + + return cdm_context_resume_buffer_stride * max_num_cores; + } + + return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE; +} + +static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment( + const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) + return rogue_get_slc_cache_line_size(dev_info); + + return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT; +} + +static inline uint32_t +rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info) +{ + uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info); + + if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) && + !PVR_HAS_FEATURE(dev_info, compute_overlap)) { + /* Driver must not use the 2 reserved lines. */ + available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2; + } + + /* The maximum amount of local memory available to a kernel is the minimum + * of the total number of coefficient registers available and the max common + * store allocation size which can be made by the CDM. + * + * If any coeff lines are reserved for tessellation or pixel then we need to + * subtract those too. + */ + return MIN2(available_coeffs_in_dwords, + ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS); +} + +static inline uint32_t +rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info) +{ + /* The number of tasks which can be executed per USC - Limited to 16U by the + * CDM. + */ + const uint32_t max_tasks_per_usc = 16U; + + if (!PVR_HAS_ERN(dev_info, 35421)) { + /* Barriers on work-groups > 32 instances aren't supported. */ + return ROGUE_MAX_INSTANCES_PER_TASK; + } + + return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc; +} + +#endif /* ROGUE_HW_UTILS_H */ diff --git a/src/imagination/include/pvr_rogue_fw.h b/src/imagination/include/pvr_rogue_fw.h new file mode 100644 index 00000000000..a737ac6bc0d --- /dev/null +++ b/src/imagination/include/pvr_rogue_fw.h @@ -0,0 +1,58 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgx_fwif_shared.h and rgx_fwif_client.h. It contains
+ * information about the firmware that is needed by the driver.
+ */
+
+#ifndef PVR_ROGUE_FW_H
+#define PVR_ROGUE_FW_H
+
+/**
+ * Maximum PB free list size supported by RGX and Services.
+ *
+ * The maximum PB free list size must ensure that no PM address space can be
+ * fully used, because if the full address space were used it would wrap and
+ * corrupt itself. Since there are two freelists (the local one is always
+ * minimum sized), this can be described as the following three conditions
+ * being met:
+ *
+ *    Minimum PB + Maximum PB < ALIST PM address space size (16GB)
+ *    Minimum PB + Maximum PB < TE PM address space size (16GB) / NUM_TE_PIPES
+ *    Minimum PB + Maximum PB < VCE PM address space size (16GB) / NUM_VCE_PIPES
+ *
+ * Since the max of NUM_TE_PIPES and NUM_VCE_PIPES is 4, we have a hard limit
+ * of 4GB minus the minimum PB. For convenience we take the smaller power-of-2
+ * value of 2GB. This is far more than any normal application would request
+ * or use.
+ */
+#define ROGUE_FREE_LIST_MAX_SIZE (2ULL * 1024ULL * 1024ULL * 1024ULL)
+
+/* FIXME: This will change based on the firmware configuration, which will vary
+ * depending on the BVNC and firmware version. The powervr KM driver allows this
+ * information to be queried, but the pvrsrvkm KM driver doesn't. This
+ * information should really be queried from the winsys.
+ */
+/* Indicates the number of Render Target Datas in a Render Target Dataset. */
+#define ROGUE_NUM_RTDATAS 2U
+
+#endif /* PVR_ROGUE_FW_H */
diff --git a/src/imagination/meson.build b/src/imagination/meson.build
new file mode 100644
index 00000000000..626a03a3d76
--- /dev/null
+++ b/src/imagination/meson.build
@@ -0,0 +1,33 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+ +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +inc_imagination = include_directories([ + '.', + 'common', + 'include', +]) + +if with_imagination_vk + subdir('common') + subdir('csbgen') + subdir('rogue') + subdir('vulkan') +endif diff --git a/src/imagination/rogue/meson.build b/src/imagination/rogue/meson.build new file mode 100644 index 00000000000..240dfa207b1 --- /dev/null +++ b/src/imagination/rogue/meson.build @@ -0,0 +1,77 @@ +# Copyright © 2022 Imagination Technologies Ltd. + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
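+
+# The nir/ sources below are NIR lowering passes; the remaining files
+# implement the core Rogue IR, its encoder, and the register allocator.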
+ +libpowervr_rogue_files = files( + 'nir/rogue_nir_constreg.c', + 'nir/rogue_nir_lower_io.c', + 'nir/rogue_nir_pfo.c', + + 'rogue.c', + 'rogue_build_data.c', + 'rogue_compiler.c', + 'rogue_constreg.c', + 'rogue_dump.c', + 'rogue_encode.c', + 'rogue_encoders.c', + 'rogue_instr.c', + 'rogue_nir.c', + 'rogue_operand.c', + 'rogue_regalloc.c', + 'rogue_shader.c', + 'rogue_util.c', + 'rogue_validate.c', +) + +libpowervr_rogue = shared_library( + 'powervr_rogue', + libpowervr_rogue_files, + include_directories : [ + inc_imagination, + inc_include, + inc_compiler, + inc_src, + inc_mapi, + inc_mesa, + inc_gallium, + inc_gallium_aux, + ], + c_args : [c_msvc_compat_args, no_override_init_args], + gnu_symbol_visibility : 'hidden', + dependencies : [idep_mesautil, idep_nir, dep_csbgen], + install : true, +) + +rogue_compiler = executable( + 'rogue_compiler', + 'tools/offline_compiler.c', + link_with : [libpowervr_rogue], + dependencies : [idep_mesautil, idep_nir], + include_directories : [ + inc_mesa, + inc_include, + inc_src, + inc_mapi, + inc_gallium, + inc_gallium_aux, + inc_compiler, + ], + install : false, +) diff --git a/src/imagination/rogue/nir/rogue_nir_constreg.c b/src/imagination/rogue/nir/rogue_nir_constreg.c new file mode 100644 index 00000000000..30bda0cc971 --- /dev/null +++ b/src/imagination/rogue/nir/rogue_nir_constreg.c @@ -0,0 +1,74 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_search_helpers.h" +#include "rogue_constreg.h" +#include "rogue_nir.h" + +/* TODO: optimize: if value is in const regs, replace, else, use shared regs and + * notify driver they need to be populated? + */ + +/* Replaces multiple ssa uses from load_const with a single use -> a register. + */ +void rogue_nir_constreg(nir_shader *shader) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + nir_builder b; + + nir_builder_init(&b, impl); + + /* Find load_const instructions. */ + nir_foreach_block (block, impl) { + nir_foreach_instr_safe (instr, block) { + if (instr->type != nir_instr_type_load_const) + continue; + + nir_load_const_instr *load_const = nir_instr_as_load_const(instr); + + /* Skip values that can be pulled from constant registers. 
*/ + uint32_t value = nir_const_value_as_uint(load_const->value[0], 32); + size_t const_reg = rogue_constreg_lookup(value); + if (const_reg != ROGUE_NO_CONST_REG) + continue; + + b.cursor = nir_after_instr(&load_const->instr); + nir_ssa_def *mov = nir_mov(&b, &load_const->def); + + nir_foreach_use_safe (use_src, &load_const->def) { + if (use_src->parent_instr == mov->parent_instr) + continue; + + /* Skip when used as an index for intrinsics, as we want to + * access that value directly. + */ + if (use_src->parent_instr->type == nir_instr_type_intrinsic) + continue; + + nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, mov); + } + } + } +} diff --git a/src/imagination/rogue/nir/rogue_nir_lower_io.c b/src/imagination/rogue/nir/rogue_nir_lower_io.c new file mode 100644 index 00000000000..63cbae1867a --- /dev/null +++ b/src/imagination/rogue/nir/rogue_nir_lower_io.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_search_helpers.h" +#include "rogue_nir.h" +#include "rogue_nir_helpers.h" + +static void lower_vulkan_resource_index(nir_builder *b, + nir_intrinsic_instr *intr, + void *pipeline_layout) +{ + unsigned desc_set = nir_intrinsic_desc_set(intr); + unsigned binding = nir_intrinsic_binding(intr); + + nir_ssa_def *def = nir_vec3(b, + nir_imm_int(b, desc_set), + nir_imm_int(b, binding), + nir_imm_int(b, 0)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, def); + nir_instr_remove(&intr->instr); +} + +static void lower_load_vulkan_descriptor(nir_builder *b, + nir_intrinsic_instr *intr) +{ + /* Loading the descriptor happens as part of the load/store instruction so + * this is a no-op. + */ + + nir_ssa_def_rewrite_uses(&intr->dest.ssa, intr->src[0].ssa); + nir_instr_remove(&intr->instr); +} + +static void lower_load_ubo_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) +{ + /* Scalarize the load_ubo. 
*/ + b->cursor = nir_before_instr(&intr->instr); + + assert(intr->dest.is_ssa); + assert(intr->num_components > 1); + + nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS]; + + for (uint8_t i = 0; i < intr->num_components; i++) { + size_t scaled_range = nir_intrinsic_range(intr) / intr->num_components; + nir_intrinsic_instr *chan_intr = + nir_intrinsic_instr_create(b->shader, intr->intrinsic); + nir_ssa_dest_init(&chan_intr->instr, + &chan_intr->dest, + 1, + intr->dest.ssa.bit_size, + NULL); + chan_intr->num_components = 1; + + nir_intrinsic_set_access(chan_intr, nir_intrinsic_access(intr)); + nir_intrinsic_set_align_mul(chan_intr, nir_intrinsic_align_mul(intr)); + nir_intrinsic_set_align_offset(chan_intr, + nir_intrinsic_align_offset(intr)); + nir_intrinsic_set_range_base(chan_intr, + nir_intrinsic_range_base(intr) + + (i * intr->num_components)); + nir_intrinsic_set_range(chan_intr, scaled_range); + + /* Base (desc_set, binding). */ + nir_src_copy(&chan_intr->src[0], &intr->src[0]); + + /* Offset (unused). */ + chan_intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); + + nir_builder_instr_insert(b, &chan_intr->instr); + + loads[i] = &chan_intr->dest.ssa; + } + + nir_ssa_def_rewrite_uses(&intr->dest.ssa, + nir_vec(b, loads, intr->num_components)); + nir_instr_remove(&intr->instr); +} + +static bool +lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, void *layout) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_vulkan_descriptor: + lower_load_vulkan_descriptor(b, instr); + return true; + + case nir_intrinsic_vulkan_resource_index: + lower_vulkan_resource_index(b, instr, layout); + return true; + + case nir_intrinsic_load_ubo: + lower_load_ubo_to_scalar(b, instr); + return true; + + default: + break; + } + + return false; +} + +static bool lower_impl(nir_function_impl *impl, void *layout) +{ + bool progress = false; + nir_builder b; + + nir_builder_init(&b, impl); + + nir_foreach_block (block, impl) { + nir_foreach_instr_safe (instr, block) { + b.cursor = nir_before_instr(instr); + switch (instr->type) { + case nir_instr_type_intrinsic: + progress |= + lower_intrinsic(&b, nir_instr_as_intrinsic(instr), layout); + break; + + default: + break; + } + } + } + + if (progress) + nir_metadata_preserve(impl, nir_metadata_none); + else + nir_metadata_preserve(impl, nir_metadata_all); + + return progress; +} + +bool rogue_nir_lower_io(nir_shader *shader, void *layout) +{ + bool progress = false; + + nir_foreach_function (function, shader) { + if (function->impl) + progress |= lower_impl(function->impl, layout); + } + + if (progress) + nir_opt_dce(shader); + + return progress; +} diff --git a/src/imagination/rogue/nir/rogue_nir_pfo.c b/src/imagination/rogue/nir/rogue_nir_pfo.c new file mode 100644 index 00000000000..d14d11b8240 --- /dev/null +++ b/src/imagination/rogue/nir/rogue_nir_pfo.c @@ -0,0 +1,86 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_search_helpers.h" +#include "rogue_nir.h" + +static void insert_pfo(nir_builder *b, + nir_intrinsic_instr *store_output, + nir_src *output_src) +{ + /* TODO: Support complex PFO with blending. */ + /* TODO: Verify type is vec4. */ + + /* Pack the output color components into U8888 format. */ + nir_ssa_def *new_output_src_ssa = nir_pack_unorm_4x8(b, output_src->ssa); + nir_src new_output_src = nir_src_for_ssa(new_output_src_ssa); + + /* Update the store_output intrinsic. */ + nir_instr_rewrite_src(&store_output->instr, output_src, new_output_src); + nir_intrinsic_set_write_mask(store_output, 1); + store_output->num_components = 1; + nir_intrinsic_set_src_type(store_output, nir_type_uint32); +} + +void rogue_nir_pfo(nir_shader *shader) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + nir_builder b; + + /* Only apply to fragment shaders. */ + if (shader->info.stage != MESA_SHADER_FRAGMENT) + return; + + nir_builder_init(&b, impl); + + nir_foreach_block (block, impl) { + nir_foreach_instr_safe (instr, block) { + if (instr->type == nir_instr_type_intrinsic) { + /* Find the store_output intrinsic and pack the output value. */ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->intrinsic != nir_intrinsic_store_output) + continue; + + b.cursor = nir_before_instr(&intr->instr); + insert_pfo(&b, intr, &intr->src[0]); + } else if (instr->type == nir_instr_type_deref) { + /* Find variable derefs and update their type. */ + nir_deref_instr *deref = nir_instr_as_deref(instr); + + if (!nir_deref_mode_is(deref, nir_var_shader_out)) + continue; + + if (deref->deref_type != nir_deref_type_var) + continue; + + nir_variable *out = nir_deref_instr_get_variable(deref); + + deref->type = glsl_uintN_t_type(32); + out->type = glsl_uintN_t_type(32); + } + } + } +} diff --git a/src/imagination/rogue/rogue.c b/src/imagination/rogue/rogue.c new file mode 100644 index 00000000000..5481bc905f1 --- /dev/null +++ b/src/imagination/rogue/rogue.c @@ -0,0 +1,789 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "compiler/shader_enums.h" +#include "compiler/spirv/nir_spirv.h" +#include "nir/nir.h" +#include "rogue.h" +#include "rogue_build_data.h" +#include "rogue_compiler.h" +#include "rogue_constreg.h" +#include "rogue_encode.h" +#include "rogue_nir.h" +#include "rogue_nir_helpers.h" +#include "rogue_operand.h" +#include "rogue_regalloc.h" +#include "rogue_shader.h" +#include "rogue_validate.h" +#include "util/macros.h" +#include "util/memstream.h" +#include "util/ralloc.h" + +/** + * \file rogue.c + * + * \brief Contains the top-level Rogue compiler interface for Vulkan driver and + * the offline compiler. + */ + +/** + * \brief Converts a SPIR-V shader to NIR. + * + * \param[in] ctx Shared multi-stage build context. + * \param[in] stage Shader stage. + * \param[in] spirv_size SPIR-V data length in DWORDs. + * \param[in] spirv_data SPIR-V data. + * \param[in] num_spec Number of SPIR-V specializations. + * \param[in] spec SPIR-V specializations. + * \return A nir_shader* if successful, or NULL if unsuccessful. + */ +nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx, + gl_shader_stage stage, + const char *entry, + size_t spirv_size, + const uint32_t *spirv_data, + unsigned num_spec, + struct nir_spirv_specialization *spec) +{ + nir_shader *nir; + + nir = spirv_to_nir(spirv_data, + spirv_size, + spec, + num_spec, + stage, + entry, + rogue_get_spirv_options(ctx->compiler), + rogue_get_compiler_options(ctx->compiler)); + if (!nir) + return NULL; + + ralloc_steal(ctx, nir); + + /* Apply passes. */ + if (!rogue_nir_passes(ctx, nir, stage)) { + ralloc_free(nir); + return NULL; + } + + /* Collect I/O data to pass back to the driver. */ + if (!rogue_collect_io_data(ctx, nir)) { + ralloc_free(nir); + return NULL; + } + + return nir; +} + +/** + * \brief Converts a Rogue shader to binary. + * + * \param[in] ctx Shared multi-stage build context. + * \param[in] shader Rogue shader. + * \return A rogue_shader_binary* if successful, or NULL if unsuccessful. + */ +struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx, + const struct rogue_shader *shader) +{ + struct rogue_shader_binary *binary; + struct u_memstream mem; + size_t buf_size; + char *buf; + + if (!rogue_validate_shader(shader)) + return NULL; + + if (!u_memstream_open(&mem, &buf, &buf_size)) + return NULL; + + if (!rogue_encode_shader(shader, u_memstream_get(&mem))) { + u_memstream_close(&mem); + free(buf); + return NULL; + } + + u_memstream_close(&mem); + + binary = rzalloc_size(ctx, sizeof(*binary) + buf_size); + if (!binary) { + free(buf); + return NULL; + } + + binary->size = buf_size; + memcpy(binary->data, buf, buf_size); + + free(buf); + + return binary; +} + +static bool +setup_alu_dest(struct rogue_instr *instr, size_t dest_index, nir_alu_instr *alu) +{ + assert(dest_index == 0); + + /* Dest validation. 
*/ + assert(nir_dest_num_components(alu->dest.dest) == 1 || + nir_dest_num_components(alu->dest.dest) == 4); + assert(nir_dest_bit_size(alu->dest.dest) == 32); + + size_t nir_dest_reg = nir_alu_dest_regindex(alu); + + if (nir_dest_num_components(alu->dest.dest) == 1) { + CHECK(rogue_instr_set_operand_vreg(instr, dest_index, nir_dest_reg)); + } else { + size_t comp = nir_alu_dest_comp(alu); + CHECK(rogue_instr_set_operand_vreg_vec(instr, + dest_index, + comp, + nir_dest_reg)); + } + + return true; +} + +static bool trans_constreg_operand(struct rogue_instr *instr, + size_t operand_index, + uint32_t const_value) +{ + size_t const_reg = rogue_constreg_lookup(const_value); + + /* Only values that can be sourced from const regs should be left from the + * rogue_nir_constreg pass. + */ + assert(const_reg != ROGUE_NO_CONST_REG); + + CHECK(rogue_instr_set_operand_reg(instr, + operand_index, + ROGUE_OPERAND_TYPE_REG_CONST, + const_reg)); + + return true; +} + +static bool trans_nir_alu_fmax(struct rogue_shader *shader, nir_alu_instr *alu) +{ + /* Src validation. */ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + assert(nir_src_num_components(alu->src[1].src) == 1); + assert(nir_src_bit_size(alu->src[1].src) == 32); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MAX); + + CHECK(setup_alu_dest(instr, 0, alu)); + + for (size_t u = 0; u < nir_op_infos[nir_op_fmax].num_inputs; ++u) { + /* Handle values that can be pulled from const regs. */ + if (nir_alu_src_is_const(alu, u)) { + CHECK(trans_constreg_operand(instr, u + 1, nir_alu_src_const(alu, u))); + continue; + } + + size_t nir_src_reg = nir_alu_src_regindex(alu, u); + + CHECK(rogue_instr_set_operand_vreg(instr, u + 1, nir_src_reg)); + } + + return true; +} + +static bool trans_nir_alu_fmin(struct rogue_shader *shader, nir_alu_instr *alu) +{ + /* Src validation. */ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + assert(nir_src_num_components(alu->src[1].src) == 1); + assert(nir_src_bit_size(alu->src[1].src) == 32); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MIN); + + CHECK(setup_alu_dest(instr, 0, alu)); + + for (size_t u = 0; u < nir_op_infos[nir_op_fmin].num_inputs; ++u) { + /* Handle values that can be pulled from const regs. */ + if (nir_alu_src_is_const(alu, u)) { + CHECK(trans_constreg_operand(instr, u + 1, nir_alu_src_const(alu, u))); + continue; + } + + size_t nir_src_reg = nir_alu_src_regindex(alu, u); + + CHECK(rogue_instr_set_operand_vreg(instr, u + 1, nir_src_reg)); + } + + return true; +} + +static bool trans_nir_alu_mov_imm(struct rogue_shader *shader, + nir_alu_instr *alu) +{ + /* Src validation. */ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + uint32_t value = nir_alu_src_const(alu, 0); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV_IMM); + + CHECK(setup_alu_dest(instr, 0, alu)); + CHECK(rogue_instr_set_operand_imm(instr, 1, value)); + + return true; +} + +static bool trans_nir_alu_mov(struct rogue_shader *shader, nir_alu_instr *alu) +{ + /* Constant value that isn't in constregs. */ + if (nir_alu_src_is_const(alu, 0) && + nir_dest_num_components(alu->dest.dest) == 1) + return trans_nir_alu_mov_imm(shader, alu); + + /* Src validation. 
*/ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV); + + CHECK(setup_alu_dest(instr, 0, alu)); + + /* Handle values that can be pulled from const regs. */ + if (nir_alu_src_is_const(alu, 0)) { + return trans_constreg_operand(instr, 1, nir_alu_src_const(alu, 0)); + } + + size_t nir_src_reg = nir_alu_src_regindex(alu, 0); + CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_src_reg)); + + return true; +} + +static bool trans_nir_alu_pack_unorm_4x8(struct rogue_shader *shader, + nir_alu_instr *alu) +{ + /* Src/dest validation. */ + assert(nir_dest_num_components(alu->dest.dest) == 1); + assert(nir_dest_bit_size(alu->dest.dest) == 32); + + assert(nir_src_num_components(alu->src[0].src) == 4); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + size_t nir_src_reg = nir_alu_src_regindex(alu, 0); + size_t nir_dest_reg = nir_alu_dest_regindex(alu); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PACK_U8888); + + CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg)); + + /* Ensure all 4 components are being sourced in order. */ + for (size_t u = 0; u < nir_src_num_components(alu->src[0].src); ++u) + assert(alu->src->swizzle[u] == u); + + CHECK(rogue_instr_set_operand_vreg_vec(instr, + 1, + ROGUE_COMPONENT_ALL, + nir_src_reg)); + + return true; +} + +static bool trans_nir_alu_fmul(struct rogue_shader *shader, nir_alu_instr *alu) +{ + /* Src validation. */ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + assert(nir_src_num_components(alu->src[1].src) == 1); + assert(nir_src_bit_size(alu->src[1].src) == 32); + + size_t nir_in_reg_a = nir_alu_src_regindex(alu, 0); + size_t nir_in_reg_b = nir_alu_src_regindex(alu, 1); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MUL); + + CHECK(setup_alu_dest(instr, 0, alu)); + CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_in_reg_a)); + CHECK(rogue_instr_set_operand_vreg(instr, 2, nir_in_reg_b)); + + return true; +} + +static bool trans_nir_alu_ffma(struct rogue_shader *shader, nir_alu_instr *alu) +{ + /* Src validation. 
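+    * ffma computes src[0] * src[1] + src[2]; all three sources must be 32-bit
+    * scalars here.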
*/ + assert(nir_src_num_components(alu->src[0].src) == 1); + assert(nir_src_bit_size(alu->src[0].src) == 32); + + assert(nir_src_num_components(alu->src[1].src) == 1); + assert(nir_src_bit_size(alu->src[1].src) == 32); + + assert(nir_src_num_components(alu->src[2].src) == 1); + assert(nir_src_bit_size(alu->src[2].src) == 32); + + size_t nir_in_reg_a = nir_alu_src_regindex(alu, 0); + size_t nir_in_reg_b = nir_alu_src_regindex(alu, 1); + size_t nir_in_reg_c = nir_alu_src_regindex(alu, 2); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_FMA); + + CHECK(setup_alu_dest(instr, 0, alu)); + CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_in_reg_a)); + CHECK(rogue_instr_set_operand_vreg(instr, 2, nir_in_reg_b)); + CHECK(rogue_instr_set_operand_vreg(instr, 3, nir_in_reg_c)); + + return true; +} + +static bool trans_nir_alu(struct rogue_shader *shader, nir_alu_instr *alu) +{ + switch (alu->op) { + case nir_op_fmax: + return trans_nir_alu_fmax(shader, alu); + + case nir_op_fmin: + return trans_nir_alu_fmin(shader, alu); + + case nir_op_pack_unorm_4x8: + return trans_nir_alu_pack_unorm_4x8(shader, alu); + + case nir_op_mov: + return trans_nir_alu_mov(shader, alu); + + case nir_op_fmul: + return trans_nir_alu_fmul(shader, alu); + + case nir_op_ffma: + return trans_nir_alu_ffma(shader, alu); + + default: + break; + } + + unreachable("Unimplemented NIR ALU instruction."); +} + +static bool trans_nir_intrinsic_load_input_fs(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + struct rogue_fs_build_data *fs_data = &shader->ctx->stage_data.fs; + + /* Src/dest validation. */ + assert(nir_dest_num_components(intr->dest) == 1); + assert(nir_dest_bit_size(intr->dest) == 32); + + assert(nir_src_num_components(intr->src[0]) == 1); + assert(nir_src_bit_size(intr->src[0]) == 32); + assert(nir_intr_src_is_const(intr, 0)); + + /* Intrinsic index validation. */ + assert(nir_intrinsic_dest_type(intr) == nir_type_float32); + + struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr); + size_t component = nir_intrinsic_component(intr); + size_t coeff_index = rogue_coeff_index_fs(&fs_data->iterator_args, + io_semantics.location, + component); + size_t wcoeff_index = rogue_coeff_index_fs(&fs_data->iterator_args, ~0, 0); + size_t drc_num = rogue_acquire_drc(shader); + uint64_t source_count = nir_dest_num_components(intr->dest); + + size_t nir_dest_reg = nir_intr_dest_regindex(intr); + + /* pixiter.w instruction. */ + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PIX_ITER_W); + + CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg)); + CHECK(rogue_instr_set_operand_drc(instr, 1, drc_num)); + CHECK(rogue_instr_set_operand_reg(instr, + 2, + ROGUE_OPERAND_TYPE_REG_COEFF, + coeff_index)); + CHECK(rogue_instr_set_operand_reg(instr, + 3, + ROGUE_OPERAND_TYPE_REG_COEFF, + wcoeff_index)); + CHECK(rogue_instr_set_operand_imm(instr, 4, source_count)); + + /* wdf instruction must follow the pixiter.w. */ + instr = rogue_shader_insert(shader, ROGUE_OP_WDF); + + CHECK(rogue_instr_set_operand_drc(instr, 0, drc_num)); + rogue_release_drc(shader, drc_num); + + return true; +} + +static bool trans_nir_intrinsic_load_input_vs(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + /* Src/dest validation. 
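+    * Vertex inputs arrive pre-loaded in the vertex-input register bank, so
+    * unlike the iterator-driven fragment path above, a vertex load_input is
+    * just a MOV from a vi register.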
*/ + assert(nir_dest_num_components(intr->dest) == 1); + assert(nir_dest_bit_size(intr->dest) == 32); + + assert(nir_src_num_components(intr->src[0]) == 1); + assert(nir_src_bit_size(intr->src[0]) == 32); + assert(nir_intr_src_is_const(intr, 0)); + + /* Intrinsic index validation. */ + assert(nir_intrinsic_dest_type(intr) == nir_type_float32); + + size_t component = nir_intrinsic_component(intr); + struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr); + size_t vi_reg_index = ((io_semantics.location - VERT_ATTRIB_GENERIC0) * 3) + + component; /* TODO: get these properly with the + * intrinsic index (ssa argument) + */ + + size_t nir_dest_reg = nir_intr_dest_regindex(intr); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV); + + CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg)); + CHECK(rogue_instr_set_operand_reg(instr, + 1, + ROGUE_OPERAND_TYPE_REG_VERTEX_IN, + vi_reg_index)); + + return true; +} + +static bool trans_nir_intrinsic_load_input(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + switch (shader->stage) { + case MESA_SHADER_FRAGMENT: + return trans_nir_intrinsic_load_input_fs(shader, intr); + + case MESA_SHADER_VERTEX: + return trans_nir_intrinsic_load_input_vs(shader, intr); + + default: + break; + } + + unreachable("Unimplemented NIR load_input variant."); +} + +static bool trans_nir_intrinsic_store_output_fs(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + /* Src/dest validation. */ + assert(nir_src_num_components(intr->src[0]) == 1); + assert(nir_src_bit_size(intr->src[0]) == 32); + assert(!nir_intr_src_is_const(intr, 0)); + + assert(nir_src_num_components(intr->src[1]) == 1); + assert(nir_src_bit_size(intr->src[1]) == 32); + assert(nir_intr_src_is_const(intr, 1)); + + /* Intrinsic index validation. */ + assert(nir_intrinsic_src_type(intr) == nir_type_uint32); + + /* Fetch the output offset. */ + /* TODO: Is this really the right value to use for pixel out reg. num? */ + size_t offset = nir_intr_src_const(intr, 1); + + /* Fetch the components. */ + size_t src_reg = nir_intr_src_regindex(intr, 0); + + /* mov.olchk instruction. */ + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV); + + CHECK(rogue_instr_set_operand_reg(instr, + 0, + ROGUE_OPERAND_TYPE_REG_PIXEL_OUT, + offset)); + CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg)); + CHECK(rogue_instr_set_flag(instr, ROGUE_INSTR_FLAG_OLCHK)); + + return true; +} + +static bool trans_nir_intrinsic_store_output_vs(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + struct rogue_vs_build_data *vs_data = &shader->ctx->stage_data.vs; + + /* Src/dest validation. */ + assert(nir_src_num_components(intr->src[0]) == 1); + assert(nir_src_bit_size(intr->src[0]) == 32); + assert(!nir_intr_src_is_const(intr, 0)); + + assert(nir_src_num_components(intr->src[1]) == 1); + assert(nir_src_bit_size(intr->src[1]) == 32); + assert(nir_intr_src_is_const(intr, 1)); + + /* Intrinsic index validation. 
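+    * A single-component write mask is assumed: earlier NIR lowering is
+    * expected to have scalarized output stores.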
*/ + assert(nir_intrinsic_src_type(intr) == nir_type_float32); + assert(util_bitcount(nir_intrinsic_write_mask(intr)) == 1); + + struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr); + size_t component = nir_intrinsic_component(intr); + size_t vo_index = rogue_output_index_vs(&vs_data->outputs, + io_semantics.location, + component); + + size_t src_reg = nir_intr_src_regindex(intr, 0); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_VTXOUT); + + CHECK(rogue_instr_set_operand_imm(instr, 0, vo_index)); + CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg)); + + return true; +} + +static bool trans_nir_intrinsic_store_output(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + switch (shader->stage) { + case MESA_SHADER_FRAGMENT: + return trans_nir_intrinsic_store_output_fs(shader, intr); + + case MESA_SHADER_VERTEX: + return trans_nir_intrinsic_store_output_vs(shader, intr); + + default: + break; + } + + unreachable("Unimplemented NIR store_output variant."); +} + +static bool trans_nir_intrinsic_load_ubo(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + struct rogue_ubo_data *ubo_data = + &shader->ctx->common_data[shader->stage].ubo_data; + + /* Src/dest validation. */ + assert(nir_dest_num_components(intr->dest) == 1); + assert(nir_dest_bit_size(intr->dest) == 32); + + assert(nir_src_num_components(intr->src[0]) == 2); + assert(nir_src_bit_size(intr->src[0]) == 32); + assert(nir_intr_src_is_const(intr, 0)); + + assert(nir_src_num_components(intr->src[1]) == 1); + assert(nir_src_bit_size(intr->src[1]) == 32); + assert(nir_intr_src_is_const(intr, 1)); + + /* Intrinsic index validation. */ + assert((nir_intrinsic_range_base(intr) % ROGUE_REG_SIZE_BYTES) == 0); + assert(nir_intrinsic_range(intr) == ROGUE_REG_SIZE_BYTES); + + size_t nir_dest_reg = nir_intr_dest_regindex(intr); + + size_t desc_set = nir_intr_src_comp_const(intr, 0, 0); + size_t binding = nir_intr_src_comp_const(intr, 0, 1); + size_t offset = nir_intrinsic_range_base(intr); + + size_t sh_num = rogue_ubo_reg(ubo_data, desc_set, binding, offset); + + struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV); + + CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg)); + CHECK(rogue_instr_set_operand_reg(instr, + 1, + ROGUE_OPERAND_TYPE_REG_SHARED, + sh_num)); + return true; +} + +static bool trans_nir_intrinsic(struct rogue_shader *shader, + nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + return trans_nir_intrinsic_load_input(shader, intr); + + case nir_intrinsic_store_output: + return trans_nir_intrinsic_store_output(shader, intr); + + case nir_intrinsic_load_ubo: + return trans_nir_intrinsic_load_ubo(shader, intr); + + default: + break; + } + + unreachable("Unimplemented NIR intrinsic instruction."); +} + +static bool trans_nir_load_const(struct rogue_shader *shader, + nir_load_const_instr *load_const) +{ + /* Src/dest validation. */ + assert(load_const->def.bit_size == 32); + + /* Ensure that two-component load_consts are used only by load_ubos. 
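+    * Such vec2 constants carry the (descriptor set, binding) pair that
+    * trans_nir_intrinsic_load_ubo() reads back via nir_intr_src_comp_const().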
*/
+   if (load_const->def.num_components == 2) {
+      nir_foreach_use (use_src, &load_const->def) {
+         nir_instr *instr = use_src->parent_instr;
+         assert(instr->type == nir_instr_type_intrinsic);
+
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         assert(intr->intrinsic == nir_intrinsic_load_ubo);
+      }
+   } else {
+      assert(load_const->def.num_components == 1);
+   }
+
+   /* TODO: This is currently done in MOV_IMM, but this would now be the time
+    * to look up the constant value, see if it lives in const regs, or if it
+    * needs to generate a MOV_IMM (or be constant-calculated).
+    */
+   return true;
+}
+
+static bool trans_nir_jump_return(struct rogue_shader *shader,
+                                  nir_jump_instr *jump)
+{
+   enum rogue_opcode return_op;
+
+   switch (shader->stage) {
+   case MESA_SHADER_FRAGMENT:
+      return_op = ROGUE_OP_END_FRAG;
+      break;
+
+   case MESA_SHADER_VERTEX:
+      return_op = ROGUE_OP_END_VERT;
+      break;
+
+   default:
+      unreachable("Unimplemented NIR return instruction type.");
+   }
+
+   rogue_shader_insert(shader, return_op);
+
+   return true;
+}
+
+static bool trans_nir_jump(struct rogue_shader *shader, nir_jump_instr *jump)
+{
+   switch (jump->type) {
+   case nir_jump_return:
+      return trans_nir_jump_return(shader, jump);
+
+   default:
+      break;
+   }
+
+   unreachable("Unimplemented NIR jump instruction type.");
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   struct rogue_shader *shader = rogue_shader_create(ctx, stage);
+   if (!shader)
+      return NULL;
+
+   /* Make sure we only have a single function. */
+   assert(exec_list_length(&nir->functions) == 1);
+
+   /* Translate shader entrypoint. */
+   nir_function_impl *entry = nir_shader_get_entrypoint((nir_shader *)nir);
+   nir_foreach_block (block, entry) {
+      nir_foreach_instr (instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_alu:
+            /* TODO: Cleanup on failure. */
+            CHECKF(trans_nir_alu(shader, nir_instr_as_alu(instr)),
+                   "Failed to translate NIR ALU instruction.");
+            break;
+
+         case nir_instr_type_intrinsic:
+            CHECKF(trans_nir_intrinsic(shader, nir_instr_as_intrinsic(instr)),
+                   "Failed to translate NIR intrinsic instruction.");
+            break;
+
+         case nir_instr_type_load_const:
+            CHECKF(trans_nir_load_const(shader, nir_instr_as_load_const(instr)),
+                   "Failed to translate NIR load_const instruction.");
+            break;
+
+         case nir_instr_type_jump:
+            CHECKF(trans_nir_jump(shader, nir_instr_as_jump(instr)),
+                   "Failed to translate NIR jump instruction.");
+            break;
+
+         default:
+            unreachable("Unimplemented NIR instruction type.");
+         }
+      }
+   }
+
+   /* Perform register allocation. */
+   /* TODO: handle failure. */
+   if (!rogue_ra_alloc(&shader->instr_list,
+                       shader->ra,
+                       &ctx->common_data[stage].temps,
+                       &ctx->common_data[stage].internals))
+      return NULL;
+
+   return shader;
+}
+
+/**
+ * \brief Creates and sets up a shared multi-stage build context.
+ *
+ * \param[in] compiler The compiler context.
+ * \return A pointer to the new build context, or NULL on failure.
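+ *
+ * A minimal usage sketch (entry-point name illustrative):
+ *    ctx = rogue_create_build_context(compiler);
+ *    nir = rogue_spirv_to_nir(ctx, stage, "main", size, words, 0, NULL);
+ *    shader = rogue_nir_to_rogue(ctx, nir);
+ *    binary = rogue_to_binary(ctx, shader);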
+ */
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler)
+{
+   struct rogue_build_ctx *ctx;
+
+   ctx = rzalloc_size(compiler, sizeof(*ctx));
+   if (!ctx)
+      return NULL;
+
+   ctx->compiler = compiler;
+
+   /* nir/rogue/binary shaders need to be default-zeroed;
+    * this is taken care of by rzalloc_size.
+    */
+
+   /* Setup non-zero defaults. */
+   ctx->stage_data.fs.msaa_mode = ROGUE_MSAA_MODE_PIXEL;
+
+   return ctx;
+}
diff --git a/src/imagination/rogue/rogue.h b/src/imagination/rogue/rogue.h
new file mode 100644
index 00000000000..ca9ff8cbdee
--- /dev/null
+++ b/src/imagination/rogue/rogue.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_H
+#define ROGUE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+
+/* All registers are 32-bit in size. */
+#define ROGUE_REG_SIZE_BYTES 4
+#define ROGUE_REG_UNUSED UINT32_MAX
+
+struct nir_spirv_specialization;
+struct rogue_build_ctx;
+struct rogue_shader;
+
+enum rogue_msaa_mode {
+   ROGUE_MSAA_MODE_UNDEF = 0, /* explicitly treat 0 as undefined */
+   /* One task for all samples. */
+   ROGUE_MSAA_MODE_PIXEL,
+   /* For on-edge pixels only: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_SELECTIVE,
+   /* For all pixels: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_FULL,
+};
+
+/**
+ * \brief Shader binary.
+ */
+struct rogue_shader_binary {
+   size_t size;
+   uint8_t data[];
+};
+
+PUBLIC
+nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx,
+                               gl_shader_stage stage,
+                               const char *entry,
+                               size_t spirv_size,
+                               const uint32_t *spirv_data,
+                               unsigned num_spec,
+                               struct nir_spirv_specialization *spec);
+
+PUBLIC
+struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx,
+                                            const struct rogue_shader *shader);
+
+PUBLIC
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir);
+#endif /* ROGUE_H */
diff --git a/src/imagination/rogue/rogue_build_data.c b/src/imagination/rogue/rogue_build_data.c
new file mode 100644
index 00000000000..010a10971dd
--- /dev/null
+++ b/src/imagination/rogue/rogue_build_data.c
@@ -0,0 +1,602 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "compiler/shader_enums.h" +#include "nir/nir.h" +#include "rogue_build_data.h" +#include "rogue_nir_helpers.h" +#include "rogue_operand.h" +#include "util/macros.h" + +#define __pvr_address_type uint64_t +#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr) + +#include "csbgen/rogue_pds.h" + +#undef __pvr_get_address +#undef __pvr_address_type + +/** + * \brief Allocates the coefficient registers that will contain the iterator + * data for the fragment shader input varyings. + * + * \param[in] args The iterator argument data. + * \return The total number of coefficient registers required by the iterators. + */ +static size_t alloc_iterator_regs(struct rogue_iterator_args *args) +{ + size_t coeffs = 0; + + for (size_t u = 0; u < args->num_fpu_iterators; ++u) { + /* Ensure there aren't any gaps. */ + assert(args->base[u] == ~0); + + args->base[u] = coeffs; + coeffs += ROGUE_COEFF_ALIGN * args->components[u]; + } + + return coeffs; +} + +/** + * \brief Reserves an iterator for a fragment shader input varying, + * and calculates its setup data. + * + * \param[in] args The iterator argument data. + * \param[in] i The iterator index. + * \param[in] type The interpolation type of the varying. + * \param[in] f16 Whether the data type is F16 or F32. + * \param[in] components The number of components in the varying. + */ +static void reserve_iterator(struct rogue_iterator_args *args, + size_t i, + enum glsl_interp_mode type, + bool f16, + size_t components) +{ + struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 }; + + assert(components >= 1 && components <= 4); + + /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */ + assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE); + assert(i < ARRAY_SIZE(args->fpu_iterators)); + + switch (type) { + /* Default interpolation is smooth. */ + case INTERP_MODE_NONE: + data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; + data.perspective = true; + break; + + case INTERP_MODE_NOPERSPECTIVE: + data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; + data.perspective = false; + break; + + default: + unreachable("Unimplemented interpolation type."); + } + + /* Number of components in this varying + * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D). + */ + data.size = (components - 1); + + /* TODO: Investigate F16 support. 
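+    * Only F32 iterators are handled for now (the assert below enforces this);
+    * F16 support would also need a real f16_offset rather than mirroring the
+    * F32 one.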
+    */
+   assert(!f16);
+   data.f16 = f16;
+
+   /* Offsets within the vertex. */
+   data.f32_offset = 2 * i;
+   data.f16_offset = data.f32_offset;
+
+   ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
+   args->destination[i] = i;
+   args->base[i] = ~0;
+   args->components[i] = components;
+   ++args->num_fpu_iterators;
+}
+
+/**
+ * \brief Collects the fragment shader I/O data to feed back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] fs_data Fragment-specific build data.
+ * \param[in] nir NIR fragment shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_fs(struct rogue_common_build_data *common_data,
+                               struct rogue_fs_build_data *fs_data,
+                               nir_shader *nir)
+{
+   size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
+   assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
+
+   /* Process inputs (if present). */
+   if (num_inputs) {
+      /* If the fragment shader has inputs, the first iterator
+       * must be used for the W component.
+       */
+      reserve_iterator(&fs_data->iterator_args,
+                       0,
+                       INTERP_MODE_NOPERSPECTIVE,
+                       false,
+                       1);
+
+      nir_foreach_shader_in_variable (var, nir) {
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         size_t components = glsl_get_components(var->type);
+         enum glsl_interp_mode interp = var->data.interpolation;
+         bool f16 = glsl_type_is_16bit(var->type);
+
+         /* Check that arguments are either F16 or F32. */
+         assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+         assert(f16 || glsl_type_is_32bit(var->type));
+
+         /* Check input location. */
+         assert(var->data.location >= VARYING_SLOT_VAR0 &&
+                var->data.location <= VARYING_SLOT_VAR31);
+
+         reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
+      }
+
+      common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
+      assert(common_data->coeffs);
+      assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
+   }
+
+   /* TODO: Process outputs. */
+
+   return true;
+}
+
+/**
+ * \brief Allocates the vertex shader input registers.
+ *
+ * \param[in] inputs The vertex shader input data.
+ * \return The total number of vertex input registers required.
+ */
+static size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
+{
+   size_t vs_inputs = 0;
+
+   for (size_t u = 0; u < inputs->num_input_vars; ++u) {
+      /* Ensure there aren't any gaps. */
+      assert(inputs->base[u] == ~0);
+
+      inputs->base[u] = vs_inputs;
+      vs_inputs += inputs->components[u];
+   }
+
+   return vs_inputs;
+}
+
+/**
+ * \brief Allocates the vertex shader outputs.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The total number of vertex outputs required.
+ */
+static size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
+{
+   size_t vs_outputs = 0;
+
+   for (size_t u = 0; u < outputs->num_output_vars; ++u) {
+      /* Ensure there aren't any gaps. */
+      assert(outputs->base[u] == ~0);
+
+      outputs->base[u] = vs_outputs;
+      vs_outputs += outputs->components[u];
+   }
+
+   return vs_outputs;
+}
+
+/**
+ * \brief Counts the varyings used by the vertex shader.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The number of varyings used.
+ */
+static size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
+{
+   size_t varyings = 0;
+
+   /* Skip the position. */
+   for (size_t u = 1; u < outputs->num_output_vars; ++u)
+      varyings += outputs->components[u];
+
+   return varyings;
+}
+
+/**
+ * \brief Reserves space for a vertex shader input.
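+ *
+ * The register base is assigned later by alloc_vs_inputs(); this only records
+ * the component count and marks the entry as unallocated (~0).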
+ *
+ * \param[in] inputs The vertex input data.
+ * \param[in] i The vertex input index.
+ * \param[in] components The number of components in the input.
+ */
+static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
+                             size_t i,
+                             size_t components)
+{
+   assert(components >= 1 && components <= 4);
+
+   assert(i < ARRAY_SIZE(inputs->base));
+
+   inputs->base[i] = ~0;
+   inputs->components[i] = components;
+   ++inputs->num_input_vars;
+}
+
+/**
+ * \brief Reserves space for a vertex shader output.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] i The vertex output index.
+ * \param[in] components The number of components in the output.
+ */
+static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
+                              size_t i,
+                              size_t components)
+{
+   assert(components >= 1 && components <= 4);
+
+   assert(i < ARRAY_SIZE(outputs->base));
+
+   outputs->base[i] = ~0;
+   outputs->components[i] = components;
+   ++outputs->num_output_vars;
+}
+
+/**
+ * \brief Collects the vertex shader I/O data to feed back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] vs_data Vertex-specific build data.
+ * \param[in] nir NIR vertex shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_vs(struct rogue_common_build_data *common_data,
+                               struct rogue_vs_build_data *vs_data,
+                               nir_shader *nir)
+{
+   ASSERTED bool out_pos_present = false;
+   ASSERTED size_t num_outputs =
+      nir_count_variables_with_modes(nir, nir_var_shader_out);
+
+   /* Process inputs. */
+   nir_foreach_shader_in_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+      size_t i = var->data.location - VERT_ATTRIB_GENERIC0;
+
+      /* Check that inputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      /* Check input location. */
+      assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
+             var->data.location <= VERT_ATTRIB_GENERIC15);
+
+      reserve_vs_input(&vs_data->inputs, i, components);
+   }
+
+   vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
+   assert(vs_data->num_vertex_input_regs);
+   assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);
+
+   /* Process outputs. */
+
+   /* We should always have at least a position variable. */
+   assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");
+
+   nir_foreach_shader_out_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+
+      /* Check that outputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      if (var->data.location == VARYING_SLOT_POS) {
+         assert(components == 4);
+         out_pos_present = true;
+
+         reserve_vs_output(&vs_data->outputs, 0, components);
+      } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
+                 (var->data.location <= VARYING_SLOT_VAR31)) {
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         reserve_vs_output(&vs_data->outputs, i, components);
+      } else {
+         unreachable("Unsupported vertex output type.");
+      }
+   }
+
+   /* Always need the output position to be present.
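+    * Rasterization depends on it, and rogue_output_index_vs() relies on the
+    * position occupying slot 0.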
+    */
+   assert(out_pos_present);
+
+   vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
+   assert(vs_data->num_vertex_outputs);
+   assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);
+
+   vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
+
+   return true;
+}
+
+/**
+ * \brief Allocates the shared registers that will contain the UBOs.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \return The total number of shared registers required by the UBOs.
+ */
+static size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
+{
+   size_t shareds = 0;
+
+   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
+      /* Ensure there aren't any gaps. */
+      assert(ubo_data->dest[u] == ~0);
+
+      ubo_data->dest[u] = shareds;
+      shareds += ubo_data->size[u];
+   }
+
+   return shareds;
+}
+
+/**
+ * \brief Reserves a UBO and calculates its data.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] size The size required by the UBO (in dwords).
+ */
+static void reserve_ubo(struct rogue_ubo_data *ubo_data,
+                        size_t desc_set,
+                        size_t binding,
+                        size_t size)
+{
+   size_t i = ubo_data->num_ubo_entries;
+   assert(i < ARRAY_SIZE(ubo_data->desc_set));
+
+   ubo_data->desc_set[i] = desc_set;
+   ubo_data->binding[i] = binding;
+   ubo_data->dest[i] = ~0;
+   ubo_data->size[i] = size;
+   ++ubo_data->num_ubo_entries;
+}
+
+/**
+ * \brief Collects UBO data to feed back to the driver.
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_ubo_data(struct rogue_common_build_data *common_data,
+                             nir_shader *nir)
+{
+   /* Iterate over each UBO. */
+   nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
+      size_t desc_set = var->data.driver_location;
+      size_t binding = var->data.binding;
+      size_t ubo_size_regs = 0;
+
+      nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+      /* Iterate over each load_ubo that uses this UBO. */
+      nir_foreach_block (block, entry) {
+         nir_foreach_instr (instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            if (intr->intrinsic != nir_intrinsic_load_ubo)
+               continue;
+
+            assert(nir_src_num_components(intr->src[0]) == 2);
+            assert(nir_intr_src_is_const(intr, 0));
+
+            size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
+            size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);
+
+            if (load_desc_set != desc_set || load_binding != binding)
+               continue;
+
+            ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
+            assert(size_bytes == ROGUE_REG_SIZE_BYTES);
+
+            size_t offset_bytes = nir_intrinsic_range_base(intr);
+            assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+
+            size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
+
+            /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */
+
+            /* Find the largest load offset. */
+            ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
+         }
+      }
+
+      /* UBO size = largest offset + 1. */
+      ++ubo_size_regs;
+
+      reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
+   }
+
+   common_data->shareds = alloc_ubos(&common_data->ubo_data);
+   assert(common_data->shareds < ROGUE_MAX_REG_SHARED);
+
+   return true;
+}
+
+/**
+ * \brief Collects I/O data to feed back to the driver.
+ *
+ * Collects the inputs/outputs/memory required, and feeds that back to the
+ * driver.
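+ * (For example, the shared-register layout assigned to UBOs here is what
+ * rogue_ubo_reg() consults later during instruction translation.)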
+ * Done at this stage rather than at the start of rogue_to_binary, so that
+ * the I/O of all the shader stages is known before backend compilation,
+ * which allows things like culling unused inputs.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   struct rogue_common_build_data *common_data = &ctx->common_data[stage];
+
+   /* Collect stage-agnostic data. */
+   if (!collect_ubo_data(common_data, nir))
+      return false;
+
+   /* Collect stage-specific data. */
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT:
+      return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
+
+   case MESA_SHADER_VERTEX:
+      return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
+/**
+ * \brief Returns the allocated coefficient register index for a component of
+ * an input varying location.
+ *
+ * \param[in] args The allocated iterator argument data.
+ * \param[in] location The input varying location, or ~0 for the W coefficient.
+ * \param[in] component The requested component.
+ * \return The coefficient register index.
+ */
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+                            gl_varying_slot location,
+                            size_t component)
+{
+   size_t i;
+
+   /* Special case: W coefficient. */
+   if (location == ~0) {
+      /* The W component shouldn't be the only one. */
+      assert(args->num_fpu_iterators > 1);
+      assert(args->destination[0] == 0);
+      return 0;
+   }
+
+   i = (location - VARYING_SLOT_VAR0) + 1;
+   assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
+   assert(i < args->num_fpu_iterators);
+   assert(component < args->components[i]);
+   assert(args->base[i] != ~0);
+
+   return args->base[i] + (ROGUE_COEFF_ALIGN * component);
+}
+
+/**
+ * \brief Returns the allocated vertex output index for a component of an
+ * output varying location.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] location The output varying location.
+ * \param[in] component The requested component.
+ * \return The vertex output index.
+ */
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+                             gl_varying_slot location,
+                             size_t component)
+{
+   size_t i;
+
+   if (location == VARYING_SLOT_POS) {
+      /* Always at location 0. */
+      assert(outputs->base[0] == 0);
+      i = 0;
+   } else if ((location >= VARYING_SLOT_VAR0) &&
+              (location <= VARYING_SLOT_VAR31)) {
+      i = (location - VARYING_SLOT_VAR0) + 1;
+   } else {
+      unreachable("Unsupported vertex output type.");
+   }
+
+   assert(i < outputs->num_output_vars);
+   assert(component < outputs->components[i]);
+   assert(outputs->base[i] != ~0);
+
+   return outputs->base[i] + component;
+}
+
+/**
+ * \brief Returns the allocated shared register index for a given UBO offset.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] offset_bytes The UBO offset in bytes.
+ * \return The UBO offset shared register index.
+ */
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+                     size_t desc_set,
+                     size_t binding,
+                     size_t offset_bytes)
+{
+   size_t ubo_index = ~0;
+   size_t offset_regs;
+
+   /* Find UBO located at (desc_set, binding).
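+    * A linear scan is sufficient: ROGUE_MAX_BUFFERS is small, and entries
+    * whose dest is still ~0 never had shared registers allocated.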
+    */
+   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
+      if (ubo_data->dest[u] == ~0)
+         continue;
+
+      if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding)
+         continue;
+
+      ubo_index = u;
+      break;
+   }
+
+   assert(ubo_index != ~0);
+
+   assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+   offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
+
+   return ubo_data->dest[ubo_index] + offset_regs;
+}
diff --git a/src/imagination/rogue/rogue_build_data.h b/src/imagination/rogue/rogue_build_data.h
new file mode 100644
index 00000000000..b1d178a52a3
--- /dev/null
+++ b/src/imagination/rogue/rogue_build_data.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_BUILD_DATA_H
+#define ROGUE_BUILD_DATA_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue.h"
+
+/* Max number of I/O varying variables.
+ * Fragment shader: MAX_VARYING + 1 (W coefficient).
+ * Vertex shader: MAX_VARYING + 1 (position slot).
+ */
+#define ROGUE_MAX_IO_VARYING_VARS (MAX_VARYING + 1)
+
+/* VERT_ATTRIB_GENERIC0-15 */
+#define ROGUE_MAX_IO_ATTRIB_VARS 16
+
+/* Max buffer entries that can be used. */
+/* TODO: Currently UBOs are the only supported buffers. */
+#define ROGUE_MAX_BUFFERS 24
+
+struct rogue_compiler;
+struct rogue_shader;
+struct rogue_shader_binary;
+
+/**
+ * \brief UBO data.
+ */
+struct rogue_ubo_data {
+   size_t num_ubo_entries;
+   size_t desc_set[ROGUE_MAX_BUFFERS];
+   size_t binding[ROGUE_MAX_BUFFERS];
+   size_t dest[ROGUE_MAX_BUFFERS];
+   size_t size[ROGUE_MAX_BUFFERS];
+};
+
+/**
+ * \brief Per-stage common build data.
+ */
+struct rogue_common_build_data {
+   size_t temps;
+   size_t internals;
+   size_t coeffs;
+   size_t shareds;
+
+   struct rogue_ubo_data ubo_data;
+};
+
+/**
+ * \brief Arguments for the FPU iterator(s)
+ * (produces varyings for the fragment shader).
+ */
+struct rogue_iterator_args {
+   uint32_t num_fpu_iterators;
+   uint32_t fpu_iterators[ROGUE_MAX_IO_VARYING_VARS];
+   uint32_t destination[ROGUE_MAX_IO_VARYING_VARS];
+   size_t base[ROGUE_MAX_IO_VARYING_VARS];
+   size_t components[ROGUE_MAX_IO_VARYING_VARS];
+};
+
+/**
+ * \brief Vertex input register allocations.
+ */
+struct rogue_vertex_inputs {
+   size_t num_input_vars;
+   size_t base[ROGUE_MAX_IO_ATTRIB_VARS];
+   size_t components[ROGUE_MAX_IO_ATTRIB_VARS];
+};
+
+/**
+ * \brief Vertex output allocations.
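+ *
+ * Slot 0 is reserved for the position; generic varyings occupy slots
+ * 1..MAX_VARYING (see rogue_output_index_vs()).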
+ */
+struct rogue_vertex_outputs {
+   size_t num_output_vars;
+   size_t base[ROGUE_MAX_IO_VARYING_VARS];
+   size_t components[ROGUE_MAX_IO_VARYING_VARS];
+};
+
+/**
+ * \brief Stage-specific build data.
+ */
+struct rogue_build_data {
+   struct rogue_fs_build_data {
+      struct rogue_iterator_args iterator_args;
+      enum rogue_msaa_mode msaa_mode;
+      bool phas; /* Indicates the presence of PHAS instruction. */
+   } fs;
+   struct rogue_vs_build_data {
+      struct rogue_vertex_inputs inputs;
+      size_t num_vertex_input_regs; /* Final number of inputs. */
+
+      struct rogue_vertex_outputs outputs;
+      size_t num_vertex_outputs; /* Final number of outputs. */
+
+      size_t num_varyings; /* Final number of varyings. */
+   } vs;
+};
+
+/**
+ * \brief Shared multi-stage build context.
+ */
+struct rogue_build_ctx {
+   struct rogue_compiler *compiler;
+
+   /* Shaders in various stages of compilation. */
+   nir_shader *nir[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader *rogue[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader_binary *binary[MESA_SHADER_FRAGMENT + 1];
+
+   struct rogue_common_build_data common_data[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_build_data stage_data;
+};
+
+PUBLIC
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler);
+
+PUBLIC
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir);
+
+PUBLIC
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+                            gl_varying_slot location,
+                            size_t component);
+
+PUBLIC
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+                             gl_varying_slot location,
+                             size_t component);
+
+PUBLIC
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+                     size_t desc_set,
+                     size_t binding,
+                     size_t offset_bytes);
+
+#endif /* ROGUE_BUILD_DATA_H */
diff --git a/src/imagination/rogue/rogue_compiler.c b/src/imagination/rogue/rogue_compiler.c
new file mode 100644
index 00000000000..9ae95b076bb
--- /dev/null
+++ b/src/imagination/rogue/rogue_compiler.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+
+#include "compiler/glsl_types.h"
+#include "rogue_compiler.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_compiler.c
+ *
+ * \brief Contains the Rogue compiler interface.
+ */
+
+/**
+ * \brief Creates and sets up a Rogue compiler context.
+ *
+ * \param[in] dev_info Device info pointer.
+ * \return A pointer to the new compiler context, or NULL on failure.
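+ *
+ * Pairs with rogue_compiler_destroy(); usage sketch:
+ *    compiler = rogue_compiler_create(dev_info);
+ *    ctx = rogue_create_build_context(compiler);
+ *    ...
+ *    rogue_compiler_destroy(compiler);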
+ */ +struct rogue_compiler * +rogue_compiler_create(const struct pvr_device_info *dev_info) +{ + struct rogue_compiler *compiler; + + compiler = rzalloc_size(NULL, sizeof(*compiler)); + if (!compiler) + return NULL; + + compiler->dev_info = dev_info; + + /* TODO: Additional compiler setup (allocators? error message output + * location?). + */ + + glsl_type_singleton_init_or_ref(); + + return compiler; +} + +/** + * \brief Destroys and frees a compiler context. + * + * \param[in] compiler The compiler context. + */ +void rogue_compiler_destroy(struct rogue_compiler *compiler) +{ + glsl_type_singleton_decref(); + + ralloc_free(compiler); +} diff --git a/src/imagination/rogue/rogue_compiler.h b/src/imagination/rogue/rogue_compiler.h new file mode 100644 index 00000000000..ed7b6fc720b --- /dev/null +++ b/src/imagination/rogue/rogue_compiler.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_COMPILER_H +#define ROGUE_COMPILER_H + +#include "util/macros.h" + +struct pvr_device_info; + +/** + * \brief Compiler context. + */ +struct rogue_compiler { + const struct pvr_device_info *dev_info; +}; + +PUBLIC +struct rogue_compiler * +rogue_compiler_create(const struct pvr_device_info *dev_info); + +PUBLIC +void rogue_compiler_destroy(struct rogue_compiler *compiler); + +#endif /* ROGUE_COMPILER_H */ diff --git a/src/imagination/rogue/rogue_constreg.c b/src/imagination/rogue/rogue_constreg.c new file mode 100644 index 00000000000..14c193cf945 --- /dev/null +++ b/src/imagination/rogue/rogue_constreg.c @@ -0,0 +1,199 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "rogue_constreg.h" +#include "util/macros.h" + +/** + * \file rogue_constreg.c + * + * \brief Contains functions to find and allocate constant register values. + */ + +/** + * \brief Mapping of constant register values and their indices. + */ +struct rogue_constreg { + uint32_t value; + size_t index; +}; + +#define CONSTREG(VALUE, INDEX) \ + { \ + .value = (VALUE), .index = (INDEX), \ + } + +/** + * \brief Constant register values (sorted for bsearch). + */ +static const struct rogue_constreg const_regs[] = { + CONSTREG(0x00000000U, 0U), /* 0 (INT32) / 0.0 (Float) */ + CONSTREG(0x00000001U, 1U), /* 1 (INT32) */ + CONSTREG(0x00000002U, 2U), /* 2 (INT32) */ + CONSTREG(0x00000003U, 3U), /* 3 (INT32) */ + CONSTREG(0x00000004U, 4U), /* 4 (INT32) */ + CONSTREG(0x00000005U, 5U), /* 5 (INT32) */ + CONSTREG(0x00000006U, 6U), /* 6 (INT32) */ + CONSTREG(0x00000007U, 7U), /* 7 (INT32) */ + CONSTREG(0x00000008U, 8U), /* 8 (INT32) */ + CONSTREG(0x00000009U, 9U), /* 9 (INT32) */ + CONSTREG(0x0000000aU, 10U), /* 10 (INT32) */ + CONSTREG(0x0000000bU, 11U), /* 11 (INT32) */ + CONSTREG(0x0000000cU, 12U), /* 12 (INT32) */ + CONSTREG(0x0000000dU, 13U), /* 13 (INT32) */ + CONSTREG(0x0000000eU, 14U), /* 14 (INT32) */ + CONSTREG(0x0000000fU, 15U), /* 15 (INT32) */ + CONSTREG(0x00000010U, 16U), /* 16 (INT32) */ + CONSTREG(0x00000011U, 17U), /* 17 (INT32) */ + CONSTREG(0x00000012U, 18U), /* 18 (INT32) */ + CONSTREG(0x00000013U, 19U), /* 19 (INT32) */ + CONSTREG(0x00000014U, 20U), /* 20 (INT32) */ + CONSTREG(0x00000015U, 21U), /* 21 (INT32) */ + CONSTREG(0x00000016U, 22U), /* 22 (INT32) */ + CONSTREG(0x00000017U, 23U), /* 23 (INT32) */ + CONSTREG(0x00000018U, 24U), /* 24 (INT32) */ + CONSTREG(0x00000019U, 25U), /* 25 (INT32) */ + CONSTREG(0x0000001aU, 26U), /* 26 (INT32) */ + CONSTREG(0x0000001bU, 27U), /* 27 (INT32) */ + CONSTREG(0x0000001cU, 28U), /* 28 (INT32) */ + CONSTREG(0x0000001dU, 29U), /* 29 (INT32) */ + CONSTREG(0x0000001eU, 30U), /* 30 (INT32) */ + CONSTREG(0x0000001fU, 31U), /* 31 (INT32) */ + CONSTREG(0x0000007fU, 147U), /* 127 (INT32) */ + + CONSTREG(0x37800000U, 134U), /* 1.0f/65536f */ + CONSTREG(0x38000000U, 135U), /* 1.0f/32768f */ + CONSTREG(0x38800000U, 88U), /* float(2^-14) */ + CONSTREG(0x39000000U, 87U), /* float(2^-13) */ + CONSTREG(0x39800000U, 86U), /* float(2^-12) */ + CONSTREG(0x3a000000U, 85U), /* float(2^-11) */ + CONSTREG(0x3a800000U, 84U), /* float(2^-10) */ + CONSTREG(0x3b000000U, 83U), /* float(2^-9) */ + CONSTREG(0x3b4d2e1cU, 136U), /* 0.0031308f */ + CONSTREG(0x3b800000U, 82U), /* float(2^-8) */ + CONSTREG(0x3c000000U, 81U), /* float(2^-7) */ + CONSTREG(0x3c800000U, 80U), /* float(2^-6) */ + CONSTREG(0x3d000000U, 79U), /* float(2^-5) */ + CONSTREG(0x3d25aee6U, 156U), /* 0.04045f */ + CONSTREG(0x3d6147aeU, 140U), /* 0.055f */ + CONSTREG(0x3d800000U, 78U), /* float(2^-4) */ + CONSTREG(0x3d9e8391U, 157U), /* 1.0f/12.92f */ + CONSTREG(0x3e000000U, 77U), /* float(2^-3) */ + CONSTREG(0x3e2aaaabU, 153U), /* 1/6 */ + CONSTREG(0x3e800000U, 76U), /* 
float(2^-2) */ + CONSTREG(0x3e9a209bU, 145U), /* Log_10(2) */ + CONSTREG(0x3ea2f983U, 128U), /* Float 1/PI */ + CONSTREG(0x3eaaaaabU, 152U), /* 1/3 */ + CONSTREG(0x3ebc5ab2U, 90U), /* 1/e */ + CONSTREG(0x3ed55555U, 138U), /* 1.0f/2.4f */ + CONSTREG(0x3f000000U, 75U), /* float(2^-1) */ + CONSTREG(0x3f22f983U, 129U), /* Float 2/PI */ + CONSTREG(0x3f317218U, 146U), /* Log_e(2) */ + CONSTREG(0x3f3504f3U, 92U), /* Float 1/SQRT(2) */ + CONSTREG(0x3f490fdbU, 93U), /* Float PI/4 */ + CONSTREG(0x3f72a76fU, 158U), /* 1.0f/1.055f */ + CONSTREG(0x3f800000U, 64U), /* 1.0f */ + CONSTREG(0x3f860a92U, 151U), /* Pi/3 */ + CONSTREG(0x3f870a3dU, 139U), /* 1.055f */ + CONSTREG(0x3fa2f983U, 130U), /* Float 4/PI */ + CONSTREG(0x3fb504f3U, 91U), /* Float SQRT(2) */ + CONSTREG(0x3fb8aa3bU, 155U), /* Log_2(e) */ + CONSTREG(0x3fc90fdbU, 94U), /* Float PI/2 */ + CONSTREG(0x40000000U, 65U), /* float(2^1) */ + CONSTREG(0x4019999aU, 159U), /* 2.4f */ + CONSTREG(0x402df854U, 89U), /* e */ + CONSTREG(0x40490fdbU, 95U), /* Float PI */ + CONSTREG(0x40549a78U, 154U), /* Log_2(10) */ + CONSTREG(0x40800000U, 66U), /* float(2^2) */ + CONSTREG(0x40c90fdbU, 131U), /* Float 2*PI */ + CONSTREG(0x41000000U, 67U), /* float(2^3) */ + CONSTREG(0x41490fdbU, 132U), /* Float 4*PI */ + CONSTREG(0x414eb852U, 137U), /* 12.92f */ + CONSTREG(0x41800000U, 68U), /* float(2^4) */ + CONSTREG(0x41c90fdbU, 133U), /* Float 8*PI */ + CONSTREG(0x42000000U, 69U), /* float(2^5) */ + CONSTREG(0x42800000U, 70U), /* float(2^6) */ + CONSTREG(0x43000000U, 71U), /* float(2^7) */ + CONSTREG(0x43800000U, 72U), /* float(2^8) */ + CONSTREG(0x44000000U, 73U), /* float(2^9) */ + CONSTREG(0x44800000U, 74U), /* float(2^10) */ + CONSTREG(0x4b000000U, 149U), /* 2^23 */ + CONSTREG(0x4b800000U, 150U), /* 2^24 */ + CONSTREG(0x7f7fffffU, 148U), /* FLT_MAX */ + CONSTREG(0x7f800000U, 142U), /* Infinity */ + CONSTREG(0x7fff7fffU, 144U), /* ARGB1555 mask */ + CONSTREG(0x80000000U, 141U), /* -0.0f */ + CONSTREG(0xffffffffU, 143U), /* -1 */ +}; + +#undef CONSTREG + +/** + * \brief Comparison function for bsearch() to support struct rogue_constreg. + * + * \param[in] lhs The left hand side of the comparison. + * \param[in] rhs The right hand side of the comparison. + * \return 0 if (lhs == rhs), -1 if (lhs < rhs), 1 if (lhs > rhs). + */ +static int constreg_cmp(const void *lhs, const void *rhs) +{ + const struct rogue_constreg *l = lhs; + const struct rogue_constreg *r = rhs; + + if (l->value < r->value) + return -1; + else if (l->value > r->value) + return 1; + + return 0; +} + +/** + * \brief Determines whether a given integer value exists in a constant + * register. + * + * \param[in] value The value required. + * \return The index of the constant register containing the value, or + * ROGUE_NO_CONST_REG if the value is not found. + */ +size_t rogue_constreg_lookup(uint32_t value) +{ + struct rogue_constreg constreg_target = { + .value = value, + }; + const struct rogue_constreg *constreg; + + constreg = bsearch(&constreg_target, + const_regs, + ARRAY_SIZE(const_regs), + sizeof(struct rogue_constreg), + constreg_cmp); + if (!constreg) + return ROGUE_NO_CONST_REG; + + return constreg->index; +} diff --git a/src/imagination/rogue/rogue_constreg.h b/src/imagination/rogue/rogue_constreg.h new file mode 100644 index 00000000000..1d67857a955 --- /dev/null +++ b/src/imagination/rogue/rogue_constreg.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_CONSTREGS_H
+#define ROGUE_CONSTREGS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/u_math.h"
+
+#define ROGUE_NO_CONST_REG SIZE_MAX
+
+PUBLIC
+size_t rogue_constreg_lookup(uint32_t value);
+
+/**
+ * \brief Determines whether a given floating point value exists in a constant
+ * register.
+ *
+ * \param[in] value The value required.
+ * \return The index of the constant register containing the value, or
+ * ROGUE_NO_CONST_REG if the value is not found.
+ */
+static inline size_t rogue_constreg_lookup_float(float value)
+{
+   return rogue_constreg_lookup(fui(value));
+}
+
+#endif /* ROGUE_CONSTREGS_H */
diff --git a/src/imagination/rogue/rogue_dump.c b/src/imagination/rogue/rogue_dump.c
new file mode 100644
index 00000000000..ee21be04f93
--- /dev/null
+++ b/src/imagination/rogue/rogue_dump.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "rogue_dump.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_dump.c
+ *
+ * \brief Contains functions to dump Rogue data structures into a textual
+ * format.
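+ *
+ * Example of the emitted form (register numbers illustrative):
+ *    # fragment shader
+ *    pixiter.w V0, drc0, cf0, cf4, #1;
+ *    wdf drc0;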
+ */ + +static const char *const rogue_operand_string[ROGUE_OPERAND_TYPE_COUNT] = { + [ROGUE_OPERAND_TYPE_REG_TEMP] = "r", + [ROGUE_OPERAND_TYPE_REG_COEFF] = "cf", + [ROGUE_OPERAND_TYPE_REG_CONST] = "c", + [ROGUE_OPERAND_TYPE_REG_SHARED] = "sh", + [ROGUE_OPERAND_TYPE_REG_PIXEL_OUT] = "po", + [ROGUE_OPERAND_TYPE_REG_VERTEX_IN] = "vi", + [ROGUE_OPERAND_TYPE_REG_INTERNAL] = "i", + [ROGUE_OPERAND_TYPE_IMMEDIATE] = "#", + [ROGUE_OPERAND_TYPE_DRC] = "drc", + [ROGUE_OPERAND_TYPE_VREG] = "V", +}; + +static const char *const rogue_opcode_string[ROGUE_OP_COUNT] = { + [ROGUE_OP_NOP] = "nop", + [ROGUE_OP_END_FRAG] = "end.frag", + [ROGUE_OP_END_VERT] = "end.vert", + [ROGUE_OP_WDF] = "wdf", + [ROGUE_OP_PIX_ITER_W] = "pixiter.w", + [ROGUE_OP_MAX] = "max", + [ROGUE_OP_MIN] = "min", + [ROGUE_OP_PACK_U8888] = "pack.u8888", + [ROGUE_OP_MOV] = "mov", + [ROGUE_OP_MOV_IMM] = "mov.imm", + [ROGUE_OP_FMA] = "fma", + [ROGUE_OP_MUL] = "mul", + [ROGUE_OP_VTXOUT] = "vtxout", +}; + +static const char *const rogue_instr_flag_string[ROGUE_INSTR_FLAG_COUNT] = { + [ROGUE_INSTR_FLAG_SAT] = "sat", + [ROGUE_INSTR_FLAG_LP] = "lp", + [ROGUE_INSTR_FLAG_OLCHK] = "olchk", +}; + +static const char rogue_vector_string[4] = { + 'x', + 'y', + 'z', + 'w', +}; + +/** + * \brief Dumps an operand as text to a file pointer. + * + * \param[in] operand The operand. + * \param[in] fp The file pointer. + * \return true if successful, otherwise false. + */ +bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp) +{ + ASSERT_OPERAND_RANGE(operand->type); + + fprintf(fp, "%s", rogue_operand_string[operand->type]); + + if (operand->type == ROGUE_OPERAND_TYPE_IMMEDIATE) + fprintf(fp, "%" PRIu64, operand->immediate.value); + else if (operand->type == ROGUE_OPERAND_TYPE_DRC) + fprintf(fp, "%zu", operand->drc.number); + else if (rogue_check_bitset(rogue_onehot(operand->type), ROGUE_MASK_ANY_REG)) + fprintf(fp, "%zu", operand->reg.number); + else if (operand->type == ROGUE_OPERAND_TYPE_VREG) { + fprintf(fp, "%zu", operand->vreg.number); + if (operand->vreg.is_vector) + fprintf(fp, ".%c", rogue_vector_string[operand->vreg.component]); + } + + return true; +} + +/** + * \brief Dumps an instruction as text to a file pointer. + * + * \param[in] instr The instruction. + * \param[in] fp The file pointer. + * \return true if successful, otherwise false. + */ +bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp) +{ + uint64_t flags = 0U; + + ASSERT_OPCODE_RANGE(instr->opcode); + + flags = instr->flags; + + fprintf(fp, "%s", rogue_opcode_string[instr->opcode]); + + /* Iterate over each flag bit and print its string form. */ + while (flags) { + uint64_t flag = u_bit_scan64(&flags); + ASSERT_INSTR_FLAG_RANGE(flag); + fprintf(fp, ".%s", rogue_instr_flag_string[flag]); + } + + if (instr->num_operands) + fprintf(fp, " "); + + /* Dump each operand. */ + for (size_t u = 0U; u < instr->num_operands; ++u) { + CHECKF(rogue_dump_operand(&instr->operands[u], fp), + "Failed to dump operand."); + if (u < (instr->num_operands - 1)) + fprintf(fp, ", "); + } + + fprintf(fp, ";"); + + return true; +} + +/** + * \brief Dumps a shader as text to a file pointer. + * + * \param[in] shader The shader. + * \param[in] fp The file pointer. + * \return true if successful, otherwise false. + */ +bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp) +{ + /* Dump the shader stage. */ + fprintf(fp, "# %s shader\n", _mesa_shader_stage_to_string(shader->stage)); + + /* Dump each instruction. 
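+    * Each instruction is printed on its own line; a trailing blank line ends
+    * the shader dump.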
*/
+   foreach_instr (instr, &shader->instr_list) {
+      CHECKF(rogue_dump_instr(instr, fp), "Failed to dump instruction.");
+      fprintf(fp, "\n");
+   }
+   fprintf(fp, "\n");
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_dump.h b/src/imagination/rogue/rogue_dump.h
new file mode 100644
index 00000000000..537f923cfac
--- /dev/null
+++ b/src/imagination/rogue/rogue_dump.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_DUMP_H
+#define ROGUE_DUMP_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "util/macros.h"
+
+PUBLIC
+bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp);
+
+PUBLIC
+bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp);
+
+PUBLIC
+bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp);
+
+#endif /* ROGUE_DUMP_H */
diff --git a/src/imagination/rogue/rogue_encode.c b/src/imagination/rogue/rogue_encode.c
new file mode 100644
index 00000000000..4550ddaebae
--- /dev/null
+++ b/src/imagination/rogue/rogue_encode.c
@@ -0,0 +1,851 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "rogue_encode.h"
+#include "rogue_encoders.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+
+static size_t rogue_encode_reg_bank(const struct rogue_operand *operand)
+{
+   switch (operand->type) {
+   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
+   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
+   case ROGUE_OPERAND_TYPE_REG_CONST:
+      return 0;
+   case ROGUE_OPERAND_TYPE_REG_TEMP:
+      return 1;
+   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
+      return 2;
+   case ROGUE_OPERAND_TYPE_REG_COEFF:
+      return 3;
+   case ROGUE_OPERAND_TYPE_REG_SHARED:
+      return 4;
+   default:
+      break;
+   }
+
+   unreachable("Unimplemented register bank.");
+}
+
+/**
+ * \brief Field mapping type.
+ */
+enum rogue_map_type {
+   ROGUE_MAP_TYPE_INSTR_FLAG = 0,
+   ROGUE_MAP_TYPE_OPERAND_FLAG,
+   ROGUE_MAP_TYPE_OPERAND,
+
+   ROGUE_MAP_TYPE_COUNT,
+};
+
+/**
+ * \brief Field mapping rule description.
+ */
+struct rogue_field_mapping {
+   /* Type of mapping being performed. */
+   enum rogue_map_type type;
+
+   /* Index of the source operand/flag being mapped. */
+   size_t index;
+
+   /* List of ranges to perform mapping. */
+   struct rogue_rangelist rangelist;
+
+   /* Function used to encode the input into the value to be mapped. */
+   field_encoder_t encoder_fn;
+};
+
+/**
+ * \brief Instruction encoding rule description.
+ */
+struct rogue_instr_encoding {
+   /* Number of bytes making up the base mask. */
+   size_t num_bytes;
+   /* Base mask bytes. */
+   uint8_t *bytes;
+
+   /* Number of field mappings for this instruction. */
+   size_t num_mappings;
+   /* Field mappings. */
+   struct rogue_field_mapping *mappings;
+};
+
+static const
+struct rogue_instr_encoding instr_encodings[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = {
+      .num_bytes = 8,
+      .bytes = (uint8_t []) { 0x04, 0x80, 0x6e, 0x00, 0xf2, 0xff, 0xff, 0xff },
+   },
+
+   [ROGUE_OP_END_FRAG] = {
+      .num_bytes = 8,
+      .bytes = (uint8_t []) { 0x04, 0x80, 0xee, 0x00, 0xf2, 0xff, 0xff, 0xff },
+   },
+
+   [ROGUE_OP_END_VERT] = {
+      .num_bytes = 8,
+      .bytes = (uint8_t []) { 0x44, 0xa0, 0x80, 0x05, 0x00, 0x00, 0x00, 0xff },
+   },
+
+   [ROGUE_OP_WDF] = {
+      .num_bytes = 8,
+      .bytes = (uint8_t []) { 0x04, 0x80, 0x6a, 0xff, 0xf2, 0xff, 0xff, 0xff },
+      .num_mappings = 1,
+      .mappings = (struct rogue_field_mapping []) {
+         {
+            .type = ROGUE_MAP_TYPE_OPERAND,
+            .index = 0,
+            .rangelist = {
+               .num_ranges = 1,
+               .ranges = (struct rogue_bitrange []) {
+                  { .start = 47, .num = 1, },
+               },
+            },
+            .encoder_fn = &rogue_encoder_drc,
+         },
+      },
+   },
+
+   [ROGUE_OP_PIX_ITER_W] = {
+      .num_bytes = 16,
+      .bytes = (uint8_t []) { 0x48, 0x20, 0xb0, 0x01, 0x80, 0x40, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xff, 0xf1, 0xff },
+      .num_mappings = 6,
+      .mappings = (struct rogue_field_mapping []) {
+         /* Instruction flag mappings. */
+         {
+            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+            .index = ROGUE_INSTR_FLAG_SAT,
+            .rangelist = {
+               .num_ranges = 1,
+               .ranges = (struct rogue_bitrange []) {
+                  { .start = 100, .num = 1, },
+               },
+            },
+            .encoder_fn = NULL,
+         },
+         /* Operand mappings.
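+          * Each mapping scatters one operand's encoded value across the
+          * bit ranges listed below; the SBn()/Sn() comments note which
+          * source-bank/source-number bits each range carries.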
*/ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 43, .num = 2, }, /* SB3(2..1) */ + { .start = 54, .num = 1, }, /* SB3(0) */ + { .start = 34, .num = 3, }, /* S3(10..8) */ + { .start = 41, .num = 2, }, /* S3(7..6) */ + { .start = 53, .num = 6, }, /* S3(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 59, .num = 1, }, + }, + }, + .encoder_fn = &rogue_encoder_drc, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 2, + .rangelist = { + .num_ranges = 6, + .ranges = (struct rogue_bitrange []) { + { .start = 59, .num = 1, }, /* SB0(2) */ + { .start = 76, .num = 1, }, /* SB0(1) */ + { .start = 94, .num = 1, }, /* SB0(0) */ + { .start = 57, .num = 1, }, /* S0(7) */ + { .start = 74, .num = 1, }, /* S0(6) */ + { .start = 93, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_8, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 3, + .rangelist = { + .num_ranges = 4, + .ranges = (struct rogue_bitrange []) { + { .start = 63, .num = 1, }, /* SB2(2) */ + { .start = 71, .num = 2, }, /* SB2(1..0) */ + { .start = 62, .num = 2, }, /* S2(7..6) */ + { .start = 69, .num = 6, }, /* S2(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_8, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 4, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 99, .num = 4, }, + }, + }, + .encoder_fn = &rogue_encoder_ls_1_16, + }, + }, + }, + + [ROGUE_OP_MAX] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xfa, 0x10, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff }, + .num_mappings = 3, + .mappings = (struct rogue_field_mapping []) { + /* Operand mappings. */ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 11, .num = 2, }, /* DBn(2..1) */ + { .start = 22, .num = 1, }, /* DBn(0) */ + { .start = 14, .num = 3, }, /* Dn(10..8) */ + { .start = 9, .num = 2, }, /* Dn(7..6) */ + { .start = 21, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 7, + .ranges = (struct rogue_bitrange []) { + { .start = 43, .num = 1, }, /* SB0(2) */ + { .start = 52, .num = 1, }, /* SB0(1) */ + { .start = 70, .num = 1, }, /* SB0(0) */ + { .start = 47, .num = 3, }, /* S0(10..8) */ + { .start = 41, .num = 1, }, /* S0(7) */ + { .start = 50, .num = 1, }, /* S0(6) */ + { .start = 69, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 2, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 51, .num = 1, }, /* SB1(1) */ + { .start = 61, .num = 1, }, /* SB1(0) */ + { .start = 40, .num = 1, }, /* S1(7) */ + { .start = 49, .num = 2, }, /* S1(6..5) */ + { .start = 60, .num = 5, }, /* S1(4..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_2_8, + }, + }, + }, + + [ROGUE_OP_MIN] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xf0, 0x11, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff }, + .num_mappings = 3, + .mappings = (struct rogue_field_mapping []) { + /* Operand mappings. 
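+          * Identical to the ROGUE_OP_MAX mappings above: MIN and MAX
+          * differ only in their base instruction bytes.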
*/ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 11, .num = 2, }, /* DBn(2..1) */ + { .start = 22, .num = 1, }, /* DBn(0) */ + { .start = 14, .num = 3, }, /* Dn(10..8) */ + { .start = 9, .num = 2, }, /* Dn(7..6) */ + { .start = 21, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 7, + .ranges = (struct rogue_bitrange []) { + { .start = 43, .num = 1, }, /* SB0(2) */ + { .start = 52, .num = 1, }, /* SB0(1) */ + { .start = 70, .num = 1, }, /* SB0(0) */ + { .start = 47, .num = 3, }, /* S0(10..8) */ + { .start = 41, .num = 1, }, /* S0(7) */ + { .start = 50, .num = 1, }, /* S0(6) */ + { .start = 69, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 2, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 51, .num = 1, }, /* SB1(1) */ + { .start = 61, .num = 1, }, /* SB1(0) */ + { .start = 40, .num = 1, }, /* S1(7) */ + { .start = 49, .num = 2, }, /* S1(6..5) */ + { .start = 60, .num = 5, }, /* S1(4..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_2_8, + }, + }, + }, + + [ROGUE_OP_PACK_U8888] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x58, 0x92, 0x06, 0x9c, 0x20, 0x80, 0x00, 0x00, 0x00, 0x2c, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff }, + .num_mappings = 2, + .mappings = (struct rogue_field_mapping []) { + /* Operand mappings. */ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 35, .num = 2, }, /* DBn(2..1) */ + { .start = 46, .num = 1, }, /* DBn(0) */ + { .start = 38, .num = 3, }, /* Dn(10..8) */ + { .start = 33, .num = 2, }, /* Dn(7..6) */ + { .start = 45, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 75, .num = 2, }, /* SB0(2..1) */ + { .start = 86, .num = 1, }, /* SB0(0) */ + { .start = 66, .num = 3, }, /* S0(10..8) */ + { .start = 73, .num = 2, }, /* S0(7..6) */ + { .start = 85, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + }, + }, + + [ROGUE_OP_MOV] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x48, 0x42, 0xd0, 0x3f, 0x87, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff }, + .num_mappings = 3, + .mappings = (struct rogue_field_mapping []) { + /* Instruction flag mappings. */ + { + .type = ROGUE_MAP_TYPE_INSTR_FLAG, + .index = ROGUE_INSTR_FLAG_OLCHK, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 115, .num = 1, }, + }, + }, + .encoder_fn = NULL, + }, + /* Operand mappings. 
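+          * One destination and one source register, using the same Dn/S0
+          * field layout as ROGUE_OP_PACK_U8888 above.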
*/ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 35, .num = 2, }, /* DBn(2..1) */ + { .start = 46, .num = 1, }, /* DBn(0) */ + { .start = 38, .num = 3, }, /* Dn(10..8) */ + { .start = 33, .num = 2, }, /* Dn(7..6) */ + { .start = 45, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 75, .num = 2, }, /* SB0(2..1) */ + { .start = 86, .num = 1, }, /* SB0(0) */ + { .start = 66, .num = 3, }, /* S0(10..8) */ + { .start = 73, .num = 2, }, /* S0(7..6) */ + { .start = 85, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + }, + }, + + [ROGUE_OP_MOV_IMM] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x88, 0x92, 0x40, 0x91, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff }, + .num_mappings = 2, + .mappings = (struct rogue_field_mapping []) { + /* Operand mappings. */ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 35, .num = 2, }, /* DBn(2..1) */ + { .start = 46, .num = 1, }, /* DBn(0) */ + { .start = 38, .num = 3, }, /* Dn(10..8) */ + { .start = 33, .num = 2, }, /* Dn(7..6) */ + { .start = 45, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 4, + .ranges = (struct rogue_bitrange []) { + { .start = 71, .num = 8, }, /* imm(31:24) */ + { .start = 79, .num = 8, }, /* imm(23:16) */ + { .start = 87, .num = 8, }, /* imm(15:8) */ + { .start = 95, .num = 8, }, /* imm(7:0) */ + }, + }, + .encoder_fn = &rogue_encoder_imm, + }, + }, + }, + + [ROGUE_OP_FMA] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x28, 0x02, 0xd0, 0x00, 0x80, 0x40, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf1, 0xff }, + .num_mappings = 6, + .mappings = (struct rogue_field_mapping []) { + /* Instruction flag mappings. */ + { + .type = ROGUE_MAP_TYPE_INSTR_FLAG, + .index = ROGUE_INSTR_FLAG_SAT, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 104, .num = 1, }, + }, + }, + .encoder_fn = NULL, + }, + { + .type = ROGUE_MAP_TYPE_INSTR_FLAG, + .index = ROGUE_INSTR_FLAG_LP, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 100, .num = 1, }, + }, + }, + .encoder_fn = NULL, + }, + /* Operand mappings. 
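+          * One destination plus the three sources of the fused
+          * multiply-add.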
*/ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 27, .num = 2, }, /* DBn(2..1) */ + { .start = 38, .num = 1, }, /* DBn(0) */ + { .start = 30, .num = 3, }, /* Dn(10..8) */ + { .start = 25, .num = 2, }, /* Dn(7..6) */ + { .start = 37, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 6, + .ranges = (struct rogue_bitrange []) { + { .start = 59, .num = 1, }, /* SB0(2) */ + { .start = 76, .num = 1, }, /* SB0(1) */ + { .start = 94, .num = 1, }, /* SB0(0) */ + { .start = 57, .num = 1, }, /* S0(7) */ + { .start = 74, .num = 1, }, /* S0(6) */ + { .start = 93, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_8, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 2, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 75, .num = 1, }, /* SB1(1) */ + { .start = 85, .num = 1, }, /* SB1(0) */ + { .start = 56, .num = 1, }, /* S1(7) */ + { .start = 73, .num = 2, }, /* S1(6..5) */ + { .start = 84, .num = 5, }, /* S1(4..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_2_8, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 3, + .rangelist = { + .num_ranges = 4, + .ranges = (struct rogue_bitrange []) { + { .start = 63, .num = 1, }, /* SB2(2) */ + { .start = 71, .num = 2, }, /* SB2(1..0) */ + { .start = 62, .num = 2, }, /* S2(7..6) */ + { .start = 69, .num = 6, }, /* S2(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_8, + }, + }, + }, + + [ROGUE_OP_MUL] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x28, 0x02, 0x40, 0x80, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf2, 0xff, 0xff, 0xff }, + .num_mappings = 5, + .mappings = (struct rogue_field_mapping []) { + /* Instruction flag mappings. */ + { + .type = ROGUE_MAP_TYPE_INSTR_FLAG, + .index = ROGUE_INSTR_FLAG_SAT, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 108, .num = 1, }, + }, + }, + .encoder_fn = NULL, + }, + { + .type = ROGUE_MAP_TYPE_INSTR_FLAG, + .index = ROGUE_INSTR_FLAG_LP, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 109, .num = 1, }, + }, + }, + .encoder_fn = NULL, + }, + /* Operand mappings. 
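+          * As for FMA but with two sources; the Dn/S0/S1 fields sit at
+          * different bit positions in MUL's encoding.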
*/ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 43, .num = 2, }, /* DBn(2..1) */ + { .start = 54, .num = 1, }, /* DBn(0) */ + { .start = 46, .num = 3, }, /* Dn(10..8) */ + { .start = 41, .num = 2, }, /* Dn(7..6) */ + { .start = 53, .num = 6, }, /* Dn(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 7, + .ranges = (struct rogue_bitrange []) { + { .start = 75, .num = 1, }, /* SB0(2) */ + { .start = 84, .num = 1, }, /* SB0(1) */ + { .start = 102, .num = 1, }, /* SB0(0) */ + { .start = 79, .num = 3, }, /* S0(10..8) */ + { .start = 73, .num = 1, }, /* S0(7) */ + { .start = 82, .num = 1, }, /* S0(6) */ + { .start = 101, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 2, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 83, .num = 1, }, /* SB1(1) */ + { .start = 93, .num = 1, }, /* SB1(0) */ + { .start = 72, .num = 1, }, /* S1(7) */ + { .start = 81, .num = 2, }, /* S1(6..5) */ + { .start = 92, .num = 5, }, /* S1(4..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_2_8, + }, + }, + }, + + [ROGUE_OP_VTXOUT] = { + .num_bytes = 16, + .bytes = (uint8_t []) { 0x48, 0x20, 0x08, 0x00, 0x80, 0x00, 0x00, 0x00, 0x30, 0xff, 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff }, + .num_mappings = 2, + .mappings = (struct rogue_field_mapping []) { + /* Operand mappings. */ + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 0, + .rangelist = { + .num_ranges = 1, + .ranges = (struct rogue_bitrange []) { + { .start = 103, .num = 8, }, /* Immediate address. */ + }, + }, + .encoder_fn = &rogue_encoder_imm, + }, + { + .type = ROGUE_MAP_TYPE_OPERAND, + .index = 1, + .rangelist = { + .num_ranges = 5, + .ranges = (struct rogue_bitrange []) { + { .start = 83, .num = 2, }, /* SB0(2..1) */ + { .start = 94, .num = 1, }, /* SB0(0) */ + { .start = 74, .num = 3, }, /* S0(10..8) */ + { .start = 81, .num = 2, }, /* S0(7..6) */ + { .start = 93, .num = 6, }, /* S0(5..0) */ + }, + }, + .encoder_fn = &rogue_encoder_reg_3_11, + }, + }, + }, +}; + +/** + * \brief Applies a boolean flag encoding onto an instruction mask. + * + * \param[in] set Whether to set/unset the flag. + * \param[in] mapping The field mapping to apply. + * \param[in] instr_size The size of the instruction mask in bytes. + * \param[in] instr_bytes The instruction mask. + * \return true if encoding was successful. + */ +static bool rogue_encode_flag(bool set, + const struct rogue_field_mapping *mapping, + size_t instr_size, + uint8_t instr_bytes[instr_size]) +{ + return rogue_distribute_value((uint64_t)set, + &mapping->rangelist, + instr_size, + instr_bytes); +} + +/** + * \brief Applies an operand encoding onto an instruction mask. + * + * \param[in] operand The operand to apply. + * \param[in] mapping The field mapping to apply. + * \param[in] instr_size The size of the instruction mask in bytes. + * \param[in] instr_bytes The instruction mask. + * \return true if encoding was successful. 
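+ *
+ * The operand is first converted to a raw value by the mapping's encoder
+ * function and then scattered into the instruction bytes with
+ * rogue_distribute_value().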
+ */ +static bool rogue_encode_operand(const struct rogue_operand *operand, + const struct rogue_field_mapping *mapping, + size_t instr_size, + uint8_t instr_bytes[instr_size]) +{ + uint64_t value = 0U; + + switch (operand->type) { + case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT: + CHECKF( + mapping->encoder_fn(&value, + 2, + rogue_encode_reg_bank(operand), + operand->reg.number + ROGUE_PIXEL_OUT_REG_OFFSET), + "Failed to encode pixel output register operand."); + break; + case ROGUE_OPERAND_TYPE_REG_INTERNAL: + CHECKF( + mapping->encoder_fn(&value, + 2, + rogue_encode_reg_bank(operand), + operand->reg.number + ROGUE_INTERNAL_REG_OFFSET), + "Failed to encode internal register operand."); + break; + case ROGUE_OPERAND_TYPE_REG_TEMP: + case ROGUE_OPERAND_TYPE_REG_COEFF: + case ROGUE_OPERAND_TYPE_REG_CONST: + case ROGUE_OPERAND_TYPE_REG_SHARED: + case ROGUE_OPERAND_TYPE_REG_VERTEX_IN: + CHECKF(mapping->encoder_fn(&value, + 2, + rogue_encode_reg_bank(operand), + operand->reg.number), + "Failed to encode register operand."); + break; + + case ROGUE_OPERAND_TYPE_IMMEDIATE: + CHECKF(mapping->encoder_fn(&value, 1, operand->immediate.value), + "Failed to encode immediate operand."); + break; + + case ROGUE_OPERAND_TYPE_DRC: + CHECKF(mapping->encoder_fn(&value, 1, (uint64_t)operand->drc.number), + "Failed to encode DRC operand."); + break; + + default: + return false; + } + + CHECKF(rogue_distribute_value(value, + &mapping->rangelist, + instr_size, + instr_bytes), + "Failed to distribute value."); + + return true; +} + +/** + * \brief Applies operand and flag encodings to the base instruction bytes, then + * writes the result to file pointer "fp". + * + * \param[in] instr The instruction to be encoded. + * \param[in] fp The file pointer. + * \return true if encoding was successful. + */ +bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp) +{ + const struct rogue_instr_encoding *instr_encoding; + size_t instr_size; + uint8_t instr_bytes[ROGUE_MAX_INSTR_BYTES]; + + ASSERT_OPCODE_RANGE(instr->opcode); + + instr_encoding = &instr_encodings[instr->opcode]; + + /* Set up base instruction bytes. */ + instr_size = instr_encoding->num_bytes; + assert(instr_size <= ARRAY_SIZE(instr_bytes)); + memcpy(instr_bytes, instr_encoding->bytes, instr_size); + + /* Encode the operands and flags. */ + for (size_t u = 0U; u < instr_encoding->num_mappings; ++u) { + const struct rogue_field_mapping *mapping = &instr_encoding->mappings[u]; + + switch (mapping->type) { + case ROGUE_MAP_TYPE_INSTR_FLAG: { + uint64_t flag = rogue_onehot(mapping->index); + CHECKF(rogue_encode_flag(!!(instr->flags & flag), + mapping, + instr_size, + instr_bytes), + "Failed to encode instruction flag."); + break; + } + + case ROGUE_MAP_TYPE_OPERAND_FLAG: + return false; + + case ROGUE_MAP_TYPE_OPERAND: { + size_t operand_index = mapping->index; + CHECKF(rogue_encode_operand(&instr->operands[operand_index], + mapping, + instr_size, + instr_bytes), + "Failed to encode instruction operand."); + break; + } + + default: + return false; + } + } + + CHECKF(fwrite(instr_bytes, 1, instr_size, fp) == instr_size, + "Failed to write encoded instruction bytes."); + fflush(fp); + + return true; +} + +/** + * \brief Encodes each instruction in "shader", writing the output to "fp". + * + * \param[in] shader The shader to be encoded. + * \param[in] fp The file pointer. + * \return true if encoding was successful. + */ +bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp) +{ + long bytes_written; + + /* Encode each instruction. 
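+    * Instructions are encoded and written to fp in list order; the first
+    * failure aborts the whole shader.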
*/
+   foreach_instr (instr, &shader->instr_list)
+      CHECKF(rogue_encode_instr(instr, fp), "Failed to encode instruction.");
+
+   /* Pad end of shader if required. */
+   bytes_written = ftell(fp);
+   if (bytes_written <= 0)
+      return false;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   /* Encoded instructions are 8 or 16 bytes, so (bytes_written % 16) is
+    * either 0 or 8, and padding by that amount restores 16-byte alignment.
+    */
+   for (size_t u = 0; u < (bytes_written % 16); ++u)
+      fputc(0xff, fp);
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_encode.h b/src/imagination/rogue/rogue_encode.h
new file mode 100644
index 00000000000..0305e372c08
--- /dev/null
+++ b/src/imagination/rogue/rogue_encode.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_ENCODE_H
+#define ROGUE_ENCODE_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "util/macros.h"
+
+struct rogue_instr;
+struct rogue_shader;
+
+PUBLIC
+bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp);
+
+PUBLIC
+bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp);
+
+#endif /* ROGUE_ENCODE_H */
diff --git a/src/imagination/rogue/rogue_encoders.c b/src/imagination/rogue/rogue_encoders.c
new file mode 100644
index 00000000000..1da79903a77
--- /dev/null
+++ b/src/imagination/rogue/rogue_encoders.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_encoders.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
+/**
+ * \brief Passes the input value through unchanged.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...)
+{
+   va_list args;
+
+   assert(inputs == 1);
+
+   va_start(args, inputs);
+   *value = va_arg(args, uint64_t);
+   va_end(args);
+
+   return true;
+}
+
+/**
+ * \brief Encoder for DRC values.
+ *
+ * \sa #rogue_encoder_pass()
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...)
+   __attribute__((alias("rogue_encoder_pass")));
+
+/**
+ * \brief Encoder for immediate values.
+ *
+ * \sa #rogue_encoder_pass()
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...)
+   __attribute__((alias("rogue_encoder_pass")));
+
+/**
+ * \brief Encodes values in the range 1..16, where 16 is represented as 0.
+ *
+ * Inputs 1-15 are encoded as-is; an input of 16 encodes to 0. Any other
+ * input is rejected.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...)
+{
+   va_list args;
+   uint64_t input;
+
+   assert(inputs == 1);
+
+   va_start(args, inputs);
+   input = va_arg(args, uint64_t);
+   va_end(args);
+
+   /* Validate the input range. */
+   if (!input || input > 16) {
+      *value = UINT64_MAX;
+      return false;
+   }
+
+   *value = input % 16;
+
+   return true;
+}
+
+/**
+ * \brief Encodes registers according to the number of bits needed to specify
+ * the bank number and register number.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] bank_bits The number of bits used to represent the register bank.
+ * \param[in] bank The register bank.
+ * \param[in] num_bits The number of bits used to represent the register number.
+ * \param[in] num The register number.
+ * \return true if encoding was successful.
+ */
+static bool rogue_encoder_reg(uint64_t *value,
+                              size_t bank_bits,
+                              size_t bank,
+                              size_t num_bits,
+                              size_t num)
+{
+   /* Verify "num" fits in "num_bits" and "bank" fits in "bank_bits". */
+   assert(util_last_bit64(num) <= num_bits);
+   assert(util_last_bit64(bank) <= bank_bits);
+
+   *value = num;
+   *value |= (bank << num_bits);
+
+   return true;
+}
+
+/**
+ * \brief Macro to define the rogue_encoder_reg variants.
+ */
+#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits) \
+   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value, \
+                                                   size_t inputs, \
+                                                   ...) \
+   { \
+      va_list args; \
+      size_t bank; \
+      size_t num; \
+      assert(inputs == 2); \
+      va_start(args, inputs); \
+      bank = va_arg(args, size_t); \
+      num = va_arg(args, size_t); \
+      va_end(args); \
+      return rogue_encoder_reg(value, bank_bits, bank, num_bits, num); \
+   }
+
+ROGUE_ENCODER_REG_VARIANT(2, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 11)
+
+#undef ROGUE_ENCODER_REG_VARIANT
diff --git a/src/imagination/rogue/rogue_encoders.h b/src/imagination/rogue/rogue_encoders.h
new file mode 100644
index 00000000000..ea5c94d6ec3
--- /dev/null
+++ b/src/imagination/rogue/rogue_encoders.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_ENCODERS_H
+#define ROGUE_ENCODERS_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+
+/* Returns false if input was invalid. */
+typedef bool (*field_encoder_t)(uint64_t *value, size_t inputs, ...);
+
+bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...);
+
+/**
+ * \brief Macro to declare the rogue_encoder_reg variants.
+ */
+#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits) \
+   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value, \
+                                                   size_t inputs, \
+                                                   ...);
+ROGUE_ENCODER_REG_VARIANT(2, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 11)
+#undef ROGUE_ENCODER_REG_VARIANT
+
+#endif /* ROGUE_ENCODERS_H */
diff --git a/src/imagination/rogue/rogue_instr.c b/src/imagination/rogue/rogue_instr.c
new file mode 100644
index 00000000000..a6988021f15
--- /dev/null
+++ b/src/imagination/rogue/rogue_instr.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_util.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_instr.c
+ *
+ * \brief Contains functions to manipulate Rogue instructions.
+ */
+
+/* clang-format off */
+
+static const size_t instr_operand_count[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = 0,
+   [ROGUE_OP_END_FRAG] = 0,
+   [ROGUE_OP_END_VERT] = 0,
+   [ROGUE_OP_WDF] = 1,
+   [ROGUE_OP_PIX_ITER_W] = 5,
+   [ROGUE_OP_MAX] = 3,
+   [ROGUE_OP_MIN] = 3,
+   [ROGUE_OP_PACK_U8888] = 2,
+   [ROGUE_OP_MOV] = 2,
+   [ROGUE_OP_MOV_IMM] = 2,
+   [ROGUE_OP_FMA] = 4,
+   [ROGUE_OP_MUL] = 3,
+   [ROGUE_OP_VTXOUT] = 2,
+};
+
+/* clang-format on */
+
+/**
+ * \brief Returns the number of operands an instruction takes.
+ *
+ * \param[in] opcode The instruction opcode.
+ * \return The number of operands.
+ */
+static inline size_t rogue_instr_num_operands(enum rogue_opcode opcode)
+{
+   ASSERT_OPCODE_RANGE(opcode);
+
+   return instr_operand_count[opcode];
+}
+
+/**
+ * \brief Allocates and sets up a Rogue instruction.
+ *
+ * \param[in] mem_ctx The memory context for the instruction.
+ * \param[in] opcode The instruction opcode.
+ * \return A rogue_instr* if successful, or NULL if unsuccessful.
+ */
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode)
+{
+   struct rogue_instr *instr;
+
+   ASSERT_OPCODE_RANGE(opcode);
+
+   instr = rzalloc_size(mem_ctx, sizeof(*instr));
+   if (!instr)
+      return NULL;
+
+   instr->opcode = opcode;
+   instr->num_operands = rogue_instr_num_operands(opcode);
+
+   /* Allocate space for operand array. */
+   if (instr->num_operands) {
+      instr->operands = rzalloc_array_size(instr,
+                                           sizeof(*instr->operands),
+                                           instr->num_operands);
+      if (!instr->operands) {
+         ralloc_free(instr);
+         return NULL;
+      }
+   }
+
+   return instr;
+}
+
+/**
+ * \brief Sets a Rogue instruction flag.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] flag The flag to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_flag(struct rogue_instr *instr, enum rogue_instr_flag flag)
+{
+   /* OR into the mask rather than overwrite, so multiple flags can be set. */
+   instr->flags |= ROH(flag);
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to an immediate value.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] value The value to set.
+ * \return true if valid, otherwise false.
+ */ +bool rogue_instr_set_operand_imm(struct rogue_instr *instr, + size_t index, + uint64_t value) +{ + ASSERT_INSTR_OPERAND_INDEX(instr, index); + + instr->operands[index].type = ROGUE_OPERAND_TYPE_IMMEDIATE; + instr->operands[index].immediate.value = value; + + return true; +} + +/** + * \brief Sets a Rogue instruction operand to a DRC number. + * + * \param[in] instr The instruction. + * \param[in] index The operand index. + * \param[in] number The DRC number to set. + * \return true if valid, otherwise false. + */ +bool rogue_instr_set_operand_drc(struct rogue_instr *instr, + size_t index, + size_t number) +{ + ASSERT_INSTR_OPERAND_INDEX(instr, index); + + instr->operands[index].type = ROGUE_OPERAND_TYPE_DRC; + instr->operands[index].drc.number = number; + + return true; +} + +/** + * \brief Sets a Rogue instruction operand to a register. + * + * \param[in] instr The instruction. + * \param[in] index The operand index. + * \param[in] type The register type to set. + * \param[in] number The register number to set. + * \return true if valid, otherwise false. + */ +bool rogue_instr_set_operand_reg(struct rogue_instr *instr, + size_t index, + enum rogue_operand_type type, + size_t number) +{ + ASSERT_INSTR_OPERAND_INDEX(instr, index); + ASSERT_OPERAND_REG(type); + + instr->operands[index].type = type; + instr->operands[index].reg.number = number; + + return true; +} + +/** + * \brief Sets a Rogue instruction operand to a virtual register. + * + * \param[in] instr The instruction. + * \param[in] index The operand index. + * \param[in] number The register number to set. + * \return true if valid, otherwise false. + */ +bool rogue_instr_set_operand_vreg(struct rogue_instr *instr, + size_t index, + size_t number) +{ + ASSERT_INSTR_OPERAND_INDEX(instr, index); + + instr->operands[index].type = ROGUE_OPERAND_TYPE_VREG; + instr->operands[index].vreg.number = number; + instr->operands[index].vreg.is_vector = false; + + return true; +} + +/** + * \brief Sets a Rogue instruction operand to a virtual register + * that is a vector type. + * + * \param[in] instr The instruction. + * \param[in] index The operand index. + * \param[in] component The vector component. + * \param[in] number The register number to set. + * \return true if valid, otherwise false. + */ +bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr, + size_t index, + size_t component, + size_t number) +{ + ASSERT_INSTR_OPERAND_INDEX(instr, index); + + instr->operands[index].type = ROGUE_OPERAND_TYPE_VREG; + instr->operands[index].vreg.number = number; + instr->operands[index].vreg.is_vector = true; + instr->operands[index].vreg.component = component; + + return true; +} diff --git a/src/imagination/rogue/rogue_instr.h b/src/imagination/rogue/rogue_instr.h new file mode 100644 index 00000000000..5b6efa5dde6 --- /dev/null +++ b/src/imagination/rogue/rogue_instr.h @@ -0,0 +1,113 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_INSTR_H
+#define ROGUE_INSTR_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_operand.h"
+#include "util/list.h"
+
+/**
+ * \brief Instruction opcodes.
+ */
+enum rogue_opcode {
+   ROGUE_OP_NOP = 0, /** No-operation. */
+   ROGUE_OP_END_FRAG, /** Fragment shader end. */
+   ROGUE_OP_END_VERT, /** Vertex shader end. */
+   ROGUE_OP_WDF, /** Write data fence. */
+
+   ROGUE_OP_PIX_ITER_W, /** Pixel iteration with coefficients. */
+
+   ROGUE_OP_MAX, /** Returns the larger of two floats. */
+   ROGUE_OP_MIN, /** Returns the smaller of two floats. */
+
+   ROGUE_OP_PACK_U8888, /** Scales the four input floats:
+                         * [0.0f, 1.0f] -> [0, 255] and packs them
+                         * into a 32-bit unsigned integer.
+                         */
+
+   ROGUE_OP_MOV, /** Register move instruction. */
+   ROGUE_OP_MOV_IMM, /** Move immediate instruction. */
+
+   ROGUE_OP_FMA, /** Fused-multiply-add (float). */
+   ROGUE_OP_MUL, /** Multiply (float). */
+
+   ROGUE_OP_VTXOUT, /** Writes the input register
+                     * to the given vertex output index.
+                     */
+
+   ROGUE_OP_COUNT,
+};
+
+/**
+ * \brief Instruction flags.
+ */
+enum rogue_instr_flag {
+   ROGUE_INSTR_FLAG_SAT = 0, /** Saturate values to 0.0 ... 1.0. */
+   ROGUE_INSTR_FLAG_LP, /** Low-precision modifier. */
+   ROGUE_INSTR_FLAG_OLCHK, /** Overlap check (pixel write). */
+
+   ROGUE_INSTR_FLAG_COUNT,
+};
+
+/**
+ * \brief Instruction description.
+ */
+struct rogue_instr {
+   enum rogue_opcode opcode;
+
+   size_t num_operands;
+   struct rogue_operand *operands;
+
+   uint64_t flags; /** A mask of #rogue_instr_flag values. */
+
+   struct list_head node; /** Linked list node.
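+                           * Links the instruction into the owning
+                           * shader's instr_list.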
*/
+};
+
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode);
+
+bool rogue_instr_set_flag(struct rogue_instr *instr,
+                          enum rogue_instr_flag flag);
+
+bool rogue_instr_set_operand_imm(struct rogue_instr *instr,
+                                 size_t index,
+                                 uint64_t value);
+bool rogue_instr_set_operand_drc(struct rogue_instr *instr,
+                                 size_t index,
+                                 size_t number);
+bool rogue_instr_set_operand_reg(struct rogue_instr *instr,
+                                 size_t index,
+                                 enum rogue_operand_type type,
+                                 size_t number);
+bool rogue_instr_set_operand_vreg(struct rogue_instr *instr,
+                                  size_t index,
+                                  size_t number);
+bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr,
+                                      size_t index,
+                                      size_t component,
+                                      size_t number);
+#endif /* ROGUE_INSTR_H */
diff --git a/src/imagination/rogue/rogue_nir.c b/src/imagination/rogue/rogue_nir.c
new file mode 100644
index 00000000000..3fe90b47abd
--- /dev/null
+++ b/src/imagination/rogue/rogue_nir.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/spirv/nir_spirv.h"
+#include "nir/nir.h"
+#include "nir/nir_schedule.h"
+#include "rogue_nir.h"
+#include "rogue_operand.h"
+
+/**
+ * \file rogue_nir.c
+ *
+ * \brief Contains NIR-specific functions.
+ */
+
+/**
+ * \brief SPIR-V to NIR compilation options.
+ */
+static const struct spirv_to_nir_options spirv_options = {
+   .environment = NIR_SPIRV_VULKAN,
+
+   /* Buffer address: (descriptor_set, binding), offset. */
+   .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+};
+
+static const nir_shader_compiler_options nir_options = {
+   .lower_fsat = true,
+   .fuse_ffma32 = true,
+};
+
+const struct spirv_to_nir_options *
+rogue_get_spirv_options(const struct rogue_compiler *compiler)
+{
+   return &spirv_options;
+}
+
+const nir_shader_compiler_options *
+rogue_get_compiler_options(const struct rogue_compiler *compiler)
+{
+   return &nir_options;
+}
+
+static int rogue_glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+/**
+ * \brief Applies optimizations and passes required to lower the NIR shader
+ * into a form suitable for lowering to Rogue IR.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \param[in] stage Shader stage.
+ * \return true if successful, otherwise false.
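+ *
+ * The ordering below matters: I/O and ALU operations are scalarized before
+ * the optimization loops, and the shader is taken out of SSA form last,
+ * ready for register allocation.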
+ */ +bool rogue_nir_passes(struct rogue_build_ctx *ctx, + nir_shader *nir, + gl_shader_stage stage) +{ + bool progress; + + nir_validate_shader(nir, "after spirv_to_nir"); + + /* Splitting. */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + /* Ensure fs outputs are in the [0.0f...1.0f] range. */ + NIR_PASS_V(nir, nir_lower_clamp_color_outputs); + + /* Replace references to I/O variables with intrinsics. */ + NIR_PASS_V(nir, + nir_lower_io, + nir_var_shader_in | nir_var_shader_out, + rogue_glsl_type_size, + (nir_lower_io_options)0); + + /* Load inputs to scalars (single registers later). */ + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in); + + /* Optimize GL access qualifiers. */ + const nir_opt_access_options opt_access_options = { + .is_vulkan = true, + .infer_non_readable = true, + }; + NIR_PASS_V(nir, nir_opt_access, &opt_access_options); + + /* Apply PFO code to the fragment shader output. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, rogue_nir_pfo); + + /* Load outputs to scalars (single registers later). */ + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out); + + /* Lower ALU operations to scalars. */ + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); + + /* Algebraic opts. */ + do { + progress = false; + + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_gcm, false); + } while (progress); + + /* Additional I/O lowering. */ + NIR_PASS_V(nir, + nir_lower_explicit_io, + nir_var_mem_ubo, + spirv_options.ubo_addr_format); + NIR_PASS_V(nir, rogue_nir_lower_io, NULL); + + /* Late algebraic opts. */ + do { + progress = false; + + NIR_PASS(progress, nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } while (progress); + + /* Replace SSA constant references with a register that loads the value. */ + NIR_PASS_V(nir, rogue_nir_constreg); + /* Remove unused constant registers. */ + NIR_PASS_V(nir, nir_opt_dce); + + /* Move loads to just before they're needed. */ + NIR_PASS_V(nir, nir_opt_move, nir_move_load_ubo | nir_move_load_input); + + /* Convert vecNs to movs so we can sequentially allocate them later. */ + NIR_PASS_V(nir, nir_lower_vec_to_movs, NULL, NULL); + + /* Out of SSA pass. */ + NIR_PASS_V(nir, nir_convert_from_ssa, false); + + /* TODO: Re-enable scheduling after register pressure tweaks. */ +#if 0 + /* Instruction scheduling. */ + struct nir_schedule_options schedule_options = { + .threshold = ROGUE_MAX_REG_TEMP / 2, + }; + NIR_PASS_V(nir, nir_schedule, &schedule_options); +#endif + + /* Assign I/O locations. */ + nir_assign_io_var_locations(nir, + nir_var_shader_in, + &nir->num_inputs, + nir->info.stage); + nir_assign_io_var_locations(nir, + nir_var_shader_out, + &nir->num_outputs, + nir->info.stage); + + /* Gather info into nir shader struct. */ + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + /* Clean-up after passes. 
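+    * nir_sweep() frees any memory that is no longer reachable from the
+    * shader itself.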
*/ + nir_sweep(nir); + + nir_validate_shader(nir, "after passes"); + + return true; +} diff --git a/src/imagination/rogue/rogue_nir.h b/src/imagination/rogue/rogue_nir.h new file mode 100644 index 00000000000..580e5bf3d80 --- /dev/null +++ b/src/imagination/rogue/rogue_nir.h @@ -0,0 +1,52 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_NIR_H +#define ROGUE_NIR_H + +#include "compiler/shader_enums.h" +#include "nir/nir.h" +#include "util/macros.h" + +struct rogue_build_ctx; +struct rogue_compiler; +struct spirv_to_nir_options; + +PUBLIC +const struct spirv_to_nir_options * +rogue_get_spirv_options(const struct rogue_compiler *compiler); + +PUBLIC +const nir_shader_compiler_options * +rogue_get_compiler_options(const struct rogue_compiler *compiler); + +bool rogue_nir_passes(struct rogue_build_ctx *ctx, + nir_shader *nir, + gl_shader_stage stage); + +/* Custom passes. */ +void rogue_nir_pfo(nir_shader *shader); +void rogue_nir_constreg(nir_shader *shader); +bool rogue_nir_lower_io(nir_shader *shader, void *layout); + +#endif /* ROGUE_NIR_H */ diff --git a/src/imagination/rogue/rogue_nir_helpers.h b/src/imagination/rogue/rogue_nir_helpers.h new file mode 100644 index 00000000000..85fc6db6f52 --- /dev/null +++ b/src/imagination/rogue/rogue_nir_helpers.h @@ -0,0 +1,149 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_NIR_HELPERS_H
+#define ROGUE_NIR_HELPERS_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "nir/nir.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_nir_helpers.h
+ *
+ * \brief Contains various NIR helper functions.
+ */
+
+static inline unsigned nir_alu_dest_regindex(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+
+   return alu->dest.dest.reg.reg->index;
+}
+
+static inline unsigned nir_alu_dest_comp(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+   assert(util_is_power_of_two_nonzero(alu->dest.write_mask));
+
+   return ffs(alu->dest.write_mask) - 1;
+}
+
+static inline unsigned nir_alu_src_regindex(const nir_alu_instr *alu,
+                                            size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(!alu->src[src].src.is_ssa);
+
+   return alu->src[src].src.reg.reg->index;
+}
+
+static inline uint32_t nir_alu_src_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(alu->src[src].src.is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(alu->src[src].src);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+static inline bool nir_alu_src_is_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+
+   if (!alu->src[src].src.is_ssa)
+      return false;
+
+   assert(alu->src[src].src.ssa->parent_instr);
+
+   return (alu->src[src].src.ssa->parent_instr->type ==
+           nir_instr_type_load_const);
+}
+
+static inline unsigned nir_intr_dest_regindex(const nir_intrinsic_instr *intr)
+{
+   assert(!intr->dest.is_ssa);
+
+   return intr->dest.reg.reg->index;
+}
+
+static inline unsigned nir_intr_src_regindex(const nir_intrinsic_instr *intr,
+                                             size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(!intr->src[src].is_ssa);
+
+   return intr->src[src].reg.reg->index;
+}
+
+static inline uint32_t nir_intr_src_const(const nir_intrinsic_instr *intr,
+                                          size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(intr->src[src]);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+static inline uint32_t nir_intr_src_comp_const(const nir_intrinsic_instr *intr,
+                                               size_t src,
+                                               size_t comp)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+   assert(comp < nir_src_num_components(intr->src[src]));
+
+   return nir_src_comp_as_uint(intr->src[src], comp);
+}
+
+static inline bool nir_intr_src_is_const(const nir_intrinsic_instr *intr,
+                                         size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+
+   if (!intr->src[src].is_ssa)
+      return false;
+
+   assert(intr->src[src].ssa->parent_instr);
+
+   return (intr->src[src].ssa->parent_instr->type == nir_instr_type_load_const);
+}
+
+static inline size_t nir_count_variables_with_modes(const nir_shader *nir,
+                                                    nir_variable_mode mode)
+{
+   size_t count = 0;
+
+   nir_foreach_variable_with_modes (var, nir, mode)
+      ++count;
+
+   return count;
+}
+
+#endif /* ROGUE_NIR_HELPERS_H */
diff --git a/src/imagination/rogue/rogue_operand.c b/src/imagination/rogue/rogue_operand.c
new file mode 100644
index 00000000000..a0e86f9f8d6
--- /dev/null
+++
b/src/imagination/rogue/rogue_operand.c @@ -0,0 +1,30 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rogue_operand.h" + +/** + * \file rogue_operand.c + * + * \brief Contains functions to manipulate Rogue instruction operands. + */ diff --git a/src/imagination/rogue/rogue_operand.h b/src/imagination/rogue/rogue_operand.h new file mode 100644 index 00000000000..753f09053d7 --- /dev/null +++ b/src/imagination/rogue/rogue_operand.h @@ -0,0 +1,158 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_OPERAND_H +#define ROGUE_OPERAND_H + +#include <stdbool.h> +#include <stddef.h> + +#include "rogue_util.h" +#include "util/macros.h" + +/* Register-related defines. */ + +/* Total max number of registers per class + * (instances > ROGUE_MAX_REG_INDEX addressable via indexing only). + */ +#define ROGUE_MAX_REG_TEMP 248 +#define ROGUE_MAX_REG_COEFF 4096 +#define ROGUE_MAX_REG_CONST 240 +#define ROGUE_MAX_REG_SHARED 4096 +#define ROGUE_MAX_REG_PIXEL_OUT 8 +#define ROGUE_MAX_REG_VERTEX_IN 248 +#define ROGUE_MAX_REG_INTERNAL 8 + +/* Maximum register index via offset encoding. */ +#define ROGUE_MAX_REG_INDEX 256 + +/* Pixel-out register offset. */ +#define ROGUE_PIXEL_OUT_REG_OFFSET 32 + +/* Internal register offset.
*/ +#define ROGUE_INTERNAL_REG_OFFSET 36 + +/* Coefficient registers are typically used in groups of 4. */ +#define ROGUE_COEFF_ALIGN 4 + +/* Defines for other operand types. */ + +/* Available dependent read counters. */ +#define ROGUE_NUM_DRCS 2 + +/* Maximum number of vertex outputs. */ +#define ROGUE_MAX_VERTEX_OUTPUTS 256 + +/* All components of an emulated vec4 register group. */ +#define ROGUE_COMPONENT_ALL (~0) + +/** + * \brief Operand types. + */ +enum rogue_operand_type { + /* Register operands. */ + ROGUE_OPERAND_TYPE_REG_TEMP = 0, /** Temporary register. */ + ROGUE_OPERAND_TYPE_REG_COEFF, /** Coefficient register. */ + ROGUE_OPERAND_TYPE_REG_CONST, /** Constant register. */ + ROGUE_OPERAND_TYPE_REG_SHARED, /** Shared register. */ + ROGUE_OPERAND_TYPE_REG_PIXEL_OUT, /** Pixel output register. */ + ROGUE_OPERAND_TYPE_REG_VERTEX_IN, /** Vertex input register. */ + ROGUE_OPERAND_TYPE_REG_INTERNAL, /** Internal register. */ + + ROGUE_OPERAND_TYPE_REG_MAX = ROGUE_OPERAND_TYPE_REG_INTERNAL, + + ROGUE_OPERAND_TYPE_IMMEDIATE, /** Immediate value. */ + + ROGUE_OPERAND_TYPE_DRC, /** Dependent read counter. */ + + ROGUE_OPERAND_TYPE_VREG, /** Virtual register (pre-regalloc). */ + + ROGUE_OPERAND_TYPE_COUNT, +}; + +/* clang-format off */ + +#define ROGUE_NUM_REG_TYPES (ROGUE_OPERAND_TYPE_REG_MAX + 1) + +/** + * \brief A bitmask for any register operand type. + */ +#define ROGUE_MASK_ANY_REG \ + ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | \ + ROH(ROGUE_OPERAND_TYPE_REG_COEFF) | \ + ROH(ROGUE_OPERAND_TYPE_REG_CONST) | \ + ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT) | \ + ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN) | \ + ROH(ROGUE_OPERAND_TYPE_REG_SHARED) | \ + ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL) + +/* clang-format on */ + +/** + * \brief Operand description. + */ +struct rogue_operand { + enum rogue_operand_type type; + + union { + struct { + uint64_t value; + } immediate; + + struct { + size_t number; + } drc; + + struct { + size_t number; + } reg; + + struct { + size_t number; + bool is_vector; + size_t component; + } vreg; + }; +}; + +/** + * \brief Register access flags. + */ +enum rogue_register_access { + ROGUE_REG_ACCESS_READ = BITFIELD_BIT(0U), /** Read-only. */ + ROGUE_REG_ACCESS_WRITE = BITFIELD_BIT(1U), /** Write-only. */ + ROGUE_REG_ACCESS_RW = ROGUE_REG_ACCESS_READ | + ROGUE_REG_ACCESS_WRITE, /** Read/write. */ +}; + +/** + * \brief Register modifier flags. + */ +enum rogue_register_modifier { + ROGUE_REG_MOD_NONE = 0U, + ROGUE_REG_MOD_IDX = BITFIELD_BIT(0U), /** Index modifier. */ + ROGUE_REG_MOD_DIM = BITFIELD_BIT(1U), /** Dimension modifier. */ + ROGUE_REG_MOD_ALL = ROGUE_REG_MOD_IDX | ROGUE_REG_MOD_DIM, +}; + +#endif /* ROGUE_OPERAND_H */ diff --git a/src/imagination/rogue/rogue_regalloc.c b/src/imagination/rogue/rogue_regalloc.c new file mode 100644 index 00000000000..f18e44322f6 --- /dev/null +++ b/src/imagination/rogue/rogue_regalloc.c @@ -0,0 +1,313 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdbool.h> +#include <stddef.h> + +#include "rogue_operand.h" +#include "rogue_regalloc.h" +#include "rogue_shader.h" +#include "rogue_util.h" +#include "util/hash_table.h" +#include "util/list.h" +#include "util/ralloc.h" +#include "util/register_allocate.h" +#include "util/u_dynarray.h" + +/** + * \file rogue_regalloc.c + * + * \brief Contains register allocation helper functions. + */ + +/** + * \brief Sets up the register data with the classes to be used for allocation. + * + * \param[in] data The register data array. + */ +static void +rogue_reg_data_init(struct rogue_reg_data data[static ROGUE_REG_CLASS_COUNT]) +{ + data[ROGUE_REG_CLASS_TEMP].type = ROGUE_OPERAND_TYPE_REG_TEMP; + data[ROGUE_REG_CLASS_TEMP].count = ROGUE_MAX_REG_TEMP; + data[ROGUE_REG_CLASS_TEMP].stride = 1; + + data[ROGUE_REG_CLASS_VEC4].type = ROGUE_OPERAND_TYPE_REG_INTERNAL; + data[ROGUE_REG_CLASS_VEC4].count = ROGUE_MAX_REG_INTERNAL; + data[ROGUE_REG_CLASS_VEC4].stride = 4; +} + +/** + * \brief Initializes the Rogue register allocation context. + * + * \param[in] mem_ctx The memory context for the ra context. + * \return A rogue_ra * if successful, or NULL if unsuccessful. + */ +struct rogue_ra *rogue_ra_init(void *mem_ctx) +{ + struct rogue_ra *ra; + size_t total_regs = 0; + + ra = rzalloc_size(mem_ctx, sizeof(*ra)); + if (!ra) + return NULL; + + /* Initialize the register class data. */ + rogue_reg_data_init(ra->reg_data); + + /* Count up the registers classes and set up their offsets. + * + * The physical register numbers are sequential, even if the + * registers are from different banks, so keeping track of + * the offset means we can get the true physical register + * number back after allocation. + */ + for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) { + ra->reg_data[u].offset = total_regs; + total_regs += ra->reg_data[u].count; + } + + /* Create a register set for allocation. */ + ra->regs = ra_alloc_reg_set(ra, total_regs, true); + if (!ra->regs) { + ralloc_free(ra); + return NULL; + } + + /* Create the register class for the temps. */ + ra->reg_data[ROGUE_REG_CLASS_TEMP].class = + ra_alloc_contig_reg_class(ra->regs, 1); + + /* Create the register class for vec4 registers + * (using the internal register bank). + */ + ra->reg_data[ROGUE_REG_CLASS_VEC4].class = + ra_alloc_contig_reg_class(ra->regs, 4); + + /* Populate the register classes.
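+    * Each class adds every stride-th register in its range, so e.g. the +    * vec4 class only contains the first register of each aligned group of +    * four internal registers.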
*/ + for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) { + struct rogue_reg_data *reg_data = &ra->reg_data[u]; + size_t offset = reg_data->offset; + size_t end = reg_data->offset + reg_data->count; + size_t stride = reg_data->stride; + + for (size_t r = offset; r < end; r += stride) + ra_class_add_reg(reg_data->class, r); + } + + /* Finalize the set (no early conflicts passed along for now). */ + ra_set_finalize(ra->regs, NULL); + + return ra; +} + +/** + * \brief The range for which a (virtual) register is live, and its references. + */ +struct live_range { + size_t start; + size_t end; + enum rogue_reg_class class; + struct util_dynarray operand_refs; +}; + +/** + * \brief Performs register allocation. + * + * \param[in] instr_list A linked list of instructions with virtual registers to + * be allocated. + * \param[in] ra The register allocation context. + */ +bool rogue_ra_alloc(struct list_head *instr_list, + struct rogue_ra *ra, + size_t *temps_used, + size_t *internals_used) +{ + /* Highest virtual register number seen; ra_alloc_interference_graph() + * needs a gap-free node count (e.g. if only v0 and v2 are used, the + * count must be 3 rather than 2). + */ + size_t max_vreg = 0; + + struct hash_table *reg_ht = + _mesa_hash_table_create(ra, _mesa_hash_uint, _mesa_key_uint_equal); + if (!reg_ht) + return false; + + /* Calculate live ranges for virtual registers. */ + size_t ip = 0U; /* "Instruction pointer". */ + foreach_instr (instr, instr_list) { + for (size_t u = 0U; u < instr->num_operands; ++u) { + struct hash_entry *entry; + struct live_range *range; + + if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG) + continue; + + entry = + _mesa_hash_table_search(reg_ht, &instr->operands[u].vreg.number); + if (!entry) { + /* First use of this virtual register: initialize live range. */ + /* TODO: Error handling. */ + range = rzalloc_size(reg_ht, sizeof(*range)); + + range->start = ip; + range->end = ip; + range->class = instr->operands[u].vreg.is_vector + ? ROGUE_REG_CLASS_VEC4 + : ROGUE_REG_CLASS_TEMP; + + entry = _mesa_hash_table_insert(reg_ht, + &instr->operands[u].vreg.number, + range); + + max_vreg = MAX2(max_vreg, instr->operands[u].vreg.number); + + util_dynarray_init(&range->operand_refs, range); + } else { + /* Subsequent uses: update live range end. */ + range = entry->data; + range->end = MAX2(range->end, ip); + assert(range->class == (instr->operands[u].vreg.is_vector + ? ROGUE_REG_CLASS_VEC4 + : ROGUE_REG_CLASS_TEMP)); + } + + /* Save a reference to the operand. */ + util_dynarray_append(&range->operand_refs, + struct rogue_operand *, + &instr->operands[u]); + } + ++ip; + } + + /* Initialize the interference graph. */ + struct ra_graph *g = ra_alloc_interference_graph(ra->regs, max_vreg + 1); + + /* Set each virtual register to the appropriate class. */ + hash_table_foreach (reg_ht, entry) { + const uint32_t *vreg = entry->key; + struct live_range *range = entry->data; + struct ra_class *class = ra->reg_data[range->class].class; + + ra_set_node_class(g, *vreg, class); + /* TODO: ra_set_node_spill_cost(g, *vreg, cost); */ + } + + /* Build interference graph from overlapping live ranges. */ + hash_table_foreach (reg_ht, entry_first) { + const uint32_t *vreg_first = entry_first->key; + struct live_range *range_first = entry_first->data; + + hash_table_foreach (reg_ht, entry_second) { + const uint32_t *vreg_second = entry_second->key; + struct live_range *range_second = entry_second->data; + + if (*vreg_first == *vreg_second) + continue; + + /* If the live ranges overlap, those register nodes interfere.
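+          * (Two ranges are disjoint only when one starts at or after the +          * other ends, hence the negated test below.)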
*/ + if (!(range_first->start >= range_second->end || + range_second->start >= range_first->end)) { + ra_add_node_interference(g, *vreg_first, *vreg_second); + } + } + } + + /* Add node interferences such that the same register can't be used for + * both an instruction's source and destination. + */ + foreach_instr (instr, instr_list) { + for (size_t u = 0U; u < instr->num_operands; ++u) { + if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG) + continue; + + /* Operand 0 (if it exists and is virtual) is always + * the destination register. + */ + if (u > 0 && instr->operands[0].type == ROGUE_OPERAND_TYPE_VREG) + ra_add_node_interference(g, + instr->operands[0].vreg.number, + instr->operands[u].vreg.number); + } + } + + /* Perform register allocation; this must also run when asserts are + * compiled out, so don't call it from inside assert(). + */ + /* TODO: Spilling support. */ + ASSERTED bool allocated = ra_allocate(g); + assert(allocated); + + /* Replace virtual registers with allocated physical registers. + * N.B. This is a destructive process as it overwrites the hash table key! + */ + hash_table_foreach (reg_ht, entry) { + uint32_t vreg = *(uint32_t *)entry->key; + unsigned phy_reg = ra_get_node_reg(g, vreg); + struct live_range *range = entry->data; + + struct rogue_reg_data *reg_data = &ra->reg_data[range->class]; + enum rogue_operand_type type = reg_data->type; + size_t reg_offset = reg_data->offset; + size_t *num_used = &reg_data->num_used; + + util_dynarray_foreach (&range->operand_refs, + struct rogue_operand *, + operand_ptr) { + size_t num = phy_reg - reg_offset; + struct rogue_operand *operand = *operand_ptr; + + assert(operand->type == ROGUE_OPERAND_TYPE_VREG); + assert(operand->vreg.number == vreg); + + /* Index the component of emulated vec4 registers. */ + if (operand->vreg.is_vector && + operand->vreg.component != ROGUE_COMPONENT_ALL) + num += operand->vreg.component; + + operand->type = type; + operand->reg.number = num; + + *num_used = MAX2(*num_used, operand->reg.number); + } + + util_dynarray_fini(&range->operand_refs); + _mesa_hash_table_remove(reg_ht, entry); + } + + /* Registers used = max reg number + 1. */ + for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) + if (ra->reg_data[u].num_used) + ++ra->reg_data[u].num_used; + + /* Pass back the registers used. */ + if (temps_used) + *temps_used = ra->reg_data[ROGUE_REG_CLASS_TEMP].num_used; + + if (internals_used) + *internals_used = ra->reg_data[ROGUE_REG_CLASS_VEC4].num_used; + + ralloc_free(g); + + _mesa_hash_table_destroy(reg_ht, NULL); + + return true; +} diff --git a/src/imagination/rogue/rogue_regalloc.h b/src/imagination/rogue/rogue_regalloc.h new file mode 100644 index 00000000000..eb831eac426 --- /dev/null +++ b/src/imagination/rogue/rogue_regalloc.h @@ -0,0 +1,70 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_REGALLOC_H +#define ROGUE_REGALLOC_H + +#include <stdbool.h> +#include <stddef.h> + +#include "rogue_operand.h" +#include "util/list.h" + +/** + * \brief Register classes used for allocation. + */ +enum rogue_reg_class { + ROGUE_REG_CLASS_TEMP, + ROGUE_REG_CLASS_VEC4, + + ROGUE_REG_CLASS_COUNT, +}; + +/** + * \brief Register data for each class. + */ +struct rogue_reg_data { + enum rogue_operand_type type; + size_t count; + size_t stride; + + size_t offset; + struct ra_class *class; + size_t num_used; +}; + +/** + * \brief Register allocation context. + */ +struct rogue_ra { + struct ra_regs *regs; + + struct rogue_reg_data reg_data[ROGUE_REG_CLASS_COUNT]; +}; + +struct rogue_ra *rogue_ra_init(void *mem_ctx); +bool rogue_ra_alloc(struct list_head *instr_list, + struct rogue_ra *ra, + size_t *temps_used, + size_t *internals_used); + +#endif /* ROGUE_REGALLOC_H */ diff --git a/src/imagination/rogue/rogue_shader.c b/src/imagination/rogue/rogue_shader.c new file mode 100644 index 00000000000..174d96248ac --- /dev/null +++ b/src/imagination/rogue/rogue_shader.c @@ -0,0 +1,133 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "rogue_shader.h" +#include "rogue_instr.h" +#include "rogue_regalloc.h" +#include "rogue_util.h" +#include "util/ralloc.h" + +/** + * \file rogue_shader.c + * + * \brief Contains functions to manipulate Rogue shaders. + */ + +/** + * \brief Counts how many times an instruction is used in a shader. + * + * \param[in] shader The shader containing instructions to count. + * \param[in] opcode The opcode of the instruction to be counted. + * \return The number of times "opcode" is present, or 0 on error.
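+ * + * e.g. rogue_validate_shader() uses this to check that a fragment shader + * contains exactly one ROGUE_OP_END_FRAG instruction.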
+ */ +size_t rogue_shader_instr_count_type(const struct rogue_shader *shader, + enum rogue_opcode opcode) +{ + size_t count = 0U; + + ASSERT_OPCODE_RANGE(opcode); + + foreach_instr (instr, &shader->instr_list) + if (instr->opcode == opcode) + ++count; + + return count; +} + +/** + * \brief Allocates and sets up a Rogue shader. + * + * \param[in] stage The shader stage. + * \return A rogue_shader* if successful, or NULL if unsuccessful. + */ +struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx, + gl_shader_stage stage) +{ + struct rogue_shader *shader; + + if (!ctx) + return NULL; + + shader = rzalloc_size(ctx, sizeof(*shader)); + if (!shader) + return NULL; + + shader->stage = stage; + + list_inithead(&shader->instr_list); + + shader->ctx = ctx; + shader->ra = rogue_ra_init(shader); + if (!shader->ra) { + ralloc_free(shader); + return NULL; + } + + return shader; +} + +/** + * \brief Creates an instruction and appends it to a Rogue shader. + * + * \param[in] shader The shader. + * \param[in] opcode The instruction opcode. + * \return A rogue_instr* if successful, or NULL if unsuccessful. + */ +struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader, + enum rogue_opcode opcode) +{ + struct rogue_instr *instr = rogue_instr_create(shader, opcode); + if (!instr) + return NULL; + + list_addtail(&instr->node, &shader->instr_list); + + return instr; +} + +size_t rogue_acquire_drc(struct rogue_shader *shader) +{ + size_t drc; + + /* If both DRCs are in use, we have a problem. */ + if (shader->drc_used[0] && shader->drc_used[1]) + return SIZE_MAX; + + drc = !shader->drc_used[0] ? 0 : 1; + shader->drc_used[drc] = true; + + return drc; +} + +void rogue_release_drc(struct rogue_shader *shader, size_t drc) +{ + assert(drc < ROGUE_NUM_DRCS); + assert(shader->drc_used[drc]); + + shader->drc_used[drc] = false; +} diff --git a/src/imagination/rogue/rogue_shader.h b/src/imagination/rogue/rogue_shader.h new file mode 100644 index 00000000000..2109855be08 --- /dev/null +++ b/src/imagination/rogue/rogue_shader.h @@ -0,0 +1,81 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ + +#ifndef ROGUE_SHADER_H +#define ROGUE_SHADER_H + +#include <stdbool.h> +#include <stddef.h> + +#include "compiler/shader_enums.h" +#include "rogue_instr.h" +#include "rogue_operand.h" +#include "rogue_util.h" +#include "util/list.h" +#include "util/macros.h" + +struct rogue_build_ctx; +struct rogue_ra; + +/** + * \brief Shader description. + */ +struct rogue_shader { + gl_shader_stage stage; /** Shader stage. */ + + struct list_head instr_list; /** Instructions linked list. */ + + struct rogue_build_ctx *ctx; + struct rogue_ra *ra; + + bool drc_used[ROGUE_NUM_DRCS]; +}; + +/* Shader instruction list iterators and helpers. */ +#define foreach_instr(__instr, __list) \ + list_for_each_entry (struct rogue_instr, __instr, __list, node) +#define foreach_instr_rev(__instr, __list) \ + list_for_each_entry_rev (struct rogue_instr, __instr, __list, node) +#define foreach_instr_safe(__instr, __list) \ + list_for_each_entry_safe (struct rogue_instr, __instr, __list, node) + +#define instr_first_entry(__list) \ + list_first_entry(__list, struct rogue_instr, node) +#define instr_last_entry(__list) \ + list_last_entry(__list, struct rogue_instr, node) + +size_t rogue_shader_instr_count_type(const struct rogue_shader *shader, + enum rogue_opcode opcode); + +PUBLIC +struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx, + gl_shader_stage stage); + +PUBLIC +struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader, + enum rogue_opcode opcode); + +size_t rogue_acquire_drc(struct rogue_shader *shader); +void rogue_release_drc(struct rogue_shader *shader, size_t drc); + +#endif /* ROGUE_SHADER_H */ diff --git a/src/imagination/rogue/rogue_util.c b/src/imagination/rogue/rogue_util.c new file mode 100644 index 00000000000..f20c13ab122 --- /dev/null +++ b/src/imagination/rogue/rogue_util.c @@ -0,0 +1,98 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "rogue_util.h" +#include "util/macros.h" + +/** + * \file rogue_util.c + * + * \brief Contains compiler utility and helper functions. + */ + +/** + * \brief Splits and distributes value "source" across "dest_bytes" according to + * the ranges specified (from MSB to LSB). + * + * \param[in] source The source value to be distributed. + * \param[in] rangelist The rangelist describing how to distribute "source". + * \param[in] dest_size The size of the destination in bytes.
+ * \param[out] dest_bytes The destination byte array. + * \return false if invalid inputs were provided, else true. + */ +bool rogue_distribute_value(uint64_t source, + const struct rogue_rangelist *rangelist, + size_t dest_size, + uint8_t dest_bytes[dest_size]) +{ + size_t total_bits_left = 0U; + + /* Check that "source" is actually representable in the rangelist's total bits. */ + total_bits_left = rogue_rangelist_bits(rangelist); + assert(util_last_bit64(source) <= total_bits_left && + "Value cannot be represented."); + + /* Iterate over each range. */ + for (size_t u = 0U; u < rangelist->num_ranges; ++u) { + struct rogue_bitrange *range = &rangelist->ranges[u]; + + size_t dest_bit = range->start; + size_t bits_left = range->num; + size_t bytes_covered = rogue_bytes_spilled(range) + 1; + size_t base_byte = rogue_byte_index(range, dest_size); + + /* Iterate over each byte covered by the current range. */ + for (size_t b = 0U; b < bytes_covered; ++b) { + size_t max_bits = rogue_max_bits(dest_bit); + size_t bits_to_place = MIN2(bits_left, max_bits); + size_t dest_byte_bit = dest_bit % 8; + size_t source_bit = total_bits_left - 1; + + /* Mask and shuffle the source value so that it'll fit into the + * correct place in the destination byte: + */ + + /* Extract bits. */ + uint64_t value_masked = + (source & BITMASK64_N(source_bit, bits_to_place)); + /* Shift all the way right. */ + value_masked >>= (1 + source_bit - bits_to_place); + /* Shift left to the correct position. */ + value_masked <<= (1 + dest_byte_bit - bits_to_place); + /* Place value into byte. */ + dest_bytes[base_byte + b] |= (value_masked & 0xff); + + dest_bit -= max_bits; + bits_left -= bits_to_place; + total_bits_left -= bits_to_place; + } + } + + return true; +} diff --git a/src/imagination/rogue/rogue_util.h b/src/imagination/rogue/rogue_util.h new file mode 100644 index 00000000000..fa789c48220 --- /dev/null +++ b/src/imagination/rogue/rogue_util.h @@ -0,0 +1,320 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_UTIL_H +#define ROGUE_UTIL_H + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "util/bitscan.h" +#include "util/log.h" +#include "util/macros.h" + +/* Input validation helpers. */ + +/** + * \brief Returns false from the enclosing function if "expr" evaluates to false. + * + * \param[in] expr The expression to check.
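+ * + * e.g. rogue_validate_shader() uses CHECK(!list_is_empty(&shader->instr_list)) + * to bail out early on an empty shader.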
+ */ +#define CHECK(expr) \ + do { \ + if (!(expr)) \ + return false; \ + } while (0) + +/** + * \brief Returns false from the enclosing function if "expr" evaluates to + * false, and logs the provided error message. + * + * \param[in] expr The expression to check. + * \param[in] fmt The error message to print. + * \param[in] ... The printf-style variable arguments. + */ +#define CHECKF(expr, fmt, ...) \ + do { \ + if (!(expr)) { \ + mesa_log(MESA_LOG_ERROR, "ROGUE", fmt, ##__VA_ARGS__); \ + return false; \ + } \ + } while (0) + +/** + * \brief Asserts if "opcode" is invalid. + * + * \param[in] opcode The opcode to check. + */ +#define ASSERT_OPCODE_RANGE(opcode) assert((opcode) < ROGUE_OP_COUNT) + +/** + * \brief Asserts if "operand" is invalid. + * + * \param[in] operand The operand to check. + */ +#define ASSERT_OPERAND_RANGE(operand) \ + assert((operand) < ROGUE_OPERAND_TYPE_COUNT) + +/** + * \brief Asserts if "operand" is not a register. + * + * \param[in] operand The operand to check. + */ +#define ASSERT_OPERAND_REG(operand) \ + assert((operand) <= ROGUE_OPERAND_TYPE_REG_MAX) + +/** + * \brief Asserts if "flag" is invalid. + * + * \param[in] flag The flag to check. + */ +#define ASSERT_INSTR_FLAG_RANGE(flag) assert((flag) < ROGUE_INSTR_FLAG_COUNT) + +/** + * \brief Asserts if operand index "index" is out of range. + * + * \param[in] instr The target instruction. + * \param[in] index The operand index to check. + */ +#define ASSERT_INSTR_OPERAND_INDEX(instr, index) \ + assert((index) < (instr)->num_operands) + +/** + * \brief Asserts if "stage" is invalid. + * + * \param[in] stage The stage to check. + */ +#define ASSERT_SHADER_STAGE_RANGE(stage) assert((stage) < MESA_SHADER_STAGES) + +/** + * \brief Creates a "n"-bit mask starting from bit "b". + * + * \param[in] b The starting bit. + * \param[in] n The number of bits in the mask. + */ +#define BITMASK64_N(b, n) (((~0ULL) << (64 - (n))) >> (63 - (b))) + +/** + * \brief Compile-time rogue_onehot. + * + * \sa #rogue_onehot() + */ +#define ROH(OFFSET) BITFIELD64_BIT(OFFSET) + +/* TODO: Consider integrating the following into src/util/{macros,bitscan}.h */ + +/** + * \brief Converts a one-hot encoding to an offset encoding. + * + * E.g. 0b10000 -> 4 + * + * \param[in] onehot The one-hot encoding. + * \return The offset encoding. + */ +static inline uint64_t rogue_offset(uint64_t onehot) +{ + assert(util_bitcount64(onehot) == 1); + return ffsll(onehot) - 1; +} + +/** + * \brief Converts an offset encoding to a one-hot encoding. + * + * E.g. 0 -> 0b1 + * + * \param[in] offset The offset encoding. + * \return The one-hot encoding. + */ +static inline uint64_t rogue_onehot(uint64_t offset) +{ + assert(offset < 64ULL); + return (1ULL << offset); +} + +/** + * \brief Checks whether an input bitfield contains only a valid bitset. + * + * E.g. rogue_check_bitset(0b00001100, 0b00001111) -> true + * rogue_check_bitset(0b00001100, 0b00000111) -> false + * + * \param[in] input The input bitfield. + * \param[in] valid_bits The valid bitset. + * \return true if "input" contains only "valid_bits", false otherwise. + */ +static inline bool rogue_check_bitset(uint64_t input, uint64_t valid_bits) +{ + input &= ~valid_bits; + return !input; +} + +/** + * \brief Describes a downward range of bits within an arbitrarily-sized + * sequence. + * + * E.g. for start = 7 and num = 3: + * + * 76543210 + * abcdefgh + * + * the bit range would be: abc.
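+ * + * i.e. the range above is described by { .start = 7, .num = 3 }.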
+ */ +struct rogue_bitrange { + size_t start; + size_t num; +}; + +/** + * \brief Describes a collection of bit-ranges within an arbitrarily-sized + * sequence that are meaningful together. + * + * E.g. an 8-bit value that is encoded within a larger value: + * 8-bit value: abcdefgh + * Parent value: 010ab0cdef0010gh + * + */ +struct rogue_rangelist { + size_t num_ranges; + struct rogue_bitrange *ranges; +}; + +/** + * \brief Counts the total number of bits described in a rangelist. + * + * \param[in] rangelist The input rangelist. + * \return The total number of bits. + */ +static inline size_t +rogue_rangelist_bits(const struct rogue_rangelist *rangelist) +{ + size_t total_bits = 0U; + + for (size_t u = 0U; u < rangelist->num_ranges; ++u) + total_bits += rangelist->ranges[u].num; + + return total_bits; +} + +/** + * \brief Returns the byte offset of the bitrange moving left from the LSB. + * + * \param[in] bitrange The input bit-range. + * \return The byte offset. + */ +static inline size_t rogue_byte_num(const struct rogue_bitrange *bitrange) +{ + /* Make sure there are enough bits. */ + assert(bitrange->num <= (bitrange->start + 1)); + + return bitrange->start / 8; +} + +/** + * \brief Returns the array-indexable byte offset of a bit-range if the sequence + * it represents were to be stored in a byte array containing "num_bytes" + * bytes. + * + * E.g. uint8_t array[2] is a sequence of 16 bits: + * bit(0) is located in array[1]. + * bit(15) is located in array[0]. + * + * For uint8_t array[4]: + * bit(0) is located in array[3]. + * bit(15) is located in array[2]. + * + * \param[in] bitrange The input bit-range. + * \param[in] num_bytes The number of bytes that are used to contain the + * bit-range. + * \return The byte offset. + */ +static inline size_t rogue_byte_index(const struct rogue_bitrange *bitrange, + size_t num_bytes) +{ + /* Make sure there are enough bits. */ + assert(bitrange->num <= (bitrange->start + 1)); + + return num_bytes - rogue_byte_num(bitrange) - 1; +} + +/** + * \brief Returns the bit offset of a bit-range if the sequence it represents is + * being accessed in a byte-wise manner. + * + * E.g. bit 17 has a bit offset of 1. + * + * \param[in] bitrange The input bit-range. + * \return The bit offset. + */ +static inline size_t rogue_bit_offset(const struct rogue_bitrange *bitrange) +{ + /* Make sure there are enough bits. */ + assert(bitrange->num <= (bitrange->start + 1)); + + return bitrange->start % 8; +} + +/** + * \brief Returns the number of additional bytes that the bit-range spills into + * (excluding its "starting" byte). + * + * \param[in] bitrange The input bit-range. + * \return The number of bytes spilled. + */ +static inline size_t rogue_bytes_spilled(const struct rogue_bitrange *bitrange) +{ + /* Make sure there are enough bits. */ + assert(bitrange->num <= (bitrange->start + 1)); + + return ((bitrange->num - 1) / 8) + + ((bitrange->num % 8) > (rogue_bit_offset(bitrange) + 1)); +} + +/** + * \brief For a given bit offset, returns the maximum number of bits (including + * itself) that are accessible before spilling into the following byte. + * + * E.g. When trying to insert an 8-bit value offset of 13, a maximum of 6 bits + * can be placed; the last 2 bits will need to go into the next byte. + * + * 8-bit value: abcdefgh + * + * array[0] array[1] + * 15 8 7 0 + * iiiiiiii jjjjjjjj + * ^ + * abcdef gh + * + * \param[in] offset The bit offset. + * \return The maximum number of accessible bits.
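+ * + * e.g. rogue_max_bits(13) == 6, matching the example above.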
+ */ +static inline size_t rogue_max_bits(size_t offset) +{ + return (offset % 8) + 1; +} + +bool rogue_distribute_value(uint64_t source, + const struct rogue_rangelist *rangelist, + size_t dest_size, + uint8_t dest_bytes[dest_size]); + +#endif /* ROGUE_UTIL_H */ diff --git a/src/imagination/rogue/rogue_validate.c b/src/imagination/rogue/rogue_validate.c new file mode 100644 index 00000000000..8d9d10e3642 --- /dev/null +++ b/src/imagination/rogue/rogue_validate.c @@ -0,0 +1,288 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * \file rogue_validate.c + * + * \brief Contains rules and functions for validating Rogue data structures. + */ + +#include <stddef.h> + +#include "rogue_operand.h" +#include "rogue_shader.h" +#include "rogue_util.h" +#include "rogue_validate.h" +#include "util/list.h" +#include "util/macros.h" + +/** + * \brief Register operand rules. + */ +#define REG_RULE(OPERAND, ACCESS, MAX, MODIFIERS) \ + [ROGUE_OPERAND_TYPE_REG_##OPERAND] = { \ + .access = ROGUE_REG_ACCESS_##ACCESS, \ + .max = MAX, \ + .modifiers = ROGUE_REG_MOD_##MODIFIERS, \ + } + +/* TODO: Support register indexing > ROGUE_MAX_REG_TEMP. */ +static const struct rogue_register_rule reg_rules[ROGUE_NUM_REG_TYPES] = { + REG_RULE(TEMP, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_TEMP), ALL), + REG_RULE(COEFF, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_COEFF), ALL), + REG_RULE(CONST, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_CONST), NONE), + REG_RULE(SHARED, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_SHARED), ALL), + REG_RULE(PIXEL_OUT, + RW, + MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_PIXEL_OUT), + NONE), + REG_RULE(VERTEX_IN, + RW, + MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_VERTEX_IN), + ALL), + REG_RULE(INTERNAL, + RW, + MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_INTERNAL), + NONE), +}; +#undef REG_RULE + +/** + * \brief Instruction rules. + */ +/* TODO: Common up register classes to prevent long lines.
*/ +static const struct rogue_instr_rule instr_rules[ROGUE_OP_COUNT] = { + [ROGUE_OP_NOP] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, }, + [ROGUE_OP_END_FRAG] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, }, + [ROGUE_OP_END_VERT] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, }, + [ROGUE_OP_WDF] = { .flags = 0, + .num_operands = 1, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_PIX_ITER_W] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT), + .num_operands = 5, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, }, + [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, }, + [3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, }, + [4] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 1, .max = 16, .align = -1, }, + }, + }, + [ROGUE_OP_MAX] = { .flags = 0, + .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_MIN] = { .flags = 0, + .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + }, + }, + /* TODO: Add representation for 4 sequential registers. 
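+    * (PACK.U8888 below consumes four sequential components, currently +    * modelled as a single internal register operand.)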
*/ + [ROGUE_OP_PACK_U8888] = { .flags = 0, + .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_MOV] = { .flags = ROH(ROGUE_INSTR_FLAG_OLCHK), + .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL) | ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_SHARED) | ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_MOV_IMM] = { .flags = 0, + .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = UINT32_MAX, .align = -1, }, + }, + }, + [ROGUE_OP_FMA] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP), + .num_operands = 4, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_MUL] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP), + .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + }, + }, + [ROGUE_OP_VTXOUT] = { .flags = 0, + .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){ + [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = ROGUE_MAX_VERTEX_OUTPUTS, .align = -1, }, + [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, }, + }, + }, +}; + +/** + * \brief Validates an operand. + * + * \param[in] operand The operand. + * \return true if valid, otherwise false. + */ +bool rogue_validate_operand(const struct rogue_operand *operand) +{ + ASSERT_OPERAND_RANGE(operand->type); + + switch (operand->type) { + case ROGUE_OPERAND_TYPE_IMMEDIATE: + return true; + + case ROGUE_OPERAND_TYPE_DRC: + CHECKF(operand->drc.number < ROGUE_NUM_DRCS, + "Invalid DRC number '%zu'.", + operand->drc.number); + return true; + + case ROGUE_OPERAND_TYPE_REG_TEMP: + case ROGUE_OPERAND_TYPE_REG_COEFF: + case ROGUE_OPERAND_TYPE_REG_CONST: + case ROGUE_OPERAND_TYPE_REG_SHARED: + case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT: + case ROGUE_OPERAND_TYPE_REG_VERTEX_IN: + case ROGUE_OPERAND_TYPE_REG_INTERNAL: + CHECKF(operand->reg.number < reg_rules[operand->type].max, + "Register number '%zu' out of range.", + operand->reg.number); + return true; + + default: + break; + } + + return false; +} + +/** + * \brief Validates an instruction. + * + * \param[in] instr The instruction. + * \return true if valid, otherwise false. 
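+ * + * Checks the instruction's flags, operand count, operand types, immediate + * ranges and register alignment against its entry in instr_rules[].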
+ */ +bool rogue_validate_instr(const struct rogue_instr *instr) +{ + const struct rogue_instr_rule *rule; + + ASSERT_OPCODE_RANGE(instr->opcode); + + rule = &instr_rules[instr->opcode]; + + /* Validate flags. */ + CHECKF(rogue_check_bitset(instr->flags, rule->flags), + "Invalid instruction flags specified."); + + /* Validate number of operands. */ + CHECKF(instr->num_operands == rule->num_operands, + "Invalid number of operands specified."); + + CHECK(!rule->num_operands || instr->operands); + for (size_t u = 0U; u < instr->num_operands; ++u) { + /* Validate operand types. */ + CHECKF(rogue_check_bitset(rogue_onehot(instr->operands[u].type), + rule->operand_rules[u].mask), + "Invalid type for operand %zu.", + u); + + /* Validate immediate ranges. */ + if (rogue_check_bitset(rogue_onehot(instr->operands[u].type), + ROH(ROGUE_OPERAND_TYPE_IMMEDIATE)) && + rule->operand_rules[u].min != -1 && + rule->operand_rules[u].max != -1) { + CHECKF( + instr->operands[u].immediate.value >= rule->operand_rules[u].min && + instr->operands[u].immediate.value <= rule->operand_rules[u].max, + "Immediate value out of range for operand %zu.", + u); + } + + /* Validate register alignment. */ + if (rogue_check_bitset(rogue_onehot(instr->operands[u].type), + ROGUE_MASK_ANY_REG) && + rule->operand_rules[u].align != -1) { + CHECKF(!(instr->operands[u].reg.number % rule->operand_rules[u].align), + "Invalid register alignment in operand %zu.", + u); + } + + /* Validate each operand. */ + CHECKF(rogue_validate_operand(&instr->operands[u]), + "Failed to validate operand."); + } + + return true; +} + +/** + * \brief Validates a shader. + * + * \param[in] shader The shader. + * \return true if valid, otherwise false. + */ +bool rogue_validate_shader(const struct rogue_shader *shader) +{ + CHECK(!list_is_empty(&shader->instr_list)); + ASSERT_SHADER_STAGE_RANGE(shader->stage); + + /* Shader stage-specific validation. */ + switch (shader->stage) { + case MESA_SHADER_VERTEX: + /* Make sure there is (only) one end vertex shader instruction. */ + CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_VERT) == 1, + "Shader must contain a single end.vert instruction."); + + /* Make sure the end vertex shader instruction is the last one. */ + CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_VERT, + "end.vert not last instruction."); + break; + + case MESA_SHADER_FRAGMENT: + /* Make sure there is (only) one end fragment shader instruction. */ + CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_FRAG) == 1, + "Shader must contain a single end.frag instruction."); + + /* Make sure the end fragment shader instruction is the last one. */ + CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_FRAG, + "end.frag not last instruction."); + break; + + default: + return false; + } + + /* Validate each instruction. */ + foreach_instr (instr, &shader->instr_list) + CHECKF(rogue_validate_instr(instr), "Failed to validate instruction."); + + return true; +} diff --git a/src/imagination/rogue/rogue_validate.h b/src/imagination/rogue/rogue_validate.h new file mode 100644 index 00000000000..36268f32210 --- /dev/null +++ b/src/imagination/rogue/rogue_validate.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ROGUE_VALIDATE_H +#define ROGUE_VALIDATE_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "rogue_instr.h" +#include "rogue_operand.h" +#include "rogue_shader.h" +#include "util/macros.h" + +/** + * \brief Register rule description. + */ +struct rogue_register_rule { + enum rogue_register_access access; + size_t max; + enum rogue_register_modifier modifiers; +}; + +/** + * \brief Instruction operand rule description. + */ +struct rogue_instr_operand_rule { + uint64_t mask; + ssize_t min; + ssize_t max; + ssize_t align; +}; + +/** + * \brief Instruction rule description. + */ +struct rogue_instr_rule { + uint64_t flags; /** A mask of #rogue_instr_flag values. */ + size_t num_operands; + struct rogue_instr_operand_rule *operand_rules; +}; + +PUBLIC +bool rogue_validate_operand(const struct rogue_operand *operand); + +PUBLIC +bool rogue_validate_instr(const struct rogue_instr *instr); + +PUBLIC +bool rogue_validate_shader(const struct rogue_shader *shader); + +#endif /* ROGUE_VALIDATE_H */ diff --git a/src/imagination/rogue/tools/offline_compiler.c b/src/imagination/rogue/tools/offline_compiler.c new file mode 100644 index 00000000000..bbf597f8d61 --- /dev/null +++ b/src/imagination/rogue/tools/offline_compiler.c @@ -0,0 +1,314 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "compiler/shader_enums.h" +#include "nir/nir.h" +#include "rogue.h" +#include "rogue_build_data.h" +#include "rogue_compiler.h" +#include "rogue_dump.h" +#include "util/os_file.h" +#include "util/ralloc.h" + +#include <getopt.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Number of hex columns to dump before starting a new line. */ +#define ARRAY_DUMP_COLS 16 + +/** + * \file offline_compiler.c + * + * \brief Rogue offline compiler. + */ + +static const struct option cmdline_opts[] = { + /* Arguments. */ + { "stage", required_argument, NULL, 's' }, + { "file", required_argument, NULL, 'f' }, + { "entry", required_argument, NULL, 'e' }, + + /* Options. */ + { "help", no_argument, NULL, 'h' }, + { "out", required_argument, NULL, 'o' }, + + { "dump-c-array", no_argument, NULL, 'c' }, + { "dump-rogue", no_argument, NULL, 'r' }, + { "dump-nir", no_argument, NULL, 'n' }, + + { NULL, 0, NULL, 0 }, +}; + +struct compiler_opts { + gl_shader_stage stage; + char *file; + char *entry; + char *out_file; + bool dump_c_array; + bool dump_rogue; + bool dump_nir; +}; + +static void usage(const char *argv0) +{ + /* clang-format off */ + printf("Rogue offline compiler.\n"); + printf("Usage: %s -s <stage> -f <file> [-e <entry>] [-o <file>] [-c] [-r] [-n] [-h]\n", argv0); + printf("\n"); + + printf("Required arguments:\n"); + printf("\t-s, --stage Shader stage (supported options: frag, vert).\n"); + printf("\t-f, --file Shader SPIR-V filename.\n"); + printf("\n"); + + printf("Options:\n"); + printf("\t-h, --help Prints this help message.\n"); + printf("\t-e, --entry Overrides the shader entry-point name (default: 'main').\n"); + printf("\t-o, --out Overrides the output filename (default: 'out.bin').\n"); + printf("\n"); + + printf("\t-c, --dump-c-array Prints the shader binary as a C byte array.\n"); + printf("\t-r, --dump-rogue Prints the shader Rogue assembly.\n"); + printf("\t-n, --dump-nir Prints the shader NIR.\n"); + printf("\n"); + /* clang-format on */ +} + +static bool parse_cmdline(int argc, char *argv[], struct compiler_opts *opts) +{ + int opt; + int longindex; + + while ( + (opt = + getopt_long(argc, argv, "crnhs:f:e:o:", cmdline_opts, &longindex)) != + -1) { + switch (opt) { + case 'c': + opts->dump_c_array = true; + break; + + case 'e': + if (opts->entry) + continue; + + opts->entry = optarg; + break; + + case 'f': + if (opts->file) + continue; + + opts->file = optarg; + break; + + case 'n': + opts->dump_nir = true; + break; + + case 'o': + if (opts->out_file) + continue; + + opts->out_file = optarg; + break; + + case 'r': + opts->dump_rogue = true; + break; + + case 's': + if (opts->stage != MESA_SHADER_NONE) + continue; + + if (!strcmp(optarg, "frag")) + opts->stage = MESA_SHADER_FRAGMENT; + else if (!strcmp(optarg, "vert")) + opts->stage = MESA_SHADER_VERTEX; + else { + fprintf(stderr, "Invalid stage \"%s\".\n", optarg); + usage(argv[0]); + return false; + } + + break; + + case 'h': + default: + usage(argv[0]); + return false; + } + } + + if (opts->stage == MESA_SHADER_NONE || !opts->file) { + fprintf(stderr, + "%s: --stage and --file are required arguments.\n", + argv[0]); + usage(argv[0]); + return false; + } + + if (!opts->out_file) + opts->out_file = "out.bin"; + + if (!opts->entry) + opts->entry = "main"; + + return true; +} + +int
main(int argc, char *argv[]) +{ + /* Command-line options. */ + /* N.B. MESA_SHADER_NONE != 0 */ + struct compiler_opts opts = { .stage = MESA_SHADER_NONE, 0 }; + + /* Input file data. */ + char *input_data; + size_t input_size; + + /* Compiler context. */ + struct rogue_compiler *compiler; + + /* Multi-stage build context. */ + struct rogue_build_ctx *ctx; + + /* Output file. */ + FILE *fp; + size_t bytes_written; + + /* Parse command-line options. */ + if (!parse_cmdline(argc, argv, &opts)) + return 1; + + /* Load SPIR-V input file. */ + input_data = os_read_file(opts.file, &input_size); + if (!input_data) { + fprintf(stderr, "Failed to read file \"%s\".\n", opts.file); + return 1; + } + + /* Create compiler context. */ + compiler = rogue_compiler_create(NULL); + if (!compiler) { + fprintf(stderr, "Failed to set up compiler context.\n"); + goto err_free_input; + } + + ctx = rogue_create_build_context(compiler); + if (!ctx) { + fprintf(stderr, "Failed to set up build context.\n"); + goto err_destroy_compiler; + } + + /* SPIR-V -> NIR. */ + ctx->nir[opts.stage] = rogue_spirv_to_nir(ctx, + opts.stage, + opts.entry, + input_size / sizeof(uint32_t), + (uint32_t *)input_data, + 0, + NULL); + if (!ctx->nir[opts.stage]) { + fprintf(stderr, "Failed to translate SPIR-V input to NIR.\n"); + goto err_free_build_context; + } + + /* Dump NIR shader. */ + if (opts.dump_nir) + nir_print_shader(ctx->nir[opts.stage], stdout); + + /* NIR -> Rogue. */ + ctx->rogue[opts.stage] = rogue_nir_to_rogue(ctx, ctx->nir[opts.stage]); + if (!ctx->rogue[opts.stage]) { + fprintf(stderr, "Failed to translate NIR input to Rogue.\n"); + goto err_free_build_context; + } + + /* Dump Rogue shader. */ + if (opts.dump_rogue) + rogue_dump_shader(ctx->rogue[opts.stage], stdout); + + /* Rogue -> Binary. */ + ctx->binary[opts.stage] = rogue_to_binary(ctx, ctx->rogue[opts.stage]); + if (!ctx->binary[opts.stage]) { + fprintf(stderr, "Failed to translate Rogue to binary.\n"); + goto err_free_build_context; + } + + /* Dump binary as a C array. */ + if (opts.dump_c_array) { + printf("uint8_t shader_bytes[%zu] = {", ctx->binary[opts.stage]->size); + for (size_t u = 0U; u < ctx->binary[opts.stage]->size; ++u) { + if (!(u % ARRAY_DUMP_COLS)) + printf("\n\t"); + + printf("0x%02x, ", ctx->binary[opts.stage]->data[u]); + } + printf("\n};\n"); + } + + /* Write shader binary to disk. */ + fp = fopen(opts.out_file, "wb"); + if (!fp) { + fprintf(stderr, "Failed to open output file \"%s\".\n", opts.out_file); + goto err_free_build_context; + } + + bytes_written = fwrite(ctx->binary[opts.stage]->data, + 1, + ctx->binary[opts.stage]->size, + fp); + if (bytes_written != ctx->binary[opts.stage]->size) { + fprintf( + stderr, + "Failed to write to output file \"%s\" (%zu bytes of %zu written).\n", + opts.out_file, + bytes_written, + ctx->binary[opts.stage]->size); + goto err_close_outfile; + } + + /* Clean up. */ + fclose(fp); + ralloc_free(ctx); + rogue_compiler_destroy(compiler); + free(input_data); + + return 0; + +err_close_outfile: + fclose(fp); +err_free_build_context: + ralloc_free(ctx); +err_destroy_compiler: + rogue_compiler_destroy(compiler); +err_free_input: + free(input_data); + + return 1; +} diff --git a/src/imagination/vulkan/meson.build b/src/imagination/vulkan/meson.build new file mode 100644 index 00000000000..a85f732afb4 --- /dev/null +++ b/src/imagination/vulkan/meson.build @@ -0,0 +1,171 @@ +# Copyright © 2022 Imagination Technologies Ltd. 
+ +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +subdir('pds') + +pvr_entrypoints = custom_target( + 'pvr_entrypoints', + input : [vk_entrypoints_gen, vk_api_xml], + output : ['pvr_entrypoints.h', 'pvr_entrypoints.c'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', + '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'pvr', + ], + depend_files : vk_entrypoints_gen_depend_files, +) + +pvr_files = files( + 'winsys/powervr/pvr_drm.c', + 'winsys/pvr_winsys.c', + 'winsys/pvr_winsys_helper.c', + 'pvr_blit.c', + 'pvr_bo.c', + 'pvr_cmd_buffer.c', + 'pvr_csb.c', + 'pvr_descriptor_set.c', + 'pvr_device.c', + 'pvr_formats.c', + 'pvr_hw_pass.c', + 'pvr_image.c', + 'pvr_job_common.c', + 'pvr_job_compute.c', + 'pvr_job_context.c', + 'pvr_job_render.c', + 'pvr_pass.c', + 'pvr_pipeline.c', + 'pvr_pipeline_cache.c', + 'pvr_query.c', + 'pvr_queue.c', + 'pvr_shader.c', + 'pvr_tex_state.c', + 'pvr_wsi.c', +) + +pvr_includes = [ + include_directories('usc/programs'), + include_directories('winsys'), + libpowervr_pds_includes, +] + +pvr_deps = [ + dep_csbgen, + dep_libdrm, + dep_valgrind, + idep_vulkan_runtime, + idep_vulkan_util, + idep_vulkan_wsi, +] + +pvr_flags = [ + no_override_init_args, +] + +if with_imagination_srv + pvr_files += files( + 'winsys/pvrsrvkm/pvr_srv.c', + 'winsys/pvrsrvkm/pvr_srv_bo.c', + 'winsys/pvrsrvkm/pvr_srv_bridge.c', + 'winsys/pvrsrvkm/pvr_srv_job_compute.c', + 'winsys/pvrsrvkm/pvr_srv_job_render.c', + 'winsys/pvrsrvkm/pvr_srv_syncobj.c', + ) + pvr_flags += '-DPVR_SUPPORT_SERVICES_DRIVER' +endif + +libvulkan_powervr_mesa = shared_library( + 'vulkan_powervr_mesa', + [pvr_files, pvr_entrypoints], + include_directories : [ + pvr_includes, + inc_gallium_aux, + inc_imagination, + inc_include, + inc_src, + inc_mesa, + inc_gallium, + inc_compiler, + ], + link_with : [ + libpowervr_common, + libpowervr_pds, + libpowervr_rogue, + libvulkan_wsi, + ], + dependencies : [ + pvr_deps, + idep_nir, + ], + c_args : pvr_flags, + link_args : [ + ld_args_build_id, + ld_args_bsymbolic, + ld_args_gc_sections + ], + gnu_symbol_visibility : 'hidden', + install : true, +) + +if with_symbols_check + test( + 'pvr symbols check', + symbols_check, + args : [ + '--lib', libvulkan_powervr_mesa, + '--symbols-file', vulkan_icd_symbols, + symbols_check_args, + ], + suite : ['imagination'], + ) +endif + +powervr_mesa_icd = custom_target( + 'powervr_mesa_icd', + input : [vk_icd_gen, vk_api_xml], + output : 
'powervr_mesa_icd.@0@.json'.format(host_machine.cpu()), + command : [ + prog_python, '@INPUT0@', + '--api-version', '1.0', '--xml', '@INPUT1@', + '--lib-path', join_paths(get_option('prefix'), get_option('libdir'), + 'libvulkan_powervr_mesa.so'), + '--out', '@OUTPUT@', + ], + build_by_default : true, + install_dir : with_vulkan_icd_dir, + install : true, +) + +if meson.version().version_compare('>= 0.58') + _dev_icdname = 'powervr_mesa_devenv_icd.@0@.json'.format(host_machine.cpu()) + custom_target( + 'powervr_mesa_devenv_icd', + input : [vk_icd_gen, vk_api_xml], + output : _dev_icdname, + command : [ + prog_python, '@INPUT0@', + '--api-version', '1.0', '--xml', '@INPUT1@', + '--lib-path', meson.current_build_dir() / 'libvulkan_powervr_mesa.so', + '--out', '@OUTPUT@', + ], + build_by_default : true, + ) + + devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname) +endif diff --git a/src/imagination/vulkan/pds/meson.build b/src/imagination/vulkan/pds/meson.build new file mode 100644 index 00000000000..20f47901317 --- /dev/null +++ b/src/imagination/vulkan/pds/meson.build @@ -0,0 +1,50 @@ +# Copyright © 2022 Imagination Technologies Ltd. + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +libpowervr_pds_files = files( + 'pvr_pds.c', + 'pvr_pds_disasm.c', + 'pvr_pds_printer.c', + 'pvr_xgl_pds.c', +) + +libpowervr_pds_includes = include_directories( + '..', + '.', + 'pvr_pds_programs', +) + +libpowervr_pds = static_library( + 'pvr_pds', + [libpowervr_pds_files], + include_directories : [ + libpowervr_pds_includes, + inc_include, + inc_src, + inc_imagination, + ], + c_args : [ + no_override_init_args, + ], + gnu_symbol_visibility : 'hidden', + pic : true, +) diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c new file mode 100644 index 00000000000..c7e83e49817 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds.c @@ -0,0 +1,5179 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
 + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "pvr_device_info.h" +#include "pvr_pds.h" +#include "pvr_rogue_pds_defs.h" +#include "pvr_rogue_pds_disasm.h" +#include "pvr_rogue_pds_encode.h" +#include "util/log.h" +#include "util/macros.h" + +#define H32(X) (uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL)) +#define L32(X) (uint32_t)(((X)&0xFFFFFFFFUL)) + +/***************************************************************************** + Macro definitions +*****************************************************************************/ + +#define PVR_PDS_DWORD_SHIFT 2 + +#define PVR_PDS_CONSTANTS_BLOCK_BASE 0 +#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128 +#define PVR_PDS_TEMPS_BLOCK_BASE 128 +#define PVR_PDS_TEMPS_BLOCK_SIZE 32 + +#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK +#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK + +/* Map PDS temp registers to the CDM values they contain. Work-group IDs are + * only available in the coefficient sync task. + */ +#define PVR_PDS_CDM_WORK_GROUP_ID_X 0 +#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1 +#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2 +/* Local IDs are available in every task. */ +#define PVR_PDS_CDM_LOCAL_ID_X 0 +#define PVR_PDS_CDM_LOCAL_ID_YZ 1 + +#define PVR_PDS_DOUTW_LOWER32 0x0 +#define PVR_PDS_DOUTW_UPPER32 0x1 +#define PVR_PDS_DOUTW_LOWER64 0x2 +#define PVR_PDS_DOUTW_LOWER128 0x3 +#define PVR_PDS_DOUTW_MAXMASK 0x4 + +#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U +#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U) + +/***************************************************************************** + Static variables +*****************************************************************************/ + +static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = { + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64 +}; + +/* If has_slc_mcu_cache_controls is enabled use cache_control_const[0], else use + * cache_control_const[1].
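+ * The second row is all zeroes, so without the feature the cached flag selects no cache-mode bits.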
+ */ +static const uint32_t cache_control_const[2][2] = { + { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED }, + { 0, 0 } +}; + +/***************************************************************************** + Function definitions +*****************************************************************************/ + +uint64_t pvr_pds_encode_ld_src0(uint64_t dest, + uint64_t count8, + uint64_t src_add, + bool cached, + const struct pvr_device_info *dev_info) +{ + uint64_t encoded = 0; + + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { + encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED + : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS); + } + + encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK) + << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT); + encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK) + << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT); + encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED + : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS); + encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT); + + return encoded; +} + +uint64_t pvr_pds_encode_st_src0(uint64_t src, + uint64_t count4, + uint64_t dst_add, + bool write_through, + const struct pvr_device_info *device_info) +{ + uint64_t encoded = 0; + + if (device_info->features.has_slc_mcu_cache_controls) { + encoded |= (write_through + ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH + : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK); + } + + encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK) + << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT); + encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK) + << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT); + encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH + : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK); + encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK) + << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT); + + return encoded; +} + +static ALWAYS_INLINE uint32_t +pvr_pds_encode_doutw_src1(uint32_t dest, + uint32_t dword_mask, + uint32_t flags, + bool cached, + const struct pvr_device_info *dev_info) +{ + assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) || + ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) || + (dword_mask < PVR_PDS_DOUTW_LOWER64)); + + uint32_t encoded = + (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT); + + encoded |= dword_mask_const[dword_mask]; + + encoded |= flags; + + encoded |= + cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0 + : 1] + [cached ? 
1 : 0]; + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc, + uint32_t end, + uint32_t src1, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + src1, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTW); +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc, + uint32_t end, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + 0, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTU); +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc, + uint32_t end) +{ + return pvr_pds_inst_encode_dout(cc, + end, + 0, + 0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTC); +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc, + uint32_t end, + uint32_t src1, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + src1, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTD); +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc, + uint32_t end, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + 0, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTI); +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_doutv(uint32_t cc, + uint32_t end, + uint32_t src1, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + src1, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTV); +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc, + uint32_t neg, + uint32_t setc, + int32_t relative_address) +{ + /* Address should be signed but API only allows unsigned value. */ + return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address); +} + +/** + * Gets the next constant address and moves the next constant pointer along. + * + * \param next_constant Pointer to the next constant address. + * \param num_constants The number of constants required. + * \param count The number of constants allocated. + * \return The address of the next constant. + */ +static uint32_t pvr_pds_get_constants(uint32_t *next_constant, + uint32_t num_constants, + uint32_t *count) +{ + uint32_t constant; + + /* Work out starting constant number. For even number of constants, start on + * a 64-bit boundary. + */ + if (num_constants & 1) + constant = *next_constant; + else + constant = (*next_constant + 1) & ~1; + + /* Update the count with the number of constants actually allocated. */ + *count += constant + num_constants - *next_constant; + + /* Move the next constant pointer. */ + *next_constant = constant + num_constants; + + assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE); + + return constant; +} + +/** + * Gets the next temp address and moves the next temp pointer along. + * + * \param next_temp Pointer to the next temp address. + * \param num_temps The number of temps required. + * \param count The number of temps allocated. + * \return The address of the next temp. + */ +static uint32_t +pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count) +{ + uint32_t temp; + + /* Work out starting temp number. For even number of temps, start on a + * 64-bit boundary. + */ + if (num_temps & 1) + temp = *next_temp; + else + temp = (*next_temp + 1) & ~1; + + /* Update the count with the number of temps actually allocated. */ + *count += temp + num_temps - *next_temp; + + /* Move the next temp pointer. */ + *next_temp = temp + num_temps; + + assert((temp + num_temps) <= + (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE)); + + return temp; +} + +/** + * Write a 32-bit constant indexed by the long range. + * + * \param data_block Pointer to data block to write to. 
 + * \param index Index within the data to write to. + * \param dword0 The 32-bit constant to write. + */ +static void +pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0) +{ + /* Check range. */ + assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER - + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)); + + data_block[index + 0] = dword0; + + PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index); +} + +/** + * Write a 64-bit constant indexed by the long range. + * + * \param data_block Pointer to data block to write to. + * \param index Index within the data to write to. + * \param dword0 Lower half of the 64 bit constant. + * \param dword1 Upper half of the 64 bit constant. + */ +static void pvr_pds_write_constant64(uint32_t *data_block, + uint32_t index, + uint32_t dword0, + uint32_t dword1) +{ + /* Has to be on 64 bit boundary. */ + assert((index & 1) == 0); + + /* Check range. */ + assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); + + data_block[index + 0] = dword0; + data_block[index + 1] = dword1; + + PVR_PDS_PRINT_DATA("WriteConstant64", + ((uint64_t)dword0 << 32) | (uint64_t)dword1, + index); +} + +/** + * Write a 64-bit constant from a single wide word indexed by the long-range + * number. + * + * \param data_block Pointer to data block to write to. + * \param index Index within the data to write to. + * \param word The 64-bit constant to write. + */ + +static void +pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word) +{ + /* Has to be on 64 bit boundary. */ + assert((index & 1) == 0); + + /* Check range. */ + assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); + + data_block[index + 0] = L32(word); + data_block[index + 1] = H32(word); + + PVR_PDS_PRINT_DATA("WriteWideConstant", word, index); +} + +static void pvr_pds_write_dma_address(uint32_t *data_block, + uint32_t index, + uint64_t address, + bool coherent, + const struct pvr_device_info *dev_info) +{ + /* Has to be on 64 bit boundary. */ + assert((index & 1) == 0); + + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) + address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED; + + /* Check range. */ + assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); + + data_block[index + 0] = L32(address); + data_block[index + 1] = H32(address); + + PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index); +} + +/** + * External API to append a 64-bit constant to an existing data segment + * allocation. + * + * \param constants Pointer to start of data segment. + * \param constant_value Value to write to constant. + * \param data_size The number of constants allocated. + * \returns The address of the next constant. + */ +uint32_t pvr_pds_append_constant64(uint32_t *constants, + uint64_t constant_value, + uint32_t *data_size) +{ + /* Calculate next constant from current data size. */ + uint32_t next_constant = *data_size; + uint32_t constant = pvr_pds_get_constants(&next_constant, 2, data_size); + + /* Set the value. */ + pvr_pds_write_wide_constant(constants, constant, constant_value); + + return constant; +} + +void pvr_pds_pixel_shader_sa_initialize( + struct pvr_pds_pixel_shader_sa_program *program) +{ + memset(program, 0, sizeof(*program)); +} + +/** + * Encode a DMA burst. + * + * \param dma_control DMA control words. + * \param dma_address DMA address.
+ * \param dest_offset Destination offset in the attribute. + * \param dma_size The size of the DMA in words. + * \param src_address Source address for the burst. + * \param dev_info PVR device info structure. + * \returns The number of DMA transfers required. + */ + +uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control, + uint64_t *dma_address, + uint32_t dest_offset, + uint32_t dma_size, + uint64_t src_address, + const struct pvr_device_info *dev_info) +{ + /* Simplified for MS2. */ + + /* Force to 1 DMA. */ + const uint32_t num_kicks = 1; + + dma_control[0] = dma_size + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT; + dma_control[0] |= dest_offset + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT; + + dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE; + + dma_address[0] = src_address; + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { + dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED; + } + + return num_kicks; +} + +/* FIXME: use the csbgen interface and pvr_csb_pack. + * FIXME: use bool for phase_rate_change. + */ +/** + * Sets up the USC control words for a DOUTU. + * + * \param usc_task_control USC task control structure to be setup. + * \param execution_address USC execution virtual address. + * \param usc_temps Number of USC temps. + * \param sample_rate Sample rate for the DOUTU. + * \param phase_rate_change Phase rate change for the DOUTU. + */ +void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control, + uint64_t execution_address, + uint32_t usc_temps, + uint32_t sample_rate, + uint32_t phase_rate_change) +{ + usc_task_control->src0 = UINT64_C(0); + + /* Set the execution address. */ + pvr_set_usc_execution_address64(&(usc_task_control->src0), + execution_address); + + if (usc_temps > 0) { + /* Temps are allocated in blocks of 4 dwords. */ + usc_temps = + DIV_ROUND_UP(usc_temps, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE); + + /* Check for losing temps due to too many requested. */ + assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) == + usc_temps); + + usc_task_control->src0 |= + ((uint64_t)(usc_temps & + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK)) + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT; + } + + if (sample_rate > 0) { + usc_task_control->src0 |= + ((uint64_t)sample_rate) + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT; + } + + if (phase_rate_change) { + usc_task_control->src0 |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN; + } +} + +/** + * Generates the PDS pixel event program. + * + * \param program Pointer to the PDS pixel event program. + * \param buffer Pointer to the buffer for the program. + * \param gen_mode Generate either a data segment or code segment. + * \param dev_info PVR device info structure. + * \returns Pointer to just beyond the buffer for the program. + */ +uint32_t * +pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + uint32_t *constants = buffer; + + uint32_t data_size = 0; + + /* Copy the DMA control words and USC task control words to constants, then + * arrange them so that the 64-bit words are together followed by the 32-bit + * words. 
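+ * The 64-bit words are the DOUTU task control and the emit word pairs; the 32-bit words are the per-pair DOUTW control words.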
+ */ + uint32_t control_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + uint32_t emit_constant = + pvr_pds_get_constants(&next_constant, + (2 * program->num_emit_word_pairs), + &data_size); + + uint32_t control_word_constant = + pvr_pds_get_constants(&next_constant, + program->num_emit_word_pairs, + &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + /* Src0 for DOUTU. */ + pvr_pds_write_wide_constant(buffer, + control_constant, + program->task_control.src0); /* DOUTU */ + /* 64-bit Src0. */ + + /* Emit words for end of tile program. */ + for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { + pvr_pds_write_constant64(constants, + emit_constant + (2 * i), + program->emit_words[(2 * i) + 0], + program->emit_words[(2 * i) + 1]); + } + + /* Control words. */ + for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { + uint32_t doutw = pvr_pds_encode_doutw_src1( + (2 * i), + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + if (i == (program->num_emit_word_pairs - 1)) + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + + pvr_pds_write_constant32(constants, control_word_constant + i, doutw); + } + } + + else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* DOUTW the state into the shared register. */ + for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { + *buffer++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ 0, + /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */ + /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0 + */ + } + + /* Kick the USC. */ + *buffer++ = pvr_pds_encode_doutu( + /* cc */ 0, + /* END */ 1, + /* SRC0 */ control_constant >> 1); + } + + uint32_t code_size = 1 + program->num_emit_word_pairs; + + /* Save the data segment Pointer and size. */ + program->data_segment = constants; + program->data_size = data_size; + program->code_size = code_size; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) + return (constants + next_constant); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) + return buffer; + + return NULL; +} + +/** + * Checks if any of the vertex streams contains instance data. + * + * \param streams Streams contained in the vertex shader. + * \param num_streams Number of vertex streams. + * \returns true if one or more of the given vertex streams contains + * instance data, otherwise false. + */ +static bool pvr_pds_vertex_streams_contains_instance_data( + const struct pvr_pds_vertex_stream *streams, + uint32_t num_streams) +{ + for (uint32_t i = 0; i < num_streams; i++) { + const struct pvr_pds_vertex_stream *vertex_stream = &streams[i]; + if (vertex_stream->instance_data) + return true; + } + + return false; +} + +static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs, + uint32_t *next_constant, + uint32_t num_constants, + uint32_t *count) +{ + /* Allocate constant for PDS vertex shader where constant is divided into + * banks. 
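+ * Each bank is eight dwords wide, and two-dword (64-bit) allocations are kept from straddling a bank boundary.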
+ */ + uint32_t constant; + + assert(num_constants == 1 || num_constants == 2); + + if (*next_constant >= (num_backs << 3)) + return pvr_pds_get_constants(next_constant, num_constants, count); + + if ((*next_constant % 8) == 0) { + constant = *next_constant; + + if (num_constants == 1) + *next_constant += 1; + else + *next_constant += 8; + } else if (num_constants == 1) { + constant = *next_constant; + *next_constant += 7; + } else { + *next_constant += 7; + constant = *next_constant; + + if (*next_constant >= (num_backs << 3)) { + *next_constant += 2; + *count += 2; + } else { + *next_constant += 8; + } + } + return constant; +} + +/** + * Generates a PDS program to load USC vertex inputs based from one or more + * vertex buffers, each containing potentially multiple elements, and then a + * DOUTU to execute the USC. + * + * \param program Pointer to the description of the program which should be + * generated. + * \param buffer Pointer to buffer that receives the output of this function. + * Will either be the data segment or code segment depending on + * gen_mode. + * \param gen_mode Which part to generate, either data segment or + * code segment. If PDS_GENERATE_SIZES is specified, nothing is + * written, but size information in program is updated. + * \param dev_info PVR device info structure. + * \returns Pointer to just beyond the buffer for the data - i.e the value + * of the buffer after writing its contents. + */ +uint32_t * +pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + uint32_t next_stream_constant; + uint32_t next_temp; + uint32_t usc_control_constant64; + uint32_t stride_constant32 = 0; + uint32_t dma_address_constant64 = 0; + uint32_t dma_control_constant64; + uint32_t multiplier_constant32 = 0; + uint32_t base_instance_const32 = 0; + + uint32_t temp = 0; + uint32_t index_temp64 = 0; + uint32_t num_vertices_temp64 = 0; + uint32_t pre_index_temp = (uint32_t)(-1); + bool first_ddmadt = true; + uint32_t input_register0; + uint32_t input_register1; + uint32_t input_register2; + + struct pvr_pds_vertex_stream *vertex_stream; + struct pvr_pds_vertex_element *vertex_element; + uint32_t shift_2s_comp; + + uint32_t data_size = 0; + uint32_t code_size = 0; + uint32_t temps_used = 0; + + bool direct_writes_needed = false; + + uint32_t consts_size = 0; + uint32_t vertex_id_control_word_const32 = 0; + uint32_t instance_id_control_word_const32 = 0; + uint32_t instance_id_modifier_word_const32 = 0; + uint32_t geometry_id_control_word_const64 = 0; + uint32_t empty_dma_control_constant64 = 0; + + bool any_instanced_stream = + pvr_pds_vertex_streams_contains_instance_data(program->streams, + program->num_streams); + + uint32_t base_instance_register = 0; + uint32_t ddmadt_enables = 0; + + bool issue_empty_ddmad = false; + uint32_t last_stream_index = program->num_streams - 1; + bool current_p0 = false; + uint32_t skip_stream_flag = 0; + + /* Generate the PDS vertex shader data. */ + +#if defined(DEBUG) + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + for (uint32_t i = 0; i < program->data_size; i++) + buffer[i] = 0xDEADBEEF; + } +#endif + + /* Generate the PDS vertex shader program */ + next_temp = PVR_PDS_TEMPS_BLOCK_BASE; + /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. 
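+ * The input registers arrive pre-loaded in the lowest 32-bit temps.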
*/ + input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used); + /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */ + input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used); + + if (program->iterate_remap_id) + input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used); + else + input_register2 = 0; /* Not used, but need to silence the compiler. */ + + /* Generate the PDS vertex shader code. The constants in the data block are + * arranged as follows: + * + * 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank 3 + * Not used (tmps) Stride | Multiplier Address Control + */ + + /* Find out how many constants are needed by streams. */ + for (uint32_t stream = 0; stream < program->num_streams; stream++) { + pvr_pds_get_constants(&next_constant, + 8 * program->streams[stream].num_elements, + &consts_size); + } + + /* If there are no vertex streams, allocate the first bank for USC Code + * Address. + */ + if (consts_size == 0) + pvr_pds_get_constants(&next_constant, 2, &consts_size); + else + next_constant = 8; + + direct_writes_needed = program->iterate_instance_id || + program->iterate_vtx_id || program->iterate_remap_id; + + if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + /* Evaluate which DDMAD config should be used for each stream. */ + for (uint32_t stream = 0; stream < program->num_streams; stream++) { + vertex_stream = &program->streams[stream]; + + if (vertex_stream->use_ddmadt) { + ddmadt_enables |= (1 << stream); + + /* The condition for index value is: + * index * stride + size <= bufferSize (all in units of bytes) + */ + if (vertex_stream->stride == 0) { + if (vertex_stream->elements[0].size <= + vertex_stream->buffer_size_in_bytes) { + /* index can be any value -> no need to use DDMADT. */ + ddmadt_enables &= (~(1 << stream)); + } else { + /* No index works -> no need to issue DDMAD instruction. + */ + skip_stream_flag |= (1 << stream); + } + } else { + /* index * stride + size <= bufferSize + * + * can be converted to: + * index <= (bufferSize - size) / stride + * + * where the maximum index is: + * integer((bufferSize - size) / stride). + */ + if (vertex_stream->buffer_size_in_bytes < + vertex_stream->elements[0].size) { + /* No index works -> no need to issue DDMAD instruction. + */ + skip_stream_flag |= (1 << stream); + } else { + uint32_t max_index = (vertex_stream->buffer_size_in_bytes - + vertex_stream->elements[0].size) / + vertex_stream->stride; + if (max_index == 0xFFFFFFFFu) { + /* No need to use DDMADT as all possible indices can + * pass the test. + */ + ddmadt_enables &= (~(1 << stream)); + } else { + /* In this case, test condition can be changed to + * index < max_index + 1. + */ + program->streams[stream].num_vertices = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_constant32( + buffer, + program->streams[stream].num_vertices, + max_index + 1); + } + } + } + } + } + + if ((skip_stream_flag & (1 << stream)) == 0) { + issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0; + last_stream_index = stream; + } + } + } else { + if (program->num_streams > 0 && + program->streams[program->num_streams - 1].use_ddmadt) { + issue_empty_ddmad = true; + } + } + + if (direct_writes_needed) + issue_empty_ddmad = false; + + if (issue_empty_ddmad) { + /* An empty DMA control const (DMA size = 0) is required in case the + * last DDMAD is predicated out and the last flag would otherwise go unused.
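+ * The empty DDMAD transfers no data; it exists only to carry the LAST flag unconditionally.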
+ */ + empty_dma_control_constant64 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 2, + &consts_size); + } + + /* Assign constants for non stream or base instance if there is any + * instanced stream. + */ + if (direct_writes_needed || any_instanced_stream || + program->instance_ID_modifier) { + if (program->iterate_vtx_id) { + vertex_id_control_word_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + } + + if (program->iterate_instance_id || program->instance_ID_modifier) { + if (program->instance_ID_modifier == 0) { + instance_id_control_word_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + } else { + instance_id_modifier_word_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + if ((instance_id_modifier_word_const32 % 2) == 0) { + instance_id_control_word_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + } else { + instance_id_control_word_const32 = + instance_id_modifier_word_const32; + instance_id_modifier_word_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + } + } + } + + if (program->base_instance != 0) { + base_instance_const32 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 1, + &consts_size); + } + + if (program->iterate_remap_id) { + geometry_id_control_word_const64 = + pvr_pds_get_bank_based_constants(program->num_streams, + &next_constant, + 2, + &consts_size); + } + } + + if (program->instance_ID_modifier != 0) { + /* This instanceID modifier is used when a draw array instanced call + * sourcing from client data cannot fit into vertex buffer and needs to + * be broken down into several draw calls. + */ + + code_size += 1; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_constant32(buffer, + instance_id_modifier_word_const32, + program->instance_ID_modifier); + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_inst_encode_add32( + /* cc */ 0x0, + /* ALUM */ 0, /* Unsigned */ + /* SNA */ 0, /* Add */ + /* SRC0 32b */ instance_id_modifier_word_const32, + /* SRC1 32b */ input_register1, + /* DST 32b */ input_register1); + } + } + + /* Adjust instanceID if necessary. 
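+ * A non-zero base instance is added in from a constant; for indirect draws the base is read from a persistent temp instead.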
*/ + if (any_instanced_stream || program->iterate_instance_id) { + if (program->base_instance != 0) { + assert(!program->draw_indirect); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_constant32(buffer, + base_instance_const32, + program->base_instance); + } + + base_instance_register = base_instance_const32; + } + + if (program->draw_indirect) { + assert((program->instance_ID_modifier == 0) && + (program->base_instance == 0)); + + base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1; + } + } + + next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + usc_control_constant64 = + pvr_pds_get_constants(&next_stream_constant, 2, &data_size); + + for (uint32_t stream = 0; stream < program->num_streams; stream++) { + bool instance_data_with_base_instance; + + if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) && + ((skip_stream_flag & (1 << stream)) != 0)) { + continue; + } + + vertex_stream = &program->streams[stream]; + + instance_data_with_base_instance = + ((vertex_stream->instance_data) && + ((program->base_instance > 0) || (program->draw_indirect))); + + /* Get all 8 32-bit constants at once, only 6 for first stream due to + * USC constants. + */ + if (stream == 0) { + stride_constant32 = + pvr_pds_get_constants(&next_stream_constant, 6, &data_size); + } else { + next_constant = + pvr_pds_get_constants(&next_stream_constant, 8, &data_size); + + /* Skip bank 0. */ + stride_constant32 = next_constant + 2; + } + + multiplier_constant32 = stride_constant32 + 1; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_constant32(buffer, + stride_constant32, + vertex_stream->stride); + + /* Vertex stream frequency multiplier. */ + if (vertex_stream->multiplier) + pvr_pds_write_constant32(buffer, + multiplier_constant32, + vertex_stream->multiplier); + } + + /* Update the code size count and temps count for the above code + * segment. + */ + if (vertex_stream->current_state) { + code_size += 1; + temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */ + } else { + unsigned int num_temps_required = 0; + + if (vertex_stream->multiplier) { + num_temps_required += 2; + code_size += 3; + + if (vertex_stream->shift) { + code_size += 1; + + if ((int32_t)vertex_stream->shift > 0) + code_size += 1; + } + } else if (vertex_stream->shift) { + code_size += 1; + num_temps_required += 1; + } else if (instance_data_with_base_instance) { + num_temps_required += 1; + } + + if (num_temps_required != 0) { + temp = pvr_pds_get_temps(&next_temp, + num_temps_required, + &temps_used); /* 64-bit */ + } else { + temp = vertex_stream->instance_data ? input_register1 + : input_register0; + } + + if (instance_data_with_base_instance) + code_size += 1; + } + + /* The real code segment. */ + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* If it's current state stream, then index = 0 always. */ + if (vertex_stream->current_state) { + /* Put zero in temp. */ + *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0); + } else if (vertex_stream->multiplier) { + /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24) + * new: Iout = (Iin * Multiplier) >> (shift+31) + */ + + /* Put zero in temp. Need zero for add part of the following + * MAD. MAD source is 64 bit, so need two LIMMs. + */ + *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0); + /* Put zero in temp. Need zero for add part of the following + * MAD. 
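+ * This second LIMM clears the upper half of the 64-bit source pair.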
+ */ + *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0); + + /* old: (Iin * (Multiplier+2^24)) + * new: (Iin * Multiplier) + */ + *buffer++ = pvr_rogue_inst_encode_mad( + 0, /* Sign of add is positive. */ + 0, /* Unsigned ALU mode */ + 0, /* Unconditional */ + multiplier_constant32, + vertex_stream->instance_data ? input_register1 : input_register0, + temp / 2, + temp / 2); + + if (vertex_stream->shift) { + int32_t shift = (int32_t)vertex_stream->shift; + + /* new: >> (shift + 31) */ + shift += 31; + shift *= -1; + + if (shift < -31) { + /* >> (31) */ + shift_2s_comp = 0xFFFE1; + *buffer++ = pvr_pds_inst_encode_stflp64( + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* IM */ 1, /* enable immediate */ + /* SRC0 */ temp / 2, + /* SRC1 */ input_register0, /* This won't be used in + * a shift operation. + */ + /* SRC2 (Shift) */ shift_2s_comp, + /* DST */ temp / 2); + shift += 31; + } + + /* old: >> (Shift+24) + * new: >> (shift + 31) + */ + shift_2s_comp = *((uint32_t *)&shift); + *buffer++ = pvr_pds_inst_encode_stflp64( + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* IM */ 1, /*enable immediate */ + /* SRC0 */ temp / 2, + /* SRC1 */ input_register0, /* This won't be used in + * a shift operation. + */ + /* SRC2 (Shift) */ shift_2s_comp, + /* DST */ temp / 2); + } + + if (instance_data_with_base_instance) { + *buffer++ = + pvr_pds_inst_encode_add32(0, /* cc */ + 0, /* ALNUM */ + 0, /* SNA */ + base_instance_register, /* src0 + */ + temp, /* src1 */ + temp /* dst */ + ); + } + } else { /* NOT vertex_stream->multiplier */ + if (vertex_stream->shift) { + /* Shift Index/InstanceNum Right by shift bits. Put result + * in a Temp. + */ + + /* 2's complement of shift as this will be a right shift. */ + shift_2s_comp = ~(vertex_stream->shift) + 1; + + *buffer++ = pvr_pds_inst_encode_stflp32( + /* IM */ 1, /* enable immediate. */ + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* SRC0 */ vertex_stream->instance_data ? input_register1 + : input_register0, + /* SRC1 */ input_register0, /* This won't be used in + * a shift operation. + */ + /* SRC2 (Shift) */ shift_2s_comp, + /* DST */ temp); + + if (instance_data_with_base_instance) { + *buffer++ = + pvr_pds_inst_encode_add32(0, /* cc */ + 0, /* ALNUM */ + 0, /* SNA */ + base_instance_register, /* src0 + */ + temp, /* src1 */ + temp /* dst */ + ); + } + } else { + if (instance_data_with_base_instance) { + *buffer++ = + pvr_pds_inst_encode_add32(0, /* cc */ + 0, /* ALNUM */ + 0, /* SNA */ + base_instance_register, /* src0 + */ + input_register1, /* src1 */ + temp /* dst */ + ); + } else { + /* If the shift instruction doesn't happen, use the IR + * directly into the following MAD. + */ + temp = vertex_stream->instance_data ? input_register1 + : input_register0; + } + } + } + } + + if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + if (vertex_stream->use_ddmadt) + ddmadt_enables |= (1 << stream); + } else { + if ((ddmadt_enables & (1 << stream)) != 0) { + /* Emulate what DDMADT does for range checking. */ + if (first_ddmadt) { + /* Get an 64 bits temp such that cmp current index with + * allowed vertex number can work. + */ + index_temp64 = + pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit + */ + num_vertices_temp64 = + pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit + */ + + index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER; + num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER; + + code_size += 3; + current_p0 = true; + } + + code_size += (temp == pre_index_temp ? 
1 : 2); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + if (first_ddmadt) { + /* Set predicate to be P0. */ + *buffer++ = pvr_pds_encode_bra( + PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC + */ + 0, /* Neg */ + PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC + */ + 1); /* Addr */ + + *buffer++ = + pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0); + *buffer++ = + pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0); + } + + if (temp != pre_index_temp) { + *buffer++ = pvr_pds_inst_encode_stflp32( + /* IM */ 1, /* enable immediate. */ + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER, + /* SRC1 */ 0, + /* SRC2 (Shift) */ 0, + /* DST */ index_temp64); + } + + *buffer++ = pvr_pds_inst_encode_stflp32( + /* IM */ 1, /* enable immediate. */ + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_OR, + /* SRC0 */ num_vertices_temp64 + 1, + /* SRC1 */ vertex_stream->num_vertices, + /* SRC2 (Shift) */ 0, + /* DST */ num_vertices_temp64); + } + + first_ddmadt = false; + + pre_index_temp = temp; + } + } + + /* Process the elements in the stream. */ + for (uint32_t element = 0; element < vertex_stream->num_elements; + element++) { + bool terminate = false; + + vertex_element = &vertex_stream->elements[element]; + /* Check if last DDMAD needs terminate or not. */ + if ((element == (vertex_stream->num_elements - 1)) && + (stream == last_stream_index)) { + terminate = !issue_empty_ddmad && !direct_writes_needed; + } + + /* Get a new set of constants for this element. */ + if (element) { + /* Get all 8 32 bit constants at once. */ + next_constant = + pvr_pds_get_constants(&next_stream_constant, 8, &data_size); + } + + dma_address_constant64 = next_constant + 4; + dma_control_constant64 = dma_address_constant64 + 2; + + if (vertex_element->component_size == 0) { + /* Standard DMA. + * + * Write the DMA transfer control words into the PDS data + * section. + * + * DMA Address is 40-bit. + */ + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t dma_control_word; + uint64_t dma_control_word64 = 0; + uint32_t dma_size; + + /* Write the address to the constant. */ + pvr_pds_write_dma_address(buffer, + dma_address_constant64, + vertex_stream->address + + (uint64_t)vertex_element->offset, + false, + dev_info); + { + if (program->stream_patch_offsets) { + program + ->stream_patch_offsets[program->num_stream_patches++] = + (stream << 16) | (dma_address_constant64 >> 1); + } + } + + /* Size is in bytes - round up to nearest 32 bit word. */ + dma_size = + (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >> + PVR_PDS_DWORD_SHIFT; + + assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER); + + /* Set up the dma transfer control word. 
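+ * The burst size in dwords, the destination attribute offset, and the unified-store destination and cache mode are all packed into the DDMAD SRC3 word.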
*/ + dma_control_word = + dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; + + dma_control_word |= + vertex_element->reg + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT; + + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED; + + if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + if ((ddmadt_enables & (1 << stream)) != 0) { + assert( + ((((uint64_t)vertex_stream->buffer_size_in_bytes + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) & + ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >> + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) == + (uint64_t)vertex_stream->buffer_size_in_bytes); + dma_control_word64 = + (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN | + (((uint64_t)vertex_stream->buffer_size_in_bytes + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) & + ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK)); + } + } + /* If this is the last dma then also set the last flag. */ + if (terminate) { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; + } + + /* Write the 32-Bit SRC3 word to a 64-bit constant as per + * spec. + */ + pvr_pds_write_wide_constant(buffer, + dma_control_constant64, + dma_control_word64 | + (uint64_t)dma_control_word); + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + if ((ddmadt_enables & (1 << stream)) != 0) { + *buffer++ = pvr_pds_inst_encode_cmp( + 0, /* cc enable */ + PVR_ROGUE_PDSINST_COP_LT, /* Operation */ + index_temp64 >> 1, /* SRC0 (REGS64TP) */ + (num_vertices_temp64 >> 1) + + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1 + (REGS64) + */ + } + } + /* Multiply by the vertex stream stride and add the base + * followed by a DOUTD. + * + * dmad32 (C0 * T0) + C1, C2 + * src0 = stride src1 = index src2 = baseaddr src3 = + * doutd part + */ + + uint32_t cc; + if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) + cc = 0; + else + cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0; + + *buffer++ = pvr_pds_inst_encode_ddmad( + /* cc */ cc, + /* END */ 0, + /* SRC0 */ stride_constant32, /* Stride 32-bit*/ + /* SRC1 */ temp, /* Index 32-bit*/ + /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream + * Address + * + + * Offset + */ + /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA + * Transfer + * Control + * Word. + */ + ); + } + + if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) && + ((ddmadt_enables & (1 << stream)) != 0)) { + code_size += 1; + } + code_size += 1; + } else { + /* Repeat DMA. + * + * Write the DMA transfer control words into the PDS data + * section. + * + * DMA address is 40-bit. + */ + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t dma_control_word; + + /* Write the address to the constant. */ + pvr_pds_write_dma_address(buffer, + dma_address_constant64, + vertex_stream->address + + (uint64_t)vertex_element->offset, + false, + dev_info); + + /* Set up the DMA transfer control word. 
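+ * For a repeat DMA the element size, attribute offset, per-component word size and the REPEAT mode are packed into SRC3.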
*/ + dma_control_word = + vertex_element->size + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; + + dma_control_word |= + vertex_element->reg + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT; + + switch (vertex_element->component_size) { + case 4: { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR; + break; + } + case 3: { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE; + break; + } + case 2: { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO; + break; + } + default: { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE; + break; + } + } + + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT; + + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED; + + /* If this is the last dma then also set the last flag. */ + if (terminate) { + dma_control_word |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; + } + + /* Write the 32-Bit SRC3 word to a 64-bit constant as per + * spec. + */ + pvr_pds_write_wide_constant(buffer, + dma_control_constant64, + (uint64_t)dma_control_word); + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Multiply by the vertex stream stride and add the base + * followed by a DOUTD. + * + * dmad32 (C0 * T0) + C1, C2 + * src0 = stride src1 = index src2 = baseaddr src3 = + * doutd part + */ + *buffer++ = pvr_pds_inst_encode_ddmad( + /* cc */ 0, + /* END */ 0, + /* SRC0 */ stride_constant32, /* Stride 32-bit*/ + /* SRC1 */ temp, /* Index 32-bit*/ + /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream + * Address + * + + * Offset. + */ + /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA + * Transfer + * Control + * Word. + */ + ); + } + + code_size += 1; + } /* End of repeat DMA. */ + } /* Element loop */ + } /* Stream loop */ + + if (issue_empty_ddmad) { + /* Issue an empty last DDMAD, always executed. */ + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_wide_constant( + buffer, + empty_dma_control_constant64, + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN); + } + + code_size += 1; + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_inst_encode_ddmad( + /* cc */ 0, + /* END */ 0, + /* SRC0 */ stride_constant32, /* Stride 32-bit*/ + /* SRC1 */ temp, /* Index 32-bit*/ + /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream + *Address + + *Offset. + */ + /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA + * Transfer + * Control + * Word. + */ + ); + } + } + + if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + if (current_p0) { + code_size += 1; + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Revert predicate back to IF0 which is required by DOUTU. */ + *buffer++ = + pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC + */ + 0, /* Neg */ + PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC + */ + 1); /* Addr */ + } + } + } + /* Send VertexID if requested. */ + if (program->iterate_vtx_id) { + if (program->draw_indirect) { + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_inst_encode_add32( + /* cc */ 0x0, + /* ALUM */ 0, /* Unsigned */ + /* SNA */ 1, /* Minus */ + /* SRC0 32b */ input_register0, /* vertexID */ + /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base + * vertexID. 
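+ * Read from a persistent temp for indirect draws.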
+ */ + /* DST 32b */ input_register0); + } + + code_size += 1; + } + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw = pvr_pds_encode_doutw_src1( + program->vtx_id_register, + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + false, + dev_info); + + if (!program->iterate_instance_id && !program->iterate_remap_id) + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + + pvr_pds_write_constant32(buffer, + vertex_id_control_word_const32, + doutw); + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ 0, + /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1 + */ + /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */ + } + + code_size += 1; + } + + /* Send InstanceID if requested. */ + if (program->iterate_instance_id) { + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw = pvr_pds_encode_doutw_src1( + program->instance_id_register, + PVR_PDS_DOUTW_UPPER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + true, + dev_info); + + if (!program->iterate_remap_id) + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + + pvr_pds_write_constant32(buffer, + instance_id_control_word_const32, + doutw); + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ 0, + /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */ + /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */ + } + + code_size += 1; + } + + /* Send remapped index number to vi0. */ + if (program->iterate_remap_id) { + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw = pvr_pds_encode_doutw_src1( + 0 /* vi0 */, + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, + false, + dev_info); + + pvr_pds_write_constant64(buffer, + geometry_id_control_word_const64, + doutw, + 0); + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ 0, + /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit + * Src1 + */ + /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */ + } + + code_size += 1; + } + + /* Copy the USC task control words to constants. */ + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_wide_constant(buffer, + usc_control_constant64, + program->usc_task_control.src0); /* 64-bit + * Src0 + */ + if (program->stream_patch_offsets) { + /* USC TaskControl is always the first patch. */ + program->stream_patch_offsets[0] = usc_control_constant64 >> 1; + } + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Conditionally (if last in task) issue the task to the USC + * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2. + */ + + *buffer++ = pvr_pds_encode_doutu( + /* cc */ 1, + /* END */ 1, + /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */ + + /* End the program if the Dout did not already end it. */ + *buffer++ = pvr_pds_inst_encode_halt(0); + } + + code_size += 2; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + /* Set the data segment pointer and ensure we return 1 past the buffer + * ptr. 
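+ * Callers can derive the data segment size from the returned pointer.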
+ */ + program->data_segment = buffer; + + buffer += consts_size; + } + + program->temps_used = temps_used; + program->data_size = consts_size; + program->code_size = code_size; + program->ddmadt_enables = ddmadt_enables; + if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) + program->skip_stream_flag = skip_stream_flag; + + return buffer; +} + +/** + * Generates a PDS program to load USC compute shader global/local/workgroup + * sizes/ids and then a DOUTU to execute the USC. + * + * \param program Pointer to description of the program that should be + * generated. + * \param buffer Pointer to buffer that receives the output of this function. + * This will be either the data segment, or the code depending on + * gen_mode. + * \param gen_mode Which part to generate, either data segment or code segment. + * If PDS_GENERATE_SIZES is specified, nothing is written, but + * size information in program is updated. + * \param dev_info PVR device info struct. + * \returns Pointer to just beyond the buffer for the data - i.e. the value of + * the buffer after writing its contents. + */ +uint32_t * +pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t usc_control_constant64; + uint32_t usc_control_constant64_coeff_update = 0; + uint32_t zero_constant64 = 0; + + uint32_t data_size = 0; + uint32_t code_size = 0; + uint32_t temps_used = 0; + uint32_t doutw = 0; + + uint32_t barrier_ctrl_word = 0; + uint32_t barrier_ctrl_word2 = 0; + + /* Even though there are 3 IDs for local and global we only need max one + * DOUTW for local, and two for global. + */ + uint32_t work_group_id_ctrl_words[2] = { 0 }; + uint32_t local_id_ctrl_word = 0; + uint32_t local_input_register; + + /* For the constant value to load into ptemp (SW fence). */ + uint64_t predicate_ld_src0_constant = 0; + uint32_t cond_render_negate_constant = 0; + + uint32_t cond_render_pred_temp; + uint32_t cond_render_negate_temp; + + /* 2x 64 bit registers that will mask out the Predicate load. */ + uint32_t cond_render_pred_mask_constant = 0; + +#if defined(DEBUG) + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + for (uint32_t j = 0; j < program->data_size; j++) + buffer[j] = 0xDEADBEEF; + } +#endif + + /* All the compute input registers are in temps. */ + temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS; + + uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used; + + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + + if (program->kick_usc) { + /* Copy the USC task control words to constants. */ + usc_control_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + if (program->has_coefficient_update_task) { + usc_control_constant64_coeff_update = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + if (program->conditional_render) { + predicate_ld_src0_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + cond_render_negate_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + cond_render_pred_mask_constant = + pvr_pds_get_constants(&next_constant, 4, &data_size); + + /* LD will load a 64 bit value. 
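+ * The temps below are therefore allocated as 64-bit aligned pairs.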
*/ + cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used); + cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used); + + program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant; + program->cond_render_pred_temp = cond_render_pred_temp; + } + + if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->clear_pds_barrier) || + (program->kick_usc && program->conditional_render)) { + zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size); + if (PVR_HAS_QUIRK(dev_info, 51210)) { + barrier_ctrl_word2 = + pvr_pds_get_constants(&next_constant, 1, &data_size); + } + } + + if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED || + program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + work_group_id_ctrl_words[0] = + pvr_pds_get_constants(&next_constant, 1, &data_size); + } + + if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + work_group_id_ctrl_words[1] = + pvr_pds_get_constants(&next_constant, 1, &data_size); + } + + if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size); + } + + if (program->add_base_workgroup) { + for (uint32_t workgroup_component = 0; workgroup_component < 3; + workgroup_component++) { + if (program->work_group_input_regs[workgroup_component] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + program + ->base_workgroup_constant_offset_in_dwords[workgroup_component] = + pvr_pds_get_constants(&next_constant, 1, &data_size); + } + } + } + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + if (program->kick_usc) { + /* Src0 for DOUTU */ + pvr_pds_write_wide_constant(buffer, + usc_control_constant64, + program->usc_task_control.src0); /* 64-bit + * Src0. + */ + } + + if (program->has_coefficient_update_task) { + /* Src0 for DOUTU. */ + pvr_pds_write_wide_constant( + buffer, + usc_control_constant64_coeff_update, + program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */ + } + + if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->clear_pds_barrier) || + (program->kick_usc && program->conditional_render)) { + pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit + * Src0 + */ + } + + if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + if (PVR_HAS_QUIRK(dev_info, 51210)) { + /* Write the constant for the coefficient register write. */ + doutw = pvr_pds_encode_doutw_src1( + program->barrier_coefficient + 4, + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + true, + dev_info); + pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw); + } + /* Write the constant for the coefficient register write. */ + doutw = pvr_pds_encode_doutw_src1( + program->barrier_coefficient, + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + true, + dev_info); + + /* Check whether the barrier is going to be the last DOUTW done by + * the coefficient sync task. 
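+       * If no work-group IDs are being loaded, nothing follows it, so set
+       * the LAST flag on this control word.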
+ */ + if ((program->work_group_input_regs[0] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && + (program->work_group_input_regs[1] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && + (program->work_group_input_regs[2] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + + pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw); + } + + /* If we want work-group id X, see if we also want work-group id Y. */ + if (program->work_group_input_regs[0] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED && + program->work_group_input_regs[1] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + /* Make sure we are going to DOUTW them into adjacent registers + * otherwise we can't do it in one. + */ + assert(program->work_group_input_regs[1] == + (program->work_group_input_regs[0] + 1)); + + doutw = pvr_pds_encode_doutw_src1( + program->work_group_input_regs[0], + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + true, + dev_info); + + /* If we don't want the Z work-group id then this is the last one. + */ + if (program->work_group_input_regs[2] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + + pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw); + } + /* If we only want one of X or Y then handle them separately. */ + else { + if (program->work_group_input_regs[0] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw = pvr_pds_encode_doutw_src1( + program->work_group_input_regs[0], + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + true, + dev_info); + + /* If we don't want the Z work-group id then this is the last + * one. + */ + if (program->work_group_input_regs[2] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + + pvr_pds_write_constant32(buffer, + work_group_id_ctrl_words[0], + doutw); + } else if (program->work_group_input_regs[1] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw = pvr_pds_encode_doutw_src1( + program->work_group_input_regs[1], + PVR_PDS_DOUTW_UPPER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + true, + dev_info); + + /* If we don't want the Z work-group id then this is the last + * one. + */ + if (program->work_group_input_regs[2] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + + pvr_pds_write_constant32(buffer, + work_group_id_ctrl_words[0], + doutw); + } + } + + /* Handle work-group id Z. */ + if (program->work_group_input_regs[2] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw = pvr_pds_encode_doutw_src1( + program->work_group_input_regs[2], + PVR_PDS_DOUTW_UPPER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, + true, + dev_info); + + pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw); + } + + /* Handle the local IDs. */ + if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + uint32_t dest_reg; + + /* If we want local id Y and Z make sure the compiler wants them in + * the same register. 
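+          * (Y and Z arrive packed in a single input dword, so they cannot
+          * be delivered to different registers.)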
+ */ + if (!program->flattened_work_groups) { + if ((program->local_input_regs[1] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && + (program->local_input_regs[2] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + assert(program->local_input_regs[1] == + program->local_input_regs[2]); + } + } + + if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) + dest_reg = program->local_input_regs[1]; + else + dest_reg = program->local_input_regs[2]; + + /* If we want local id X and (Y or Z) then we can do that in a + * single 64-bit DOUTW. + */ + if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + assert(dest_reg == (program->local_input_regs[0] + 1)); + + doutw = pvr_pds_encode_doutw_src1( + program->local_input_regs[0], + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + true, + dev_info); + + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + + pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); + } + /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW. + */ + else { + doutw = pvr_pds_encode_doutw_src1( + dest_reg, + PVR_PDS_DOUTW_UPPER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + true, + dev_info); + + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + + pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); + } + } + /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW. + */ + else if (program->local_input_regs[0] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + doutw = pvr_pds_encode_doutw_src1( + program->local_input_regs[0], + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, + true, + dev_info); + + pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); + } + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT || + gen_mode == PDS_GENERATE_SIZES) { + const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT); +#define APPEND(X) \ + if (encode) { \ + *buffer = X; \ + buffer++; \ + } else { \ + code_size += sizeof(uint32_t); \ + } + + /* Assert that coeff_update_task_branch_size is > 0 because if it is 0 + * then we will be doing an infinite loop. + */ + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) + assert(program->coeff_update_task_branch_size > 0); + + /* Test whether this is the coefficient update task or not. */ + APPEND( + pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */ + PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */ + PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */ + program->coeff_update_task_branch_size /* ADDR */)); + + /* Do we need to initialize the barrier coefficient? */ + if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + if (PVR_HAS_QUIRK(dev_info, 51210)) { + /* Initialize the second barrier coefficient registers to zero. + */ + APPEND(pvr_pds_encode_doutw64(0, /* cc */ + 0, /* END */ + barrier_ctrl_word2, /* SRC1 */ + zero_constant64 >> 1)); /* SRC0 */ + } + /* Initialize the coefficient register to zero. 
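(a DOUTW of the 64-bit zero constant through the barrier control word).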
*/ + APPEND(pvr_pds_encode_doutw64(0, /* cc */ + 0, /* END */ + barrier_ctrl_word, /* SRC1 */ + zero_constant64 >> 1)); /* SRC0 */ + } + + if (program->add_base_workgroup) { + const uint32_t temp_values[3] = { 0, 1, 3 }; + for (uint32_t workgroup_component = 0; workgroup_component < 3; + workgroup_component++) { + if (program->work_group_input_regs[workgroup_component] == + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) + continue; + + APPEND(pvr_pds_inst_encode_add32( + /* cc */ 0x0, + /* ALUM */ 0, + /* SNA */ 0, + /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER + + program->base_workgroup_constant_offset_in_dwords + [workgroup_component], + /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER + + PVR_PDS_CDM_WORK_GROUP_ID_X + + temp_values[workgroup_component], + /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER + + PVR_PDS_CDM_WORK_GROUP_ID_X + + temp_values[workgroup_component])); + } + } + + /* If we are going to put the work-group IDs in coefficients then we + * just need to do the DOUTWs. + */ + if ((program->work_group_input_regs[0] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->work_group_input_regs[1] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + uint32_t dest_reg; + + if (program->work_group_input_regs[0] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X; + } else { + dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y; + } + + APPEND(pvr_pds_encode_doutw64(0, /* cc */ + 0, /* END */ + work_group_id_ctrl_words[0], /* SRC1 + */ + dest_reg >> 1)); /* SRC0 */ + } + + if (program->work_group_input_regs[2] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + APPEND(pvr_pds_encode_doutw64( + 0, /* cc */ + 0, /* END */ + work_group_id_ctrl_words[1], /* SRC1 */ + (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >> + 1)); /* SRC0 */ + } + + /* Issue the task to the USC. */ + if (program->kick_usc && program->has_coefficient_update_task) { + APPEND(pvr_pds_encode_doutu(0, /* cc */ + 1, /* END */ + usc_control_constant64_coeff_update >> + 1)); /* SRC0; DOUTU 64-bit Src0 */ + } + + /* Encode a HALT */ + APPEND(pvr_pds_inst_encode_halt(0)); + + /* Set the branch size used to skip the coefficient sync task. */ + program->coeff_update_task_branch_size = code_size / sizeof(uint32_t); + + /* DOUTW in the local IDs. */ + + /* If we want X and Y or Z, we only need one DOUTW. */ + if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && + ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) { + local_input_register = + PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X; + } else { + /* If we just want X. */ + if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + local_input_register = + PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X; + } + /* If we just want Y or Z. 
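Either one selects the same packed LOCAL_ID_YZ input.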
*/ + else if (program->local_input_regs[1] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED || + program->local_input_regs[2] != + PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { + local_input_register = + PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ; + } + } + + if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || + (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { + APPEND(pvr_pds_encode_doutw64(0, /* cc */ + 0, /* END */ + local_id_ctrl_word, /* SRC1 */ + local_input_register >> 1)); /* SRC0 + */ + } + + if (program->clear_pds_barrier) { + /* Zero the persistent temp (SW fence for context switch). */ + APPEND(pvr_pds_inst_encode_add64( + 0, /* cc */ + PVR_ROGUE_PDSINST_ALUM_UNSIGNED, + PVR_ROGUE_PDSINST_MAD_SNA_ADD, + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (zero_constant64 >> 1), /* src0 = 0 */ + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (zero_constant64 >> 1), /* src1 = 0 */ + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest = + * ptemp64[0] + */ + } + + /* If this is a fence, issue the DOUTC. */ + if (program->fence) { + APPEND(pvr_pds_inst_encode_doutc(0, /* cc */ + 0 /* END */)); + } + + if (program->kick_usc) { + if (program->conditional_render) { + /* Skip if coefficient update task. */ + APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, + 0, + PVR_ROGUE_PDSINST_PREDICATE_KEEP, + 16)); + + /* Load the predicate. */ + APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1)); + + /* Load negate constant into temp for CMP. */ + APPEND(pvr_pds_inst_encode_add64( + 0, /* cc */ + PVR_ROGUE_PDSINST_ALUM_UNSIGNED, + PVR_ROGUE_PDSINST_MAD_SNA_ADD, + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (cond_render_negate_constant >> 1), /* src0 = 0 */ + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (zero_constant64 >> 1), /* src1 = 0 */ + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER + + (cond_render_negate_temp >> 1))); /* dest = ptemp64[0] + */ + + APPEND(pvr_pds_inst_encode_wdf(0)); + + for (uint32_t i = 0; i < 4; i++) { + APPEND(pvr_pds_inst_encode_stflp32( + 1, /* enable immediate */ + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_AND, /* LOP */ + cond_render_pred_temp + i, /* SRC0 */ + cond_render_pred_mask_constant + i, /* SRC1 */ + 0, /* SRC2 (Shift) */ + cond_render_pred_temp + i)); /* DST */ + + APPEND( + pvr_pds_inst_encode_stflp32(1, /* enable immediate */ + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_OR, /* LOP + */ + cond_render_pred_temp + i, /* SRC0 + */ + cond_render_pred_temp, /* SRC1 */ + 0, /* SRC2 (Shift) */ + cond_render_pred_temp)); /* DST */ + } + + APPEND(pvr_pds_inst_encode_limm(0, /* cc */ + cond_render_pred_temp + 1, /* SRC1 + */ + 0, /* SRC0 */ + 0)); /* GLOBALREG */ + + APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */ + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_XOR, /* LOP + */ + cond_render_pred_temp, /* SRC0 */ + cond_render_negate_temp, /* SRC1 + */ + 0, /* SRC2 (Shift) */ + cond_render_pred_temp)); /* DST + */ + + /* Check that the predicate is 0. */ + APPEND(pvr_pds_inst_encode_cmpi( + 0, /* cc */ + PVR_ROGUE_PDSINST_COP_EQ, /* LOP */ + (cond_render_pred_temp >> 1) + + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */ + 0)); /* SRC1 */ + + /* If predicate is 0, skip DOUTU. */ + APPEND(pvr_pds_inst_encode_bra( + PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC: + P0 */ + 0, /* NEG */ + PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC: + keep + */ + 2)); + } + + /* Issue the task to the USC. + * DoutU src1=USC Code Base address, src2=doutu word 2. 
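+          * END is set so the program finishes once the task issues; the
+          * HALT below covers the case where the DOUTU was skipped.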
+ */ + APPEND(pvr_pds_encode_doutu(1, /* cc */ + 1, /* END */ + usc_control_constant64 >> 1)); /* SRC0; + * DOUTU + * 64-bit + * Src0. + */ + } + + /* End the program if the Dout did not already end it. */ + APPEND(pvr_pds_inst_encode_halt(0)); +#undef APPEND + } + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + /* Set the data segment pointer and ensure we return 1 past the buffer + * ptr. + */ + program->data_segment = buffer; + + buffer += next_constant; + } + + /* Require at least one DWORD of PDS data so the program runs. */ + data_size = MAX2(1, data_size); + + program->temps_used = temps_used; + program->highest_temp = temps_used; + program->data_size = data_size; + if (gen_mode == PDS_GENERATE_SIZES) + program->code_size = code_size; + + return buffer; +} + +/** + * Generates the PDS vertex shader data or code block. This program will do a + * DMA into USC Constants followed by a DOUTU. + * + * \param program Pointer to the PDS vertex shader program. + * \param buffer Pointer to the buffer for the program. + * \param gen_mode Generate code or data. + * \param dev_info PVR device information struct. + * \returns Pointer to just beyond the code/data. + */ +uint32_t *pvr_pds_vertex_shader_sa( + struct pvr_pds_vertex_shader_sa_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t next_constant; + uint32_t data_size = 0; + uint32_t code_size = 0; + + uint32_t usc_control_constant64 = 0; + uint32_t dma_address_constant64 = 0; + uint32_t dma_control_constant32 = 0; + uint32_t doutw_value_constant64 = 0; + uint32_t doutw_control_constant32 = 0; + uint32_t fence_constant_word = 0; + uint32_t *buffer_base; + uint32_t kick_index; + + uint32_t total_num_doutw = + program->num_dword_doutw + program->num_q_word_doutw; + uint32_t total_size_dma = + program->num_dword_doutw + 2 * program->num_q_word_doutw; + + next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + + /* Copy the DMA control words and USC task control words to constants. + * + * Arrange them so that the 64-bit words are together followed by the 32-bit + * words. + */ + if (program->kick_usc) { + usc_control_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + if (program->clear_pds_barrier) { + fence_constant_word = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + dma_address_constant64 = pvr_pds_get_constants(&next_constant, + 2 * program->num_dma_kicks, + &data_size); + + /* Assign all unaligned constants together to avoid alignment issues caused + * by pvr_pds_get_constants with even allocation sizes. + */ + doutw_value_constant64 = pvr_pds_get_constants( + &next_constant, + total_size_dma + total_num_doutw + program->num_dma_kicks, + &data_size); + doutw_control_constant32 = doutw_value_constant64 + total_size_dma; + dma_control_constant32 = doutw_control_constant32 + total_num_doutw; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + buffer_base = buffer; + + if (program->kick_usc) { + /* Src0 for DOUTU. */ + pvr_pds_write_wide_constant(buffer_base, + usc_control_constant64, + program->usc_task_control.src0); /* DOUTU + * 64-bit + * Src0. + */ + buffer += 2; + } + + if (program->clear_pds_barrier) { + /* Encode the fence constant src0. Fence barrier is initialized to + * zero. 
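+          * The code segment ADD64s this zero into ptemp[0] to clear the
+          * software fence.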
+ */ + pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0); + buffer += 2; + } + + if (total_num_doutw > 0) { + for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { + /* Write the constant for the coefficient register write. */ + pvr_pds_write_constant64(buffer_base, + doutw_value_constant64, + program->q_word_doutw_value[2 * i], + program->q_word_doutw_value[2 * i + 1]); + pvr_pds_write_constant32( + buffer_base, + doutw_control_constant32, + program->q_word_doutw_control[i] | + ((!program->num_dma_kicks && i == total_num_doutw - 1) + ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN + : 0)); + + doutw_value_constant64 += 2; + doutw_control_constant32 += 1; + } + + for (uint32_t i = 0; i < program->num_dword_doutw; i++) { + /* Write the constant for the coefficient register write. */ + pvr_pds_write_constant32(buffer_base, + doutw_value_constant64, + program->dword_doutw_value[i]); + pvr_pds_write_constant32( + buffer_base, + doutw_control_constant32, + program->dword_doutw_control[i] | + ((!program->num_dma_kicks && i == program->num_dword_doutw - 1) + ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN + : 0)); + + doutw_value_constant64 += 1; + doutw_control_constant32 += 1; + } + + buffer += total_size_dma + total_num_doutw; + } + + if (program->num_dma_kicks == 1) /* Most-common case. */ + { + /* Src0 for DOUTD - Address. */ + pvr_pds_write_dma_address(buffer_base, + dma_address_constant64, + program->dma_address[0], + false, + dev_info); + + /* Src1 for DOUTD - Control Word. */ + pvr_pds_write_constant32( + buffer_base, + dma_control_constant32, + program->dma_control[0] | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN); + + /* Move the buffer ptr along as we will return 1 past the buffer. */ + buffer += 3; + } else if (program->num_dma_kicks > 1) { + for (kick_index = 0; kick_index < program->num_dma_kicks - 1; + kick_index++) { + /* Src0 for DOUTD - Address. */ + pvr_pds_write_dma_address(buffer_base, + dma_address_constant64, + program->dma_address[kick_index], + false, + dev_info); + + /* Src1 for DOUTD - Control Word. */ + pvr_pds_write_constant32(buffer_base, + dma_control_constant32, + program->dma_control[kick_index]); + dma_address_constant64 += 2; + dma_control_constant32 += 1; + } + + /* Src0 for DOUTD - Address. */ + pvr_pds_write_dma_address(buffer_base, + dma_address_constant64, + program->dma_address[kick_index], + false, + dev_info); + + /* Src1 for DOUTD - Control Word. */ + pvr_pds_write_constant32( + buffer_base, + dma_control_constant32, + program->dma_control[kick_index] | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN); + + buffer += 3 * program->num_dma_kicks; + } + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + if (program->clear_pds_barrier) { + /* Zero the persistent temp (SW fence for context switch). */ + *buffer++ = pvr_pds_inst_encode_add64( + 0, /* cc */ + PVR_ROGUE_PDSINST_ALUM_UNSIGNED, + PVR_ROGUE_PDSINST_MAD_SNA_ADD, + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (fence_constant_word >> 1), /* src0 = 0 */ + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (fence_constant_word >> 1), /* src1 = 0 */ + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = + * ptemp[0] + */ + } + + if (total_num_doutw > 0) { + for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { + /* Set the coefficient register to data value. 
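END is only set on the final DOUTW when no DMA kicks or USC kick follow.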
*/
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == total_num_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 2;
+            doutw_control_constant32 += 1;
+         }
+
+         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+            /* Set the coefficient register to data value. */
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == program->num_dword_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 1;
+            doutw_control_constant32 += 1;
+         }
+      }
+
+      if (program->num_dma_kicks != 0) {
+         /* DMA the state into the secondary attributes. */
+
+         if (program->num_dma_kicks == 1) /* Most-common case. */
+         {
+            *buffer++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ !program->kick_usc,
+               /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
+                                                         * Src0.
+                                                         */
+         } else {
+            for (kick_index = 0; kick_index < program->num_dma_kicks;
+                 kick_index++) {
+               *buffer++ = pvr_pds_encode_doutd(
+                  /* cc */ 0,
+                  /* END */ (!program->kick_usc) &&
+                     (kick_index + 1 == program->num_dma_kicks),
+                  /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
+                                                      * Src1.
+                                                      */
+                  /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
+                                                            * 64-bit
+                                                            * Src0.
+                                                            */
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+         }
+      }
+
+      if (program->kick_usc) {
+         /* Kick the USC. */
+         *buffer++ = pvr_pds_encode_doutu(
+            /* cc */ 0,
+            /* END */ 1,
+            /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
+                                                      */
+      }
+
+      if (!program->kick_usc && program->num_dma_kicks == 0 &&
+          total_num_doutw == 0) {
+         *buffer++ = pvr_pds_inst_encode_halt(0);
+      }
+   }
+
+   code_size = program->num_dma_kicks + total_num_doutw;
+   if (program->clear_pds_barrier)
+      code_size++; /* ADD64 instruction. */
+
+   if (program->kick_usc)
+      code_size++;
+
+   /* If there are no DMAs and no USC kick then code is HALT only. */
+   if (code_size == 0)
+      code_size = 1;
+
+   program->data_size = data_size;
+   program->code_size = code_size;
+
+   return buffer;
+}
+
+/**
+ * Writes the code segment for the PDS pixel shader secondary attributes
+ * (uniform and texture) program.
+ *
+ * \param program Pointer to the PDS pixel shader secondary attributes program.
+ * \param buffer Pointer to the buffer for the code/data.
+ * \param gen_mode Either code can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the program/data.
+ */
+uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t *instruction;
+   uint32_t code_size = 0;
+   uint32_t data_size = 0;
+   uint32_t temps_used = 0;
+   uint32_t next_constant;
+
+   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
+          0);
+
+   assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
+
+   /* clang-format off */
+   /* Shape of code segment (note: clear is different)
+    *
+    *          Code
+    *        +------------+
+    *        | BRA if0    |
+    *        | DOUTD      |
+    *        |   ...      |
+    *        | DOUTD.halt |
+    *        | uniform    |
+    *        | DOUTD      |
+    *        |   ...      |
+    *        |   ...      |
+    *        | DOUTW      |
+    *        |   ...      |
+    *        |   ...      
| + * | DOUTU.halt | + * | HALT | + * +------------+ + */ + /* clang-format on */ + instruction = buffer; + + next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + + /* The clear color can arrive packed in the right form in the first (or + * first 2) dwords of the shared registers and the program will issue a + * single doutw for this. + */ + if (program->clear && program->packed_clear) { + uint32_t color_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + uint32_t control_word_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* DOUTW the clear color to the USC constants. Predicate with + * uniform loading flag (IF0). + */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for uniform loading program. */ + /* END */ program->kick_usc ? 0 : 1, /* Last + * instruction + * for a clear. + */ + /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */ + /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ + + code_size += 1; + } + } else if (program->clear) { + uint32_t color_constant1, color_constant2; + + if (program->clear_color_dest_reg & 0x1) { + uint32_t color_constant3, control_word_constant1, + control_word_constant2, color_constant4; + + color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size); + color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size); + + control_word_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_constant2 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* DOUTW the clear color to the USSE constants. Predicate with + * uniform loading flag (IF0). + */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for Uniform Loading program */ + /* END */ 0, + /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */ + /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ + + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for Uniform Loading program */ + /* END */ 0, + /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */ + /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */ + + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for uniform loading program */ + /* END */ program->kick_usc ? 0 : 1, /* Last + * instruction + * for a clear. + */ + /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */ + /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */ + } + + code_size += 3; + } else { + uint32_t control_word_constant, control_word_last_constant; + + /* Put the clear color and control words into the first 8 + * constants. + */ + color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_last_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* DOUTW the clear color to the USSE constants. Predicate with + * uniform loading flag (IF0). 
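+             * Two 64-bit writes cover the four dwords of the clear color.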
+ */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for Uniform Loading program */ + /* END */ 0, + /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */ + /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ + + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 1, /* Only for uniform loading program */ + /* END */ program->kick_usc ? 0 : 1, /* Last + * instruction + * for a clear. + */ + /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1 + */ + /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */ + } + + code_size += 2; + } + + if (program->kick_usc) { + uint32_t doutu_constant64; + + doutu_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* Issue the task to the USC. + * + * dout ds1[constant_use], ds0[constant_use], + * ds1[constant_use], emit + */ + *instruction++ = pvr_pds_encode_doutu( + /* cc */ 0, + /* END */ 1, + /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 + */ + } + + code_size += 1; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* End the program. */ + *instruction++ = pvr_pds_inst_encode_halt(0); + } + code_size += 1; + } else { + uint32_t total_num_doutw = + program->num_dword_doutw + program->num_q_word_doutw; + bool both_textures_and_uniforms = + ((program->num_texture_dma_kicks > 0) && + ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) || + program->kick_usc)); + uint32_t doutu_constant64 = 0; + + if (both_textures_and_uniforms) { + /* If the size of a PDS data section is 0, the hardware won't run + * it. We therefore don't need to branch when there is only a + * texture OR a uniform update program. + */ + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + uint32_t branch_address = + MAX2(1 + program->num_texture_dma_kicks, 2); + + /* Use If0 to BRAnch to uniform code. */ + *instruction++ = pvr_pds_encode_bra( + /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0, + /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE, + /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP, + /* ADDR */ branch_address); + } + + code_size += 1; + } + + if (program->num_texture_dma_kicks > 0) { + uint32_t dma_address_constant64; + uint32_t dma_control_constant32; + /* Allocate 3 constant spaces for each kick. The 64-bit constants + * come first followed by the 32-bit constants. + */ + dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE; + dma_control_constant32 = + dma_address_constant64 + (program->num_texture_dma_kicks * 2); + + for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) { + code_size += 1; + if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction) + continue; + + /* DMA the state into the secondary attributes. */ + *instruction++ = pvr_pds_encode_doutd( + /* cc */ 0, + /* END */ dma == (program->num_texture_dma_kicks - 1), + /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */ + /* SRC0 */ dma_address_constant64 >> 1); /* DOUT + * 64-bit + * Src0 + */ + dma_address_constant64 += 2; + dma_control_constant32 += 1; + } + } else if (both_textures_and_uniforms) { + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* End the program. */ + *instruction++ = pvr_pds_inst_encode_halt(0); + } + + code_size += 1; + } + + /* Reserve space at the beginning of the data segment for the DOUTU Task + * Control if one is needed. + */ + if (program->kick_usc) { + doutu_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. 
The + * 64-bit constants come first followed by the 32-bit constants. + */ + uint32_t total_size_dma = + program->num_dword_doutw + 2 * program->num_q_word_doutw; + + uint32_t dma_address_constant64 = pvr_pds_get_constants( + &next_constant, + program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw, + &data_size); + uint32_t doutw_value_constant64 = + dma_address_constant64 + program->num_uniform_dma_kicks * 2; + uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma; + uint32_t doutw_control_constant32 = + dma_control_constant32 + program->num_uniform_dma_kicks; + + if (total_num_doutw > 0) { + pvr_pds_get_constants(&next_constant, 0, &data_size); + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { + /* Set the coefficient register to data value. */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ !program->num_uniform_dma_kicks && + !program->kick_usc && (i == total_num_doutw - 1), + /* SRC1 */ doutw_control_constant32, + /* SRC0 */ doutw_value_constant64 >> 1); + + doutw_value_constant64 += 2; + doutw_control_constant32 += 1; + } + + for (uint32_t i = 0; i < program->num_dword_doutw; i++) { + /* Set the coefficient register to data value. */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ !program->num_uniform_dma_kicks && + !program->kick_usc && (i == program->num_dword_doutw - 1), + /* SRC1 */ doutw_control_constant32, + /* SRC0 */ doutw_value_constant64 >> 1); + + doutw_value_constant64 += 1; + doutw_control_constant32 += 1; + } + } + code_size += total_num_doutw; + } + + if (program->num_uniform_dma_kicks > 0) { + for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) { + code_size += 1; + + if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction) + continue; + + bool last_instruction = false; + if (!program->kick_usc && + (dma == program->num_uniform_dma_kicks - 1)) { + last_instruction = true; + } + /* DMA the state into the secondary attributes. */ + *instruction++ = pvr_pds_encode_doutd( + /* cc */ 0, + /* END */ last_instruction, + /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 + */ + /* SRC0 */ dma_address_constant64 >> 1); /* DOUT + * 64-bit + * Src0 + */ + dma_address_constant64 += 2; + dma_control_constant32 += 1; + } + } + + if (program->kick_usc) { + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* Issue the task to the USC. + * + * dout ds1[constant_use], ds0[constant_use], + * ds1[constant_use], emit + */ + + *instruction++ = pvr_pds_encode_doutu( + /* cc */ 0, + /* END */ 1, + /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */ + } + + code_size += 1; + } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) { + if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* End the program. */ + *instruction++ = pvr_pds_inst_encode_halt(0); + } + + code_size += 1; + } + } + + /* Minimum temp count is 1. */ + program->temps_used = MAX2(temps_used, 1); + program->code_size = code_size; + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) + return instruction; + else + return NULL; +} + +/** + * Writes the Uniform Data block for the PDS pixel shader secondary attributes + * program. + * + * \param program Pointer to the PDS pixel shader secondary attributes program. + * \param buffer Pointer to the buffer for the code/data. + * \param gen_mode Either code or data can be generated or sizes only updated. + * \param dev_info PVR device information struct. 
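+ * \param uniform true to generate the uniform data block, false to generate
+ *                the texture data block.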
+ * \returns Pointer to just beyond the buffer for the program/data. + */ +uint32_t *pvr_pds_pixel_shader_uniform_texture_data( + struct pvr_pds_pixel_shader_sa_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + bool uniform, + const struct pvr_device_info *dev_info) +{ + uint32_t *constants = buffer; + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + uint32_t temps_used = 0; + uint32_t data_size = 0; + + assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) == + 0); + + assert(gen_mode != PDS_GENERATE_CODE_SEGMENT); + + /* Shape of data segment (note: clear is different). + * + * Uniform Texture + * +--------------+ +-------------+ + * | USC Task L | | USC Task L | + * | H | | H | + * | DMA1 Src0 L | | DMA1 Src0 L | + * | H | | H | + * | DMA2 Src0 L | | | + * | H | | | + * | DMA1 Src1 | | DMA1 Src1 | + * | DMA2 Src1 | | | + * | DOUTW0 Src1 | | | + * | DOUTW1 Src1 | | | + * | ... | | | + * | DOUTWn Srcn | | | + * | other data | | | + * +--------------+ +-------------+ + */ + + /* Generate the PDS pixel shader secondary attributes data. + * + * Packed Clear + * The clear color can arrive packed in the right form in the first (or + * first 2) dwords of the shared registers and the program will issue a + * single DOUTW for this. + */ + if (program->clear && uniform && program->packed_clear) { + uint32_t color_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + uint32_t control_word_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw; + + pvr_pds_write_constant64(constants, + color_constant1, + program->clear_color[0], + program->clear_color[1]); + + /* Load into first constant in common store. */ + doutw = pvr_pds_encode_doutw_src1( + program->clear_color_dest_reg, + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + /* Set the last flag. */ + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0); + } + } else if (program->clear && uniform) { + uint32_t color_constant1, color_constant2; + + if (program->clear_color_dest_reg & 0x1) { + uint32_t color_constant3, control_word_constant1, + control_word_constant2, color_constant4; + + color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size); + color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size); + + control_word_constant1 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_constant2 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw; + + pvr_pds_write_constant32(constants, + color_constant1, + program->clear_color[0]); + + pvr_pds_write_constant64(constants, + color_constant2, + program->clear_color[1], + program->clear_color[2]); + + pvr_pds_write_constant32(constants, + color_constant3, + program->clear_color[3]); + + /* Load into first constant in common store. */ + doutw = pvr_pds_encode_doutw_src1( + program->clear_color_dest_reg, + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + pvr_pds_write_constant64(constants, + control_word_constant1, + doutw, + 0); + + /* Move the destination register along. 
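The next 64-bit write targets clear_color_dest_reg + 1.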
*/ + doutw = pvr_pds_encode_doutw_src1( + program->clear_color_dest_reg + 1, + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + pvr_pds_write_constant64(constants, + control_word_constant2, + doutw, + 0); + + /* Move the destination register along. */ + doutw = pvr_pds_encode_doutw_src1( + program->clear_color_dest_reg + 3, + PVR_PDS_DOUTW_LOWER32, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + /* Set the last flag. */ + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + pvr_pds_write_constant64(constants, color_constant4, doutw, 0); + } + } else { + uint32_t control_word_constant, control_word_last_constant; + + /* Put the clear color and control words into the first 8 + * constants. + */ + color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size); + color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_last_constant = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t doutw; + pvr_pds_write_constant64(constants, + color_constant1, + program->clear_color[0], + program->clear_color[1]); + + pvr_pds_write_constant64(constants, + color_constant2, + program->clear_color[2], + program->clear_color[3]); + + /* Load into first constant in common store. */ + doutw = pvr_pds_encode_doutw_src1( + program->clear_color_dest_reg, + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + + pvr_pds_write_constant64(constants, control_word_constant, doutw, 0); + + /* Move the destination register along. */ + doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK; + doutw |= (program->clear_color_dest_reg + 2) + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT; + + /* Set the last flag. */ + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + pvr_pds_write_constant64(constants, + control_word_last_constant, + doutw, + 0); + } + } + + /* Constants for the DOUTU Task Control, if needed. */ + if (program->kick_usc) { + uint32_t doutu_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_wide_constant( + constants, + doutu_constant64, + program->usc_task_control.src0); /* 64-bit + */ + /* Src0 */ + } + } + } else { + if (uniform) { + /* Reserve space at the beginning of the data segment for the DOUTU + * Task Control if one is needed. + */ + if (program->kick_usc) { + uint32_t doutu_constant64 = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + pvr_pds_write_wide_constant( + constants, + doutu_constant64, + program->usc_task_control.src0); /* 64-bit Src0 */ + } + } + + uint32_t total_num_doutw = + program->num_dword_doutw + program->num_q_word_doutw; + uint32_t total_size_dma = + program->num_dword_doutw + 2 * program->num_q_word_doutw; + + /* Allocate 3 constant spaces for each kick. The 64-bit constants + * come first followed by the 32-bit constants. 
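+          * Layout: DMA addresses, then DOUTW values, then the 32-bit DMA
+          * and DOUTW control words.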
+ */ + uint32_t dma_address_constant64 = + pvr_pds_get_constants(&next_constant, + program->num_uniform_dma_kicks * 3 + + total_size_dma + total_num_doutw, + &data_size); + uint32_t doutw_value_constant64 = + dma_address_constant64 + program->num_uniform_dma_kicks * 2; + uint32_t dma_control_constant32 = + doutw_value_constant64 + total_size_dma; + uint32_t doutw_control_constant32 = + dma_control_constant32 + program->num_uniform_dma_kicks; + + if (total_num_doutw > 0) { + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { + pvr_pds_write_constant64( + constants, + doutw_value_constant64, + program->q_word_doutw_value[2 * i], + program->q_word_doutw_value[2 * i + 1]); + pvr_pds_write_constant32( + constants, + doutw_control_constant32, + program->q_word_doutw_control[i] | + ((!program->num_uniform_dma_kicks && + i == total_num_doutw - 1) + ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN + : 0)); + + doutw_value_constant64 += 2; + doutw_control_constant32 += 1; + } + + for (uint32_t i = 0; i < program->num_dword_doutw; i++) { + pvr_pds_write_constant32(constants, + doutw_value_constant64, + program->dword_doutw_value[i]); + pvr_pds_write_constant32( + constants, + doutw_control_constant32, + program->dword_doutw_control[i] | + ((!program->num_uniform_dma_kicks && + i == program->num_dword_doutw - 1) + ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN + : 0)); + + doutw_value_constant64 += 1; + doutw_control_constant32 += 1; + } + } + } + + if (program->num_uniform_dma_kicks > 0) { + uint32_t kick; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + for (kick = 0; kick < program->num_uniform_dma_kicks - 1; + kick++) { + /* Copy the dma control words to constants. */ + pvr_pds_write_dma_address(constants, + dma_address_constant64, + program->uniform_dma_address[kick], + false, + dev_info); + pvr_pds_write_constant32(constants, + dma_control_constant32, + program->uniform_dma_control[kick]); + + dma_address_constant64 += 2; + dma_control_constant32 += 1; + } + + pvr_pds_write_dma_address(constants, + dma_address_constant64, + program->uniform_dma_address[kick], + false, + dev_info); + pvr_pds_write_constant32( + constants, + dma_control_constant32, + program->uniform_dma_control[kick] | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN); + } + } + + } else if (program->num_texture_dma_kicks > 0) { + /* Allocate 3 constant spaces for each kick. The 64-bit constants + * come first followed by the 32-bit constants. + */ + uint32_t dma_address_constant64 = + pvr_pds_get_constants(&next_constant, + program->num_texture_dma_kicks * 3, + &data_size); + uint32_t dma_control_constant32 = + dma_address_constant64 + (program->num_texture_dma_kicks * 2); + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t kick; + for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) { + /* Copy the DMA control words to constants. 
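Every kick except the last; the final one below gets LAST_EN set.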
*/
+               pvr_pds_write_dma_address(constants,
+                                         dma_address_constant64,
+                                         program->texture_dma_address[kick],
+                                         false,
+                                         dev_info);
+
+               pvr_pds_write_constant32(constants,
+                                        dma_control_constant32,
+                                        program->texture_dma_control[kick]);
+
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+
+            pvr_pds_write_dma_address(constants,
+                                      dma_address_constant64,
+                                      program->texture_dma_address[kick],
+                                      false,
+                                      dev_info);
+
+            pvr_pds_write_constant32(
+               constants,
+               dma_control_constant32,
+               program->texture_dma_control[kick] |
+                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+         }
+      }
+   }
+
+   /* Save the data segment pointer and size. */
+   program->data_segment = constants;
+
+   /* Minimum temp count is 1. */
+   program->temps_used = MAX2(temps_used, 1);
+   program->data_size = data_size;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+   else
+      return NULL;
+}
+
+/**
+ * Generates generic DOUTC PDS program.
+ *
+ * \param program Pointer to the PDS fence program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code or data can be generated, or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant = 0;
+
+   /* Automatically get a data size of 1x 128-bit chunk. */
+   uint32_t data_size = 0, code_size = 0;
+
+   /* Setup the data part. */
+   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
+   uint32_t *instruction = buffer;
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
+                                                           * dwords.
+                                                           */
+
+   /* Update the program sizes. */
+   program->data_size = data_size;
+   program->code_size = code_size;
+   program->data_segment = constants;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Write the DOUTC srcs; their values are unused but must be valid. */
+
+      constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
+      pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
+                                                                * Src0
+                                                                */
+
+      uint32_t control_word_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
+                                                                         * Src1
+                                                                         */
+
+      program->data_size = data_size;
+      buffer += data_size;
+
+      return buffer;
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+      *instruction++ = pvr_pds_inst_encode_doutc(
+         /* cc */ 0,
+         /* END */ 0);
+
+      code_size++;
+
+      /* End the program. */
+      *instruction++ = pvr_pds_inst_encode_halt(0);
+      code_size++;
+
+      program->code_size = code_size;
+   }
+
+   return instruction;
+}
+
+/**
+ * Generates a generic DOUTW PDS program in a single data+code block.
+ *
+ * \param control Pointer to the DOUTW control structure.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code or data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */ +uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + uint32_t doutw; + uint32_t data_size = 0, code_size = 0; + uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; + uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; + + /* Assert if buffer is exceeded. */ + assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS); + + uint32_t *constants = buffer; + uint32_t *instruction = buffer; + + /* Put the constants and control words interleaved in the data region. */ + for (uint32_t const_pair = 0; const_pair < control->num_const64; + const_pair++) { + constant[const_pair] = + pvr_pds_get_constants(&next_constant, 2, &data_size); + control_word_constant[const_pair] = + pvr_pds_get_constants(&next_constant, 2, &data_size); + } + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + /* Data segment points to start of constants. */ + control->data_segment = constants; + + for (uint32_t const_pair = 0; const_pair < control->num_const64; + const_pair++) { + pvr_pds_write_constant64(constants, + constant[const_pair], + H32(control->doutw_data[const_pair]), + L32(control->doutw_data[const_pair])); + + /* Start loading at offset 0. */ + if (control->dest_store == PDS_COMMON_STORE) { + doutw = pvr_pds_encode_doutw_src1( + (2 * const_pair), + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, + false, + dev_info); + } else { + doutw = pvr_pds_encode_doutw_src1( + (2 * const_pair), + PVR_PDS_DOUTW_LOWER64, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + false, + dev_info); + } + + if (const_pair + 1 == control->num_const64) { + /* Set the last flag for the MCU (assume there are no following + * DOUTD's). + */ + doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + pvr_pds_write_constant64(constants, + control_word_constant[const_pair], + doutw, + 0); + } + + control->data_size = data_size; + } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { + /* Code section. */ + + for (uint32_t const_pair = 0; const_pair < control->num_const64; + const_pair++) { + /* DOUTW the PDS data to the USC constants. */ + *instruction++ = pvr_pds_encode_doutw64( + /* cc */ 0, + /* END */ control->last_instruction && + (const_pair + 1 == control->num_const64), + /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit + * Src1. + */ + /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */ + + code_size++; + } + + if (control->last_instruction) { + /* End the program. */ + *instruction++ = pvr_pds_inst_encode_halt(0); + code_size++; + } + + control->code_size = code_size; + } + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) + return (constants + next_constant); + else + return instruction; +} + +/** + * Generates generic kick DOUTU PDS program in a single data+code block. + * + * \param program Pointer to the PDS kick USC. + * \param buffer Pointer to the buffer for the program. + * \param start_next_constant Next constant in data segment. Non-zero if another + * instruction precedes the DOUTU. + * \param cc_enabled If true then the DOUTU is predicated (cc set). + * \param gen_mode Either code and data can be generated or sizes only updated. + * \returns Pointer to just beyond the buffer for the code or program segment. 
+ */ +uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program, + uint32_t *restrict buffer, + uint32_t start_next_constant, + bool cc_enabled, + enum pvr_pds_generate_mode gen_mode) +{ + uint32_t constant = 0; + + /* Automatically get a data size of 2 128bit chunks. */ + uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE; + uint32_t code_size = 1; /* Single doutu */ + uint32_t dummy_count = 0; + + /* Setup the data part. */ + uint32_t *constants = buffer; /* Constants placed at front of buffer. */ + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in + * dwords. + */ + + /* Update the program sizes. */ + program->data_size = data_size; + program->code_size = code_size; + program->data_segment = constants; + + if (gen_mode == PDS_GENERATE_SIZES) + return NULL; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT || + gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) { + /* Copy the USC task control words to constants. */ + + constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count); + + pvr_pds_write_wide_constant(constants, + constant + 0, + program->usc_task_control.src0); /* 64-bit + * Src0. + */ + buffer += data_size; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) + return buffer; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT || + gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) { + /* Generate the PDS pixel shader code. */ + + /* Setup the instruction pointer. */ + uint32_t *instruction = buffer; + + /* Issue the task to the USC. + * + * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ; + * halt halt + */ + + *instruction++ = pvr_pds_encode_doutu( + /* cc */ cc_enabled, + /* END */ 1, + /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU + * 64-bit Src0 + */ + + /* Return pointer to just after last instruction. */ + return instruction; + } + + /* Execution should never reach here; keep compiler happy. */ + return NULL; +} + +uint32_t *pvr_pds_generate_compute_barrier_conditional( + uint32_t *buffer, + enum pvr_pds_generate_mode gen_mode) +{ + /* Compute barriers supported. Need to test for coeff sync task. */ + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) + return buffer; /* No data segment. */ + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Test whether this is the coefficient update task or not. */ + *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC + */ + PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG + */ + PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC + */ + 1 /* ADDR */); + + /* Encode a HALT. */ + *buffer++ = pvr_pds_inst_encode_halt(1); + + /* Reset the default predicate to IF0. */ + *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC + */ + PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG + */ + PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC + */ + 1 /* ADDR */); + } + + return buffer; +} + +/** + * Generates program to kick the USC task to store shared. + * + * \param program Pointer to the PDS shared register. + * \param buffer Pointer to the buffer for the program. + * \param gen_mode Either code and data can be generated or sizes only updated. + * \param dev_info PVR device information structure. + * \returns Pointer to just beyond the buffer for the program. 
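+ * Note: generate the data segment first; the code segment offsets depend on
+ *       doutw_control->data_size.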
+ */ +uint32_t *pvr_pds_generate_shared_storing_program( + struct pvr_pds_shared_storing_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task; + struct pvr_pds_doutw_control *doutw_control = &program->doutw_control; + + if (gen_mode == PDS_GENERATE_SIZES) + return NULL; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t *constants = buffer; + + constants = + pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info); + program->data_size = doutw_control->data_size; + + constants = pvr_pds_kick_usc(kick_usc_program, + constants, + 0, + program->cc_enable, + gen_mode); + program->data_size += kick_usc_program->data_size; + + return constants; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Generate PDS code segment. */ + uint32_t *instruction = buffer; + + /* doutw vi1, vi0 + * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use], + * emit + */ + instruction = + pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info); + program->code_size = doutw_control->code_size; + + /* Offset into data segment follows on from doutw data segment. */ + instruction = pvr_pds_kick_usc(kick_usc_program, + instruction, + doutw_control->data_size, + program->cc_enable, + gen_mode); + program->code_size += kick_usc_program->code_size; + + return instruction; + } + + /* Execution should never reach here. */ + return NULL; +} + +uint32_t *pvr_pds_generate_fence_terminate_program( + struct pvr_pds_fence_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + uint32_t data_size = 0; + uint32_t code_size = 0; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + /* Data segment. */ + uint32_t *constants, *constants_base; + + constants = constants_base = (uint32_t *)buffer; + + /* DOUTC sources are not used, but they must be valid. */ + pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT); + data_size += program->data_size; + + if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { + /* Append a 64-bit constant with value 1. Used to increment ptemp. + * Return the offset into the data segment. + */ + program->fence_constant_word = + pvr_pds_append_constant64(constants_base, 1, &data_size); + } + + program->data_size = data_size; + return constants; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Code segment. 
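Barrier-conditional test, optional SW-fence increment loop, then the terminating DOUTC.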
*/ + uint32_t *instruction = (uint32_t *)buffer; + + instruction = pvr_pds_generate_compute_barrier_conditional( + instruction, + PDS_GENERATE_CODE_SEGMENT); + code_size += 3; + + if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { + /* lock */ + *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */ + + /* add64 pt[0], pt[0], #1 */ + *instruction++ = pvr_pds_inst_encode_add64( + 0, /* cc */ + PVR_ROGUE_PDSINST_ALUM_UNSIGNED, + PVR_ROGUE_PDSINST_MAD_SNA_ADD, + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0] + */ + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (program->fence_constant_word >> 1), /* src1 = 1 */ + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = + * ptemp[0] + */ + + /* release */ + *instruction++ = pvr_pds_inst_encode_release(0); /* cc */ + + /* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */ + *instruction++ = pvr_pds_inst_encode_cmpi( + 0, /* cc */ + PVR_ROGUE_PDSINST_COP_EQ, + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0 + * = ptemp[0] + */ + PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0)); + + /* bra -1 */ + *instruction++ = + pvr_pds_encode_bra(0, /* cc */ + 1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE + */ + 0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0 + */ + -1); /* bra PC */ + code_size += 5; + } + + /* DOUTC */ + instruction = pvr_pds_generate_doutc(program, + instruction, + PDS_GENERATE_CODE_SEGMENT); + code_size += program->code_size; + + program->code_size = code_size; + return instruction; + } + + /* Execution should never reach here. */ + return NULL; +} + +/** + * Generates program to kick the USC task to load shared registers from memory. + * + * \param program Pointer to the PDS shared register. + * \param buffer Pointer to the buffer for the program. + * \param gen_mode Either code and data can be generated or sizes only updated. + * \param dev_info PVR device information struct. + * \returns Pointer to just beyond the buffer for the program. + */ +uint32_t *pvr_pds_generate_compute_shared_loading_program( + struct pvr_pds_shared_storing_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task; + struct pvr_pds_doutw_control *doutw_control = &program->doutw_control; + + uint32_t next_constant; + uint32_t data_size = 0; + uint32_t code_size = 0; + + /* This needs to persist to the CODE_SEGMENT call. */ + static uint32_t fence_constant_word = 0; + uint64_t zero_constant64 = 0; + + if (gen_mode == PDS_GENERATE_SIZES) + return NULL; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { + uint32_t *constants = buffer; + + constants = pvr_pds_generate_doutw(doutw_control, + constants, + PDS_GENERATE_DATA_SEGMENT, + dev_info); + data_size += doutw_control->data_size; + + constants = pvr_pds_kick_usc(kick_usc_program, + constants, + 0, + program->cc_enable, + gen_mode); + data_size += kick_usc_program->data_size; + + /* Copy the fence constant value (64-bit). */ + next_constant = data_size; /* Assumes data words fully packed. */ + fence_constant_word = + pvr_pds_get_constants(&next_constant, 2, &data_size); + + /* Encode the fence constant src0 (offset measured from start of data + * buffer). Fence barrier is initialized to zero. + */ + pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64); + /* Update the const size. 
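+       * The fence constant is one 64-bit word, i.e. two dwords, which is why
+       * both the data size and the constants pointer advance by two below.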
*/ + data_size += 2; + constants += 2; + + program->data_size = data_size; + return constants; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + /* Generate PDS code segment. */ + uint32_t *instruction = buffer; + + /* add64 pt0, c0, c0 + * IF [2x Phantoms] + * add64 pt1, c0, c0 + * st [constant_mem_addr], pt0, 4 + * ENDIF + * doutw vi1, vi0 + * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use], + * emit + * + * Zero the persistent temp (SW fence for context switch). + */ + *instruction++ = pvr_pds_inst_encode_add64( + 0, /* cc */ + PVR_ROGUE_PDSINST_ALUM_UNSIGNED, + PVR_ROGUE_PDSINST_MAD_SNA_ADD, + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (fence_constant_word >> 1), /* src0 + * = 0 + */ + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + + (fence_constant_word >> 1), /* src1 + * = 0 + */ + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0] + */ + code_size++; + + instruction = pvr_pds_generate_doutw(doutw_control, + instruction, + PDS_GENERATE_CODE_SEGMENT, + dev_info); + code_size += doutw_control->code_size; + + /* Offset into data segment follows on from doutw data segment. */ + instruction = pvr_pds_kick_usc(kick_usc_program, + instruction, + doutw_control->data_size, + program->cc_enable, + gen_mode); + code_size += kick_usc_program->code_size; + + program->code_size = code_size; + return instruction; + } + + /* Execution should never reach here. */ + return NULL; +} + +/** + * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES. + * Relies on num_fpu_iterators being initialized for size calculation. + * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being + * initialized for program generation. + * + * \param program Pointer to the PDS pixel shader program. + * \param buffer Pointer to the buffer for the program. + * \param gen_mode Either code and data can be generated or sizes only updated. + * \returns Pointer to just beyond the buffer for the program. + */ +uint32_t *pvr_pds_coefficient_loading( + struct pvr_pds_coeff_loading_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode) +{ + uint32_t constant; + uint32_t *instruction; + uint32_t total_data_size, code_size; + + /* Place constants at the front of the buffer. */ + uint32_t *constants = buffer; + /* Start counting constants from 0. */ + uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + + /* Save the data segment pointer and size. */ + program->data_segment = constants; + + total_data_size = 0; + code_size = 0; + + total_data_size += 2 * program->num_fpu_iterators; + code_size += program->num_fpu_iterators; + + /* Instructions start where constants finished, but we must take note of + * alignment. + * + * 128-bit boundary = 4 dwords. + */ + total_data_size = ALIGN_POT(total_data_size, 4); + if (gen_mode != PDS_GENERATE_SIZES) { + uint32_t data_size = 0; + uint32_t iterator = 0; + + instruction = buffer + total_data_size; + + while (iterator < program->num_fpu_iterators) { + uint64_t iterator_word; + + /* Copy the USC task control words to constants. */ + constant = pvr_pds_get_constants(&next_constant, 2, &data_size); + + /* Write the first iterator. */ + iterator_word = + (uint64_t)program->FPU_iterators[iterator] + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT; + + /* Write the destination. */ + iterator_word |= + (uint64_t)program->destination[iterator++] + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT; + + /* If this is the last DOUTI word the "Last Issue" bit should be + * set. 
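+          * This tells the hardware that no further iterator issues follow;
+          * it is encoded below via
+          * PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN.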
+          */
+         if (iterator >= program->num_fpu_iterators) {
+            iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
+         }
+
+         /* Write the word to the buffer. */
+         pvr_pds_write_wide_constant(constants,
+                                     constant,
+                                     iterator_word); /* 64-bit Src0 */
+
+         /* Write the DOUT instruction. */
+         *instruction++ = pvr_pds_encode_douti(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
+      }
+
+      /* Update the last DOUTI instruction to have the END flag set. */
+      *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
+   } else {
+      instruction = NULL;
+   }
+
+   /* Update the data size and code size. Minimum temp count is 1. */
+   program->temps_used = 1;
+   program->data_size = total_data_size;
+   program->code_size = code_size;
+
+   return instruction;
+}
+
+/**
+ * Generate a single ld/st instruction. This can correspond to one or more
+ * real ld/st instructions based on the value of count.
+ *
+ * \param ld true to generate a load, false to generate a store.
+ * \param control Cache mode control.
+ * \param temp_index Destination temp for loads/source temp for stores, as a
+ *        32-bit register index.
+ * \param address Source address for loads/destination address for stores, in
+ *        bytes.
+ * \param count Number of dwords to load/store.
+ * \param next_constant
+ * \param total_data_size
+ * \param total_code_size
+ * \param buffer Pointer to the buffer for the program.
+ * \param data_fence Issue a data fence after each instruction.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_single_ldst_instruction(
+   bool ld,
+   const struct pvr_pds_ldst_control *control,
+   uint32_t temp_index,
+   uint64_t address,
+   uint32_t count,
+   uint32_t *next_constant,
+   uint32_t *total_data_size,
+   uint32_t *total_code_size,
+   uint32_t *restrict buffer,
+   bool data_fence,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   /* A single ld/st here does NOT necessarily correspond to a single ld/st
+    * instruction: it may need several, because each ld/st instruction can
+    * only transfer a restricted maximum number of dwords, which may be less
+    * than the count passed in.
+    */
+
+   uint32_t num_inst;
+   uint32_t constant;
+
+   if (ld) {
+      /* ld operates on 64-bit units and must load from, and to, 128-bit
+       * aligned locations. Apart from the last ld, every other ld must
+       * therefore transfer an even number (2x, x = 1, 2, ...) of 64-bit
+       * units.
+       */
+      uint32_t per_inst_count = 0;
+      uint32_t last_inst_count;
+
+      assert((gen_mode == PDS_GENERATE_SIZES) ||
+             (((count % 2) == 0) && ((address % 16) == 0) &&
+              (temp_index % 2) == 0));
+
+      count >>= 1;
+      temp_index >>= 1;
+
+      /* Find out how many ld instructions are needed and the transfer size
+       * for each of them.
+       */
+      if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
+         num_inst = 1;
+         last_inst_count = count;
+      } else {
+         per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
+         if ((per_inst_count % 2) != 0)
+            per_inst_count -= 1;
+
+         num_inst = count / per_inst_count;
+         last_inst_count = count - per_inst_count * num_inst;
+         num_inst += 1;
+      }
+
+      /* Generate all the instructions. */
+      for (uint32_t i = 0; i < num_inst; i++) {
+         if ((i == (num_inst - 1)) && (last_inst_count == 0))
+            break;
+
+         /* A single load instruction.
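+          * Each ld transfers up to PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE
+          * 64-bit units; per_inst_count was kept even above so that every
+          * intermediate transfer preserves the 128-bit alignment.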
+          */
+         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint64_t ld_src0 = 0;
+
+            ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
+            ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
+                                                        : per_inst_count) &
+                         PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
+            ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
+
+            if (!control) {
+               ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
+
+               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+                  ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
+            } else {
+               ld_src0 |= control->cache_control_const;
+            }
+
+            /* Write it to the constant. */
+            pvr_pds_write_constant64(buffer,
+                                     constant,
+                                     (uint32_t)(ld_src0),
+                                     (uint32_t)(ld_src0 >> 32));
+
+            /* Adjust values for the next ld instruction. */
+            temp_index += per_inst_count;
+            address += (((uint64_t)(per_inst_count)) << 3);
+         }
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
+
+            if (data_fence)
+               *buffer++ = pvr_pds_inst_encode_wdf(0);
+         }
+      }
+   } else {
+      /* st needs the destination memory address to be 32-bit aligned. */
+      assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
+
+      /* Find out how many st instructions are needed; each st can store at
+       * most PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE dwords.
+       */
+      num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
+      num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
+
+      /* Generate all the instructions. */
+      for (uint32_t i = 0; i < num_inst; i++) {
+         /* A single store instruction. */
+         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t per_inst_count =
+               (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
+                   ? count
+                   : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
+            uint64_t st_src0 = 0;
+
+            st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
+                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
+            st_src0 |=
+               (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
+                << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
+            st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
+
+            if (!control) {
+               st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
+
+               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+                  st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
+               }
+            } else {
+               st_src0 |= control->cache_control_const;
+            }
+
+            /* Write it to the constant. */
+            pvr_pds_write_constant64(buffer,
+                                     constant,
+                                     (uint32_t)(st_src0),
+                                     (uint32_t)(st_src0 >> 32));
+
+            /* Adjust values for the next st instruction. */
+            temp_index += per_inst_count;
+            count -= per_inst_count;
+            address += (((uint64_t)(per_inst_count)) << 2);
+         }
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
+
+            if (data_fence)
+               *buffer++ = pvr_pds_inst_encode_wdf(0);
+         }
+      }
+   }
+
+   (*total_code_size) += num_inst;
+   if (data_fence)
+      (*total_code_size) += num_inst;
+
+   if (gen_mode != PDS_GENERATE_SIZES)
+      return buffer;
+   return NULL;
+}
+
+/**
+ * Generates the programs used to prepare stream out, i.e. clear the stream
+ * out buffer overflow flags and load the persistent temps with ld
+ * instructions.
+ *
+ * This must be used in PPP state update.
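+ *
+ * For the init (load) case the generated code clears the global stream out
+ * predicate with an STMC, loads each buffer's persistent temps, and finishes
+ * with a WDF fence followed by a HALT.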
+ *
+ * \param program Pointer to the stream out program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param store_mode If true then the data is stored to memory. If false then
+ *        the data is loaded from memory.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+   struct pvr_pds_stream_out_init_program *restrict program,
+   uint32_t *restrict buffer,
+   bool store_mode,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t total_data_size = 0;
+   uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
+
+   /* Start counting constants from 0. */
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   uint32_t total_code_size = 1;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* We only need to clear the global stream out predicate; the other
+       * predicates are not used during the stream out buffer overflow test.
+       */
+      *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
+   }
+
+   for (uint32_t index = 0; index < program->num_buffers; index++) {
+      if (program->dev_address_for_buffer_data[index] != 0) {
+         /* Generate a load/store program to load/store the persistent temps.
+          */
+
+         /* NOTE: The store_mode == true case should be handled by
+          * StreamOutTerminate.
+          */
+         buffer = pvr_pds_generate_single_ldst_instruction(
+            !store_mode,
+            NULL,
+            PTDst,
+            program->dev_address_for_buffer_data[index],
+            program->pds_buffer_data_size[index],
+            &next_constant,
+            &total_data_size,
+            &total_code_size,
+            buffer,
+            false,
+            gen_mode,
+            dev_info);
+      }
+
+      PTDst += program->pds_buffer_data_size[index];
+   }
+
+   total_code_size += 2;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* We need to fence the loading. */
+      *buffer++ = pvr_pds_inst_encode_wdf(0);
+      *buffer++ = pvr_pds_inst_encode_halt(0);
+   }
+
+   /* Save the size information to the program. */
+   program->stream_out_init_pds_data_size =
+      ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords. */
+   /* PDS program code size. */
+   program->stream_out_init_pds_code_size = total_code_size;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return buffer + program->stream_out_init_pds_data_size;
+   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+      return buffer;
+
+   return NULL;
+}
+
+/**
+ * Generate the stream out terminate program for stream out.
+ *
+ * If pds_persistent_temp_size_to_store is 0, the final primitive written value
+ * will be stored.
+ *
+ * If pds_persistent_temp_size_to_store is non-zero, the values of the
+ * persistent temps will be stored into memory.
+ *
+ * The stream out terminate program is used to update the PPP state, and the
+ * data and code sections cannot be separated.
+ *
+ * \param program Pointer to the stream out program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_terminate_program(
+   struct pvr_pds_stream_out_terminate_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant;
+   uint32_t total_data_size = 0, total_code_size = 0;
+
+   /* Start counting constants from 0.
*/ + next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; + + /* Generate store program to store persistent temps. */ + buffer = pvr_pds_generate_single_ldst_instruction( + false, + NULL, + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER, + program->dev_address_for_storing_persistent_temp, + program->pds_persistent_temp_size_to_store, + &next_constant, + &total_data_size, + &total_code_size, + buffer, + false, + gen_mode, + dev_info); + + total_code_size += 2; + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + *buffer++ = pvr_pds_inst_encode_wdf(0); + *buffer++ = pvr_pds_inst_encode_halt(0); + } + + /* Save size information to program. */ + program->stream_out_terminate_pds_data_size = + ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */ + /* PDS program code size. */ + program->stream_out_terminate_pds_code_size = total_code_size; + + if (gen_mode == PDS_GENERATE_DATA_SEGMENT) + return buffer + program->stream_out_terminate_pds_data_size; + else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) + return buffer; + + return NULL; +} + +/* DrawArrays works in several steps: + * + * 1) load data from draw_indirect buffer + * 2) tweak data to match hardware formats + * 3) write data to indexblock + * 4) signal the VDM to continue + * + * This is complicated by HW limitations on alignment, as well as a HWBRN. + * + * 1) Load data. + * Loads _must_ be 128-bit aligned. Because there is no such limitation in the + * spec we must deal with this by choosing an appropriate earlier address and + * loading enough dwords that we load the entirety of the buffer. + * + * if addr & 0xf: + * load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5] + * data = tmp[0 + (uiAddr & 0xf) >> 2]... + * else + * load [addr] 4 dwords -> tmp[0, 1, 2, 3] + * data = tmp[0]... + * + * + * 2) Tweak data. + * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in + * the VDM control stream. We must subtract 1 from the loaded primCount. + * + * However, there is a HWBRN that disallows the ADD32 instruction from sourcing + * a tmp that is non-64-bit-aligned. To work around this, we must move primCount + * into another tmp that has the correct alignment. Note: this is only required + * when data = tmp[even], as primCount is data+1: + * + * if data = tmp[even]: + * primCount = data + 1 = tmp[odd] -- not 64-bit aligned! + * else: + * primCount = data + 1 = tmp[even] -- already aligned, don't need workaround. + * + * This boils down to: + * + * primCount = data[1] + * primCountSrc = data[1] + * if brn_present && (data is even): + * mov scratch, primCount + * primCountSrc = scratch + * endif + * sub primCount, primCountSrc, 1 + * + * 3) Store Data. + * Write the now-tweaked data over the top of the indexblock. + * To ensure the write completes before the VDM re-reads the data, we must cause + * a data hazard by doing a dummy (dummy meaning we don't care about the + * returned data) load from the same addresses. Again, because the ld must + * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the + * index block is 128-bit aligned. This is the client driver's responsibility. + * + * st data[0, 1, 2] -> (idxblock + 4) + * load [idxblock] 4 dwords + * + * 4) Signal the VDM + * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue + * where it is currently fenced on a dummy idxblock that has been inserted by + * the driver. 
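+ *
+ * Worked example for step 1 (illustrative values): addr = 0x1004 gives
+ * addr & 0xf = 4, so six dwords are loaded from 0x1000 and the draw arguments
+ * start at tmp[4 >> 2], i.e. tmp[1].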
+ */ + +#include "pvr_draw_indirect_arrays0.h" +#include "pvr_draw_indirect_arrays1.h" +#include "pvr_draw_indirect_arrays2.h" +#include "pvr_draw_indirect_arrays3.h" + +#include "pvr_draw_indirect_arrays_base_instance0.h" +#include "pvr_draw_indirect_arrays_base_instance1.h" +#include "pvr_draw_indirect_arrays_base_instance2.h" +#include "pvr_draw_indirect_arrays_base_instance3.h" + +#include "pvr_draw_indirect_arrays_base_instance_drawid0.h" +#include "pvr_draw_indirect_arrays_base_instance_drawid1.h" +#include "pvr_draw_indirect_arrays_base_instance_drawid2.h" +#include "pvr_draw_indirect_arrays_base_instance_drawid3.h" + +#define ENABLE_SLC_MCU_CACHE_CONTROLS(device) \ + ((device)->features.has_slc_mcu_cache_controls \ + ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \ + : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS) + +void pvr_pds_generate_draw_arrays_indirect( + struct pvr_pds_drawindirect_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) || + (gen_mode == PDS_GENERATE_SIZES)) { + const struct pvr_psc_program_output *psc_program = NULL; + switch ((program->arg_buffer >> 2) % 4) { + case 0: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_arrays_base_instance_drawid0_program; + } else { + psc_program = &pvr_draw_indirect_arrays_base_instance0_program; + } + } else { + psc_program = &pvr_draw_indirect_arrays0_program; + } + break; + case 1: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_arrays_base_instance_drawid1_program; + } else { + psc_program = &pvr_draw_indirect_arrays_base_instance1_program; + } + } else { + psc_program = &pvr_draw_indirect_arrays1_program; + } + break; + case 2: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_arrays_base_instance_drawid2_program; + } else { + psc_program = &pvr_draw_indirect_arrays_base_instance2_program; + } + } else { + psc_program = &pvr_draw_indirect_arrays2_program; + } + break; + case 3: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_arrays_base_instance_drawid3_program; + } else { + psc_program = &pvr_draw_indirect_arrays_base_instance3_program; + } + } else { + psc_program = &pvr_draw_indirect_arrays3_program; + } + break; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + memcpy(buffer, + psc_program->code, + psc_program->code_size * sizeof(uint32_t)); +#if defined(DUMP_PDS) + for (uint32_t i = 0; i < psc_program->code_size; i++) + PVR_PDS_PRINT_INST(buffer[i]); +#endif + } + + program->program = *psc_program; + } else { + switch ((program->arg_buffer >> 2) % 4) { + case 0: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates( + buffer); + } else { + pvr_write_draw_indirect_arrays_base_instance0_di_data( + buffer, + 
program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance0_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance0_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer); + } + } else { + pvr_write_draw_indirect_arrays0_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays0_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays0_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_arrays0_immediates(buffer); + } + break; + case 1: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates( + buffer); + } else { + pvr_write_draw_indirect_arrays_base_instance1_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance1_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance1_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer); + } + } else { + pvr_write_draw_indirect_arrays1_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays1_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays1_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_arrays1_immediates(buffer); + } + break; + case 2: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates( + buffer); + } else { + pvr_write_draw_indirect_arrays_base_instance2_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance2_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance2_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer); + } + } else { + 
pvr_write_draw_indirect_arrays2_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays2_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays2_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_arrays2_immediates(buffer); + } + break; + case 3: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates( + buffer); + } else { + pvr_write_draw_indirect_arrays_base_instance3_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays_base_instance3_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays_base_instance3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays_base_instance3_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer); + } + } else { + pvr_write_draw_indirect_arrays3_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_arrays3_write_vdm( + buffer, + program->index_list_addr_buffer + 4); + pvr_write_draw_indirect_arrays3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_arrays3_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_arrays3_immediates(buffer); + } + break; + } + } +} + +#include "pvr_draw_indirect_elements0.h" +#include "pvr_draw_indirect_elements1.h" +#include "pvr_draw_indirect_elements2.h" +#include "pvr_draw_indirect_elements3.h" +#include "pvr_draw_indirect_elements_base_instance0.h" +#include "pvr_draw_indirect_elements_base_instance1.h" +#include "pvr_draw_indirect_elements_base_instance2.h" +#include "pvr_draw_indirect_elements_base_instance3.h" +#include "pvr_draw_indirect_elements_base_instance_drawid0.h" +#include "pvr_draw_indirect_elements_base_instance_drawid1.h" +#include "pvr_draw_indirect_elements_base_instance_drawid2.h" +#include "pvr_draw_indirect_elements_base_instance_drawid3.h" + +void pvr_pds_generate_draw_elements_indirect( + struct pvr_pds_drawindirect_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info) +{ + if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) || + (gen_mode == PDS_GENERATE_SIZES)) { + const struct pvr_psc_program_output *psc_program = NULL; + switch ((program->arg_buffer >> 2) % 4) { + case 0: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_elements_base_instance_drawid0_program; + } else { + psc_program = &pvr_draw_indirect_elements_base_instance0_program; + } + } else { + psc_program = &pvr_draw_indirect_elements0_program; + } + break; + case 1: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_elements_base_instance_drawid1_program; + } else { + psc_program = 
&pvr_draw_indirect_elements_base_instance1_program; + } + } else { + psc_program = &pvr_draw_indirect_elements1_program; + } + break; + case 2: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_elements_base_instance_drawid2_program; + } else { + psc_program = &pvr_draw_indirect_elements_base_instance2_program; + } + } else { + psc_program = &pvr_draw_indirect_elements2_program; + } + break; + case 3: + if (program->support_base_instance) { + if (program->increment_draw_id) { + psc_program = + &pvr_draw_indirect_elements_base_instance_drawid3_program; + } else { + psc_program = &pvr_draw_indirect_elements_base_instance3_program; + } + } else { + psc_program = &pvr_draw_indirect_elements3_program; + } + break; + } + + if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { + memcpy(buffer, + psc_program->code, + psc_program->code_size * sizeof(uint32_t)); + +#if defined(DUMP_PDS) + for (uint32_t i = 0; i < psc_program->code_size; i++) + PVR_PDS_PRINT_INST(buffer[i]); +#endif + } + + program->program = *psc_program; + } else { + switch ((program->arg_buffer >> 2) % 4) { + case 0: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_elements_base_instance_drawid0_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid0_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance_drawid0_immediates( + buffer); + } else { + pvr_write_draw_indirect_elements_base_instance0_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance0_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance0_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance0_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance0_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance0_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance0_immediates( + buffer); + } + } else { + pvr_write_draw_indirect_elements0_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_elements0_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements0_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements0_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_elements0_idx_stride(buffer, + program->index_stride); + pvr_write_draw_indirect_elements0_idx_base(buffer, + program->index_buffer); + pvr_write_draw_indirect_elements0_idx_header( + buffer, + program->index_block_header); + 
pvr_write_draw_indirect_elements0_immediates(buffer); + } + break; + case 1: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_elements_base_instance_drawid1_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid1_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance_drawid1_immediates( + buffer); + } else { + pvr_write_draw_indirect_elements_base_instance1_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance1_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance1_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance1_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance1_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance1_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance1_immediates( + buffer); + } + } else { + pvr_write_draw_indirect_elements1_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_elements1_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements1_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements1_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_elements1_idx_stride(buffer, + program->index_stride); + pvr_write_draw_indirect_elements1_idx_base(buffer, + program->index_buffer); + pvr_write_draw_indirect_elements1_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements1_immediates(buffer); + } + break; + case 2: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_elements_base_instance_drawid2_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid2_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance_drawid2_immediates( + buffer); + } else { + pvr_write_draw_indirect_elements_base_instance2_di_data( + buffer, + program->arg_buffer & ~0xfull, 
+ dev_info); + pvr_write_draw_indirect_elements_base_instance2_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance2_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance2_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance2_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance2_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance2_immediates( + buffer); + } + } else { + pvr_write_draw_indirect_elements2_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_elements2_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements2_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements2_num_views(buffer, + program->num_views); + pvr_write_draw_indirect_elements2_idx_stride(buffer, + program->index_stride); + pvr_write_draw_indirect_elements2_idx_base(buffer, + program->index_buffer); + pvr_write_draw_indirect_elements2_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements2_immediates(buffer); + } + break; + case 3: + if (program->support_base_instance) { + if (program->increment_draw_id) { + pvr_write_draw_indirect_elements_base_instance_drawid3_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid3_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance_drawid3_immediates( + buffer); + } else { + pvr_write_draw_indirect_elements_base_instance3_di_data( + buffer, + program->arg_buffer & ~0xfull, + dev_info); + pvr_write_draw_indirect_elements_base_instance3_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements_base_instance3_num_views( + buffer, + program->num_views); + pvr_write_draw_indirect_elements_base_instance3_idx_stride( + buffer, + program->index_stride); + pvr_write_draw_indirect_elements_base_instance3_idx_base( + buffer, + program->index_buffer); + pvr_write_draw_indirect_elements_base_instance3_idx_header( + buffer, + program->index_block_header); + pvr_write_draw_indirect_elements_base_instance3_immediates( + buffer); + } + } else { + pvr_write_draw_indirect_elements3_di_data(buffer, + program->arg_buffer & + ~0xfull, + dev_info); + pvr_write_draw_indirect_elements3_write_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements3_flush_vdm( + buffer, + program->index_list_addr_buffer); + pvr_write_draw_indirect_elements3_num_views(buffer, + program->num_views); + 
pvr_write_draw_indirect_elements3_idx_stride(buffer,
+                                                         program->index_stride);
+            pvr_write_draw_indirect_elements3_idx_base(buffer,
+                                                       program->index_buffer);
+            pvr_write_draw_indirect_elements3_idx_header(
+               buffer,
+               program->index_block_header);
+            pvr_write_draw_indirect_elements3_immediates(buffer);
+         }
+         break;
+      }
+   }
+}
diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h
new file mode 100644
index 00000000000..18307c9bbfd
--- /dev/null
+++ b/src/imagination/vulkan/pds/pvr_pds.h
@@ -0,0 +1,1161 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PDS_H
+#define PVR_PDS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_device_info.h"
+#include "pvr_limits.h"
+#include "pds/pvr_rogue_pds_defs.h"
+#include "util/macros.h"
+
+#ifdef __cplusplus
+# define restrict __restrict__
+#endif
+
+/*****************************************************************************
+ Macro definitions
+*****************************************************************************/
+
+/* Based on the maximum number of passes that may emit DOUTW x the maximum
+ * number that might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6
+/* Based on the maximum number of passes that may emit DOUTW x the maximum
+ * number that might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3
+/* Based on max(max(UBOs, cbuffers), numTextures). */
+#define PVR_PDS_MAX_NUM_DMA_KICKS 32
+#define PVR_PDS_NUM_VERTEX_STREAMS 32
+#define PVR_PDS_NUM_VERTEX_ELEMENTS 32
+#define PVR_MAXIMUM_ITERATIONS 128
+
+#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3
+
+#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) \
+   (PVR_HAS_FEATURE(dev_info, compute_morton_capable) && \
+    !PVR_HAS_ERN(dev_info, 45493))
+
+/* FIXME: Change BIL to SPV. */
+/* Any variable location can have at most 4 32-bit components. */
+#define BIL_COMPONENTS_PER_LOCATION 4
+
+/* Maximum number of DDMADs that may be performed (number of attributes x
+ * number of DMAs per attribute).
+ */
+#define PVR_MAX_VERTEX_ATTRIB_DMAS \
+   (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION)
+
+/*****************************************************************************
+ Typedefs
+*****************************************************************************/
+
+/* FIXME: We might need to change some bools to this.
*/ +typedef uint32_t PVR_PDS_BOOL; + +/***************************************************************************** + Enums +*****************************************************************************/ + +enum pvr_pds_generate_mode { + PDS_GENERATE_SIZES, + PDS_GENERATE_CODE_SEGMENT, + PDS_GENERATE_DATA_SEGMENT, + PDS_GENERATE_CODEDATA_SEGMENTS +}; + +enum pvr_pds_store_type { PDS_COMMON_STORE, PDS_UNIFIED_STORE }; + +enum pvr_pds_vertex_attrib_program_type { + PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC, + PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE, + PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT, + PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT +}; + +/***************************************************************************** + Structure definitions +*****************************************************************************/ + +struct pvr_psc_register { + uint32_t num; + + unsigned int size; /* size of each element. */ + unsigned int dim : 4; /* max number of elements. */ + unsigned int index; /* offset into array. */ + + unsigned int cast; + + unsigned int type; + uint64_t name; + bool auto_assign; + unsigned int original_type; +}; + +struct pvr_psc_program_output { + const uint32_t *code; + + struct pvr_psc_register *data; + unsigned int data_count; + + unsigned int data_size_aligned; + unsigned int code_size_aligned; + unsigned int temp_size_aligned; + + unsigned int data_size; + unsigned int code_size; + unsigned int temp_size; + + void (*write_data)(void *data, uint32_t *buffer); +}; + +struct pvr_pds_usc_task_control { + uint64_t src0; +}; + +/* Up to 4 64-bit state words currently supported. */ +#define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4 + +/* Structure for DOUTW. */ +struct pvr_pds_doutw_control { + enum pvr_pds_store_type dest_store; + uint32_t num_const64; + uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; + bool last_instruction; + + uint32_t *data_segment; + uint32_t data_size; + uint32_t code_size; +}; + +/* Structure representing the PDS pixel event program. + * + * data_segment - pointer to the data segment + * task_control - USC task control words + * emit_words - array of Emit words + * data_size - size of data segment + * code_size - size of code segment + */ +struct pvr_pds_event_program { + uint32_t *data_segment; + struct pvr_pds_usc_task_control task_control; + + uint32_t num_emit_word_pairs; + uint32_t *emit_words; + + uint32_t data_size; + uint32_t code_size; +}; + +/* + * Structure representing the PDS pixel shader secondary attribute program. 
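+ *
+ * These fields are consumed by pvr_pds_pixel_shader_uniform_texture_data()
+ * and pvr_pds_pixel_shader_uniform_texture_code(), declared later in this
+ * header.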
+ *
+ * data_segment - pointer to the data segment
+ *
+ * num_uniform_dma_kicks - number of Uniform DMA kicks
+ * uniform_dma_control - array of Uniform DMA control words
+ * uniform_dma_address - array of Uniform DMA address words
+ *
+ * num_texture_dma_kicks - number of Texture State DMA kicks
+ * texture_dma_control - array of Texture State DMA control words
+ * texture_dma_address - array of Texture State DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ *
+ * temps_used - number of PDS temps used
+ */
+struct pvr_pds_pixel_shader_sa_program {
+   uint32_t *data_segment;
+
+   uint32_t num_dword_doutw;
+   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+   uint32_t num_q_word_doutw;
+   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+   uint32_t num_uniform_dma_kicks;
+   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_texture_dma_kicks;
+   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   bool kick_usc;
+   bool write_tile_position;
+   uint32_t tile_position_attr_dest;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   bool clear;
+   uint32_t *clear_color;
+   uint32_t clear_color_dest_reg;
+   bool packed_clear;
+
+   uint32_t data_size;
+   uint32_t code_size;
+
+   uint32_t temps_used;
+};
+
+/* Structure representing the PDS kick USC program.
+ *
+ * data_segment - pointer to the data segment
+ * usc_task_control - USC task control words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_kickusc_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing the PDS fence/doutc program.
+ *
+ * data_segment - pointer to the data segment
+ * fence_constant_word - offset of the fence constant in the data segment
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_fence_program {
+   uint32_t *data_segment;
+   uint32_t fence_constant_word;
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing the PDS coefficient loading.
+ *
+ * data_segment - pointer to the data segment
+ * num_fpu_iterators - number of FPU iterators
+ * FPU_iterators - array of FPU iterator control words
+ * destination - array of Common Store destinations
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_coeff_loading_program {
+   uint32_t *data_segment;
+   uint32_t num_fpu_iterators;
+   uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
+   uint32_t destination[PVR_MAXIMUM_ITERATIONS];
+
+   uint32_t data_size;
+   uint32_t code_size;
+
+   uint32_t temps_used;
+};
+
+/* Structure representing the PDS vertex shader secondary attribute program.
+ *
+ * data_segment - pointer to the data segment
+ * num_dma_kicks - number of DMA kicks
+ * dma_control - array of DMA control words
+ * dma_address - array of DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_vertex_shader_sa_program {
+   uint32_t *data_segment;
+
+   /* num_uniform_dma_kicks, uniform_dma_address and uniform_dma_control are
+    * not used when generating the PDS data and code sections; they currently
+    * only exist to simplify the driver implementation.
The driver should fold this
+    * information into num_dma_kicks, dma_address and dma_control for the PDS
+    * program to be generated correctly.
+    */
+
+   uint32_t num_dword_doutw;
+   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+   uint32_t num_q_word_doutw;
+   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+   uint32_t num_uniform_dma_kicks;
+   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_texture_dma_kicks;
+   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_dma_kicks;
+   uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   bool kick_usc;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   /* Shared register buffer base address (VDM/CDM context load case only). */
+   bool clear_pds_barrier;
+
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing a PDS vertex stream element.
+ *
+ * There are two types of element: repeat DMA and non-repeat DMA.
+ *
+ * Non-repeat DMA is the classic DMA of some number of bytes from an offset
+ * into contiguous registers. It is assumed the address and size are dword
+ * aligned. To use this, specify 0 for the component size. Each four bytes
+ * read will go to the next HW register.
+ *
+ * Repeat DMA enables copying of sub-dword amounts at non-dword-aligned
+ * addresses. To use this, specify the component size as either 1, 2, 3 or 4
+ * bytes. Size specifies the number of components, and each component read
+ * will go to the next HW register.
+ *
+ * In both cases, HW registers are written contiguously.
+ *
+ * offset - offset of the vertex stream element
+ * size - size of the vertex stream element in bytes for non-repeat DMA, or
+ *        number of components for repeat DMA.
+ * reg - first vertex stream element register to DMA to.
+ * component_size - size of a component for repeat DMA, or 0 for non-repeat
+ *                  DMA.
+ */
+struct pvr_pds_vertex_element {
+   uint32_t offset;
+   uint32_t size;
+   uint16_t reg;
+   uint16_t component_size;
+};
+
+/* Structure representing a PDS vertex stream.
+ *
+ * instance_data - flags whether the vertex stream holds indexed or instance
+ *                 data
+ * read_back - if true, the vertex stream reads back data output by the GPU
+ *             earlier in the same kick. This will enable MCU coherency if
+ *             relevant.
+ * multiplier - vertex stream frequency multiplier
+ * shift - vertex stream frequency shift
+ * address - vertex stream address in bytes
+ * buffer_size_in_bytes - buffer size in bytes if the vertex attribute is
+ *                        sourced from a buffer object
+ * stride - vertex stream stride in bytes
+ * num_vertices - number of vertices in the buffer, used for OOB checking;
+ *                0 = disable OOB checking.
+ * num_elements - number of vertex stream elements
+ * elements - array of vertex stream elements
+ * use_ddmadt - only valid when the has_pds_ddmadt feature is present; boolean
+ *              allowing DDMADT to be used per stream element.
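+ *
+ * Example for the two element kinds described above (illustrative values
+ * only): component_size = 2 with size = 3 repeat-DMAs three 16-bit
+ * components, one per HW register, while component_size = 0 with size = 12
+ * DMAs three whole dwords into three consecutive registers.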
+ */
+struct pvr_pds_vertex_stream {
+   bool current_state;
+   bool instance_data;
+   bool read_back;
+   uint32_t multiplier;
+   uint32_t shift;
+   uint64_t address;
+   uint32_t buffer_size_in_bytes;
+   uint32_t stride;
+   uint32_t num_vertices;
+   uint32_t num_elements;
+   struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS];
+
+   bool use_ddmadt;
+};
+
+/* Structure representing the PDS vertex shader program.
+ *
+ * This structure describes the USC code and vertex buffers required
+ * by the PDS vertex loading program.
+ *
+ * data_segment - Pointer to the data segment.
+ * usc_task_control - Description of USC task for vertex shader program.
+ * num_streams - Number of vertex streams.
+ * iterate_vtx_id - If set, the vertex id should be iterated.
+ * vtx_id_register - The register to iterate the VertexID into (if applicable).
+ * vtx_id_modifier - Value to add to/subtract from the index value received by
+ *                   PDS. This is used because the index value received by PDS
+ *                   has INDEX_OFFSET added, and generally VertexID wouldn't.
+ * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
+ * iterate_instance_id - If set, the instance id should be iterated.
+ * instance_id_register - The register to iterate the InstanceID into (if
+ *                        applicable). The vertex and instance id will both be
+ *                        iterated as unsigned ints.
+ *
+ * iterate_remap_id - Should be set to true if the vertex shader needs
+ *                    VS_REMAPPED_INDEX_ID (e.g. another TA shader runs after
+ *                    it).
+ * null_idx - Indicates no index buffer is bound, so every index should be
+ *            null_idx_value.
+ * null_idx_value - The value to use as index if null_idx is set.
+ * data_size - Size of data segment, in dwords. Output by call to
+ *             pvr_pds_vertex_shader, and used as input when generating data.
+ * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
+ *             This is the number of dword instructions that are/were
+ *             generated.
+ * temps_used - Number of temporaries used. Output by call to
+ *              pvr_pds_vertex_shader.
+ */
+struct pvr_pds_vertex_shader_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control usc_task_control;
+   uint32_t num_streams;
+
+   bool iterate_vtx_id;
+   uint32_t vtx_id_register;
+   uint32_t vtx_id_modifier;
+   bool vtx_id_sub_modifier;
+
+   bool iterate_instance_id;
+   uint32_t instance_id_register;
+   uint32_t instance_ID_modifier;
+   uint32_t base_instance;
+
+   bool iterate_remap_id;
+
+   bool null_idx;
+   uint32_t null_idx_value;
+
+   uint32_t *stream_patch_offsets;
+   uint32_t num_stream_patches;
+
+   uint32_t data_size;
+   uint32_t code_size;
+   uint32_t temps_used;
+   uint32_t ddmadt_enables;
+   uint32_t skip_stream_flag;
+
+   bool draw_indirect;
+   bool indexed;
+
+   struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS];
+};
+
+/* Structure representing the PDS shared reg storing program. */
+struct pvr_pds_shared_storing_program {
+   struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */
+   struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */
+   bool cc_enable; /*!< cc bit is set on the doutu instruction. */
+   uint32_t data_size; /*!< total data size, non-aligned. */
+   uint32_t code_size; /*!< total code size, non-aligned. */
+};
+
+#define PVR_MAX_STREAMOUT_BUFFERS 4
+
+/* Structure representing stream out init PDS programs. */
+struct pvr_pds_stream_out_init_program {
+   /* --- Input to PDS_STREAM_OUT_INIT_PROGRAM --- */
+
+   /* Number of buffers to load/store.
+    * This indicates the number of entries in the next two arrays.
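+    * (pds_buffer_data_size[] and dev_address_for_buffer_data[]).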
+ * Data is loaded/stored contiguously to persistent temps.
+ */
+ uint32_t num_buffers;
+
+ /* Number of persistent temps in dwords to load/store for each buffer. */
+ uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS];
+ /* The device address for loading/storing persistent temps for each buffer.
+ * If the address is zero, then no data is loaded/stored
+ * into pt registers for the buffer.
+ */
+ uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS];
+
+ /* PDS state update Stream Out Init Programs. */
+ uint32_t stream_out_init_pds_data_size;
+ uint32_t stream_out_init_pds_code_size;
+};
+
+/* Structure representing stream out terminate PDS program. */
+struct pvr_pds_stream_out_terminate_program {
+ /* Input to PDS_STREAM_OUT_TERMINATE_PROGRAM.
+ *
+ * The number of persistent temps (in dwords) used in the stream out PDS
+ * programs needs to be stored.
+ * The terminate program writes pds_persistent_temp_size_to_store
+ * persistent temps to dev_address_for_storing_persistent_temp.
+ */
+ uint32_t pds_persistent_temp_size_to_store;
+
+ /* The device address for storing persistent temps. */
+ uint64_t dev_address_for_storing_persistent_temp;
+
+ /* PPP state update Stream Out Program for stream out terminate. */
+ uint32_t stream_out_terminate_pds_data_size;
+ uint32_t stream_out_terminate_pds_code_size;
+};
+
+/* Structure representing the PDS compute shader program.
+ *
+ * This structure describes the USC code and compute buffers required
+ * by the PDS compute task loading program.
+ *
+ * data_segment - Pointer to the data segment.
+ * usc_task_control - Description of USC task for compute shader program.
+ * data_size - Size of data segment, in dwords. Output by call to
+ * pvr_pds_compute_shader, and used as input when generating data.
+ * code_size - Size of code segment. Output by call to
+ * pvr_pds_compute_shader. This is the number of dword instructions
+ * that are/were generated.
+ * temps_used - Number of temporaries used. Output by call to
+ * pvr_pds_compute_shader.
+ * highest_temp - The highest temp number used. Output by call to
+ * pvr_pds_compute_shader.
+ * coeff_update_task_branch_size - The number of instructions we need to
+ * branch over to skip the coefficient update task.
+ */
+
+struct pvr_pds_compute_shader_program {
+ uint32_t *data_segment;
+ struct pvr_pds_usc_task_control usc_task_control;
+ struct pvr_pds_usc_task_control usc_task_control_coeff_update;
+
+ uint32_t data_size;
+ uint32_t code_size;
+
+ uint32_t temps_used;
+ uint32_t highest_temp;
+
+ uint32_t local_input_regs[3];
+ uint32_t work_group_input_regs[3];
+ uint32_t global_input_regs[3];
+
+ uint32_t barrier_coefficient;
+
+ bool fence;
+
+ bool flattened_work_groups;
+
+ bool clear_pds_barrier;
+
+ bool has_coefficient_update_task;
+
+ uint32_t coeff_update_task_branch_size;
+
+ bool add_base_workgroup;
+ uint32_t base_workgroup_constant_offset_in_dwords[3];
+
+ bool kick_usc;
+
+ bool conditional_render;
+ uint32_t cond_render_const_offset_in_dwords;
+ uint32_t cond_render_pred_temp;
+};
+
+struct pvr_pds_ldst_control {
+ uint64_t cache_control_const;
+};
+
+/* Define a value we can use as a register number in the driver to denote that
+ * the value is unused.
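+ *
+ * A hypothetical usage sketch (not taken from the driver itself): a compute
+ * program whose shader consumes no local IDs could mark those inputs as
+ * unused:
+ *
+ *    program->local_input_regs[0] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+ *    program->local_input_regs[1] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+ *    program->local_input_regs[2] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;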
+ */ +#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU + +/***************************************************************************** + function declarations +*****************************************************************************/ + +/***************************************************************************** + Constructors +*****************************************************************************/ + +void pvr_pds_pixel_shader_sa_initialize( + struct pvr_pds_pixel_shader_sa_program *program); +void pvr_pds_compute_shader_initialize( + struct pvr_pds_compute_shader_program *program); + +/* Utility */ + +uint32_t pvr_pds_append_constant64(uint32_t *constants, + uint64_t constant_value, + uint32_t *data_size); + +uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control, + uint64_t *dma_address, + uint32_t dest_offset, + uint32_t dma_size, + uint64_t src_address, + const struct pvr_device_info *dev_info); + +void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control, + uint64_t execution_address, + uint32_t usc_temps, + uint32_t sample_rate, + uint32_t phase_rate_change); + +/* Pixel */ +#define pvr_pds_set_sizes_pixel_shader(X) \ + pvr_pds_kick_usc(X, NULL, 0, false, PDS_GENERATE_SIZES) +#define pvr_pds_generate_pixel_shader_program(X, Y) \ + pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS) + +#define pvr_pds_generate_VDM_sync_program(X, Y) \ + pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS) + +uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode); + +uint32_t * +pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); + +uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program, + uint32_t *restrict buffer, + uint32_t start_next_constant, + bool cc_enabled, + enum pvr_pds_generate_mode gen_mode); + +/* Pixel Secondary */ +#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(X, Y) \ + pvr_pds_pixel_shader_uniform_texture_data(X, \ + NULL, \ + PDS_GENERATE_SIZES, \ + true, \ + Y) +#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(X, Y) \ + pvr_pds_pixel_shader_uniform_texture_data(X, \ + NULL, \ + PDS_GENERATE_SIZES, \ + false, \ + Y) +#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(X) \ + pvr_pds_pixel_shader_uniform_texture_code(X, NULL, PDS_GENERATE_SIZES) + +#define pvr_pds_generate_pixel_shader_sa_texture_state_data(X, Y, Z) \ + pvr_pds_pixel_shader_uniform_texture_data(X, \ + Y, \ + PDS_GENERATE_DATA_SEGMENT, \ + false, \ + Z) + +#define pvr_pds_generate_pixel_shader_sa_code_segment(X, Y) \ + pvr_pds_pixel_shader_uniform_texture_code(X, Y, PDS_GENERATE_CODE_SEGMENT) + +uint32_t *pvr_pds_pixel_shader_uniform_texture_data( + struct pvr_pds_pixel_shader_sa_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + bool uniform, + const struct pvr_device_info *dev_info); + +uint32_t *pvr_pds_pixel_shader_uniform_texture_code( + struct pvr_pds_pixel_shader_sa_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode); + +/* Vertex */ +#define pvr_pds_set_sizes_vertex_shader(X, Y) \ + pvr_pds_vertex_shader(X, NULL, PDS_GENERATE_SIZES, Y) + +#define pvr_pds_generate_vertex_shader_data_segment(X, Y, Z) \ + pvr_pds_vertex_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z) + +#define 
pvr_pds_generate_vertex_shader_code_segment(X, Y, Z) \
+ pvr_pds_vertex_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Compute */
+uint32_t *
+pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+#define pvr_pds_set_sizes_compute_shader(X, Y) \
+ pvr_pds_compute_shader(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_compute_shader_data_segment(X, Y, Z) \
+ pvr_pds_compute_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_compute_shader_code_segment(X, Y, Z) \
+ pvr_pds_compute_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+/* Vertex Secondary */
+#define pvr_pds_set_sizes_vertex_shader_sa(X, Y) \
+ pvr_pds_vertex_shader_sa(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_vertex_shader_sa_data_segment(X, Y, Z) \
+ pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_vertex_shader_sa_code_segment(X, Y, Z) \
+ pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *pvr_pds_vertex_shader_sa(
+ struct pvr_pds_vertex_shader_sa_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Pixel Event */
+#define pvr_pds_set_sizes_pixel_event(X) \
+ pvr_pds_generate_pixel_event(X, NULL, PDS_GENERATE_SIZES, NULL)
+
+#define pvr_pds_generate_pixel_event_data_segment(X, Y, Z) \
+ pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_pixel_event_code_segment(X, Y, Z) \
+ pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Coefficient Loading */
+#define pvr_pds_set_sizes_coeff_loading(X) \
+ pvr_pds_coefficient_loading(X, NULL, PDS_GENERATE_SIZES)
+
+#define pvr_pds_generate_coeff_loading_program(X, Y) \
+ pvr_pds_coefficient_loading(X, Y, PDS_GENERATE_CODE_SEGMENT)
+
+uint32_t *pvr_pds_coefficient_loading(
+ struct pvr_pds_coeff_loading_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Compute DM barrier-specific conditional code */
+uint32_t *pvr_pds_generate_compute_barrier_conditional(
+ uint32_t *buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Shared register storing */
+uint32_t *pvr_pds_generate_shared_storing_program(
+ struct pvr_pds_shared_storing_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Shared register loading */
+uint32_t *pvr_pds_generate_fence_terminate_program(
+ struct pvr_pds_fence_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* CDM Shared register loading */
+uint32_t *pvr_pds_generate_compute_shared_loading_program(
+ struct pvr_pds_shared_storing_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Stream out */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+ 
struct pvr_pds_stream_out_init_program *restrict program, + uint32_t *restrict buffer, + bool store_mode, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); + +uint32_t *pvr_pds_generate_stream_out_terminate_program( + struct pvr_pds_stream_out_terminate_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); + +/* Structure representing DrawIndirect PDS programs. */ +struct pvr_pds_drawindirect_program { + /* --- Input to pvr_pds_drawindirect_program --- */ + + /* Address of the index list block in the VDM control stream. + * This must point to a 128-bit aligned index list header. + */ + uint64_t index_list_addr_buffer; + /* Address of arguments for Draw call. Layout is defined by eArgFormat. */ + uint64_t arg_buffer; + + /* Address of index buffer. */ + uint64_t index_buffer; + + /* The raw (without addr msb in [7:0]) index block header. */ + uint32_t index_block_header; + + /* Number of bytes per index. */ + uint32_t index_stride; + + /* Used during/after compilation to fill in constant buffer. */ + struct pvr_psc_register data[32]; + + /* Results of compilation. */ + struct pvr_psc_program_output program; + + /* This is used for ARB_multi_draw_indirect. */ + unsigned int count; + unsigned int stride; + + /* Internal stuff. */ + unsigned int num_views; + + bool support_base_instance; + bool increment_draw_id; +}; + +void pvr_pds_generate_draw_arrays_indirect( + struct pvr_pds_drawindirect_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); +void pvr_pds_generate_draw_elements_indirect( + struct pvr_pds_drawindirect_program *restrict program, + uint32_t *restrict buffer, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); + +uint64_t pvr_pds_encode_st_src0(uint64_t src, + uint64_t count4, + uint64_t dst_add, + bool write_through, + const struct pvr_device_info *dev_info); + +uint64_t pvr_pds_encode_ld_src0(uint64_t dest, + uint64_t count8, + uint64_t src_add, + bool cached, + const struct pvr_device_info *dev_info); + +uint32_t *pvr_pds_generate_single_ldst_instruction( + bool ld, + const struct pvr_pds_ldst_control *control, + uint32_t temp_index, + uint64_t address, + uint32_t count, + uint32_t *next_constant, + uint32_t *total_data_size, + uint32_t *total_code_size, + uint32_t *buffer, + bool data_fence, + enum pvr_pds_generate_mode gen_mode, + const struct pvr_device_info *dev_info); +struct pvr_pds_descriptor_set { + unsigned int descriptor_set; /* id of the descriptor set. */ + unsigned int size_in_dwords; /* Number of dwords to transfer. */ + unsigned int destination; /* Destination shared register to which + * descriptor entries should be loaded. + */ + bool primary; /* Primary or secondary? */ + unsigned int offset_in_dwords; /* Offset from the start of the descriptor + * set to start DMA'ing from. 
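+ *
+ * A hypothetical example (values not
+ * from the driver): descriptor_set = 0,
+ * offset_in_dwords = 4 and
+ * size_in_dwords = 8 would DMA dwords
+ * 4..11 of set 0 into shared registers
+ * starting at 'destination'.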
+ */ +}; + +#define PVR_BUFFER_TYPE_UBO (0) +#define PVR_BUFFER_TYPES_COMPILE_TIME (1) +#define PVR_BUFFER_TYPE_BLEND_CONSTS (2) +#define PVR_BUFFER_TYPE_PUSH_CONSTS (3) +#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4) +#define PVR_BUFFER_TYPE_DYNAMIC (5) +#define PVR_BUFFER_TYPES_UBO_ZEROING (6) +#define PVR_BUFFER_TYPE_INVALID (~0) + +struct pvr_pds_buffer { + uint16_t type; + + uint16_t size_in_dwords; + uint32_t destination; + + union { + uint32_t *data; + struct { + uint32_t buffer_id; + uint16_t desc_set; + uint16_t binding; + uint32_t source_offset; + }; + }; +}; + +#define PVR_PDS_MAX_BUFFERS (24) + +struct pvr_descriptor_program_input { + /* User-specified descriptor sets. */ + unsigned int descriptor_set_count; + struct pvr_pds_descriptor_set descriptor_sets[8]; + + /* "State" buffers, including: + * compile-time constants + * blend constants + * push constants + * UBOs that have been hoisted. + */ + uint32_t buffer_count; + struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS]; + + uint32_t blend_constants_used_mask; + + bool secondary_program_present; + struct pvr_pds_usc_task_control secondary_task_control; + + bool must_not_be_empty; +}; + +#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U) +#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U) +#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U) +#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U) +#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U) + +/* BaseVertex is used in shader. */ +#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U) + +#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U) + +#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U) + +struct pvr_pds_vertex_dma { + /* Try and keep this structure packing as small as possible. */ + uint16_t offset; + uint16_t stride; + + uint8_t flags; + uint8_t size_in_dwords; + uint8_t component_size_in_bytes; + uint8_t destination; + uint8_t binding_index; + uint32_t divisor; + + uint16_t robustness_buffer_offset; +}; + +struct pvr_pds_vertex_primary_program_input { + /* Control for the DOUTU that kicks the vertex USC shader. */ + struct pvr_pds_usc_task_control usc_task_control; + /* List of DMAs (of size dma_count). */ + struct pvr_pds_vertex_dma *dma_list; + uint32_t dma_count; + + /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */ + uint32_t flags; + + uint16_t vertex_id_register; + uint16_t instance_id_register; + + /* API provided baseInstance (i.e. not from drawIndirect). */ + uint32_t base_instance; + + uint16_t base_instance_register; + uint16_t base_vertex_register; + uint16_t draw_index_register; +}; + +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7) +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8) + +/* Use if pds_ddmadt is enabled. */ +#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9) + +/* Use if pds_ddmadt is not enabled. 
*/
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9)
+
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
+
+/* We pack all the following structs tightly into a buffer using += sizeof(x)
+ * offsets; this can lead to data that is not natively aligned. Supplying the
+ * packed attribute indicates that unaligned accesses may be required, and the
+ * aligned attribute causes the size of the structure to be aligned to a
+ * specific boundary.
+ */
+#define PVR_ALIGNED __attribute__((packed, aligned(1)))
+
+struct pvr_const_map_entry {
+ uint8_t type;
+ uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_literal32 {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint32_t literal_value;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_literal64 {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint64_t literal_value;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_descriptor_set {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint32_t descriptor_set;
+ PVR_PDS_BOOL primary;
+ uint32_t offset_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_constant_buffer {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint16_t buffer_id;
+ uint16_t desc_set;
+ uint16_t binding;
+ uint32_t offset;
+ uint32_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_constant_buffer_zeroing {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint16_t buffer_id;
+ uint32_t offset;
+ uint32_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_special_buffer {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint8_t buffer_type;
+ uint32_t buffer_index;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_doutu_address {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint64_t doutu_control;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_vertex_attribute_address {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint16_t offset;
+ uint16_t stride;
+ uint8_t binding_index;
+ uint8_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_robust_vertex_attribute_address {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint16_t offset;
+ uint16_t stride;
+ uint8_t binding_index;
+ uint8_t size_in_dwords;
+ uint16_t robustness_buffer_offset;
+ uint8_t component_size_in_bytes;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_vertex_attribute_max_index {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint8_t binding_index;
+ uint8_t size_in_dwords;
+ uint16_t offset;
+ uint16_t stride;
+ uint8_t component_size_in_bytes;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_base_instance {
+ uint8_t type;
+ uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_base_vertex {
+ uint8_t type;
+ uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_base_workgroup {
+ uint8_t type;
+ uint8_t const_offset;
+ uint8_t workgroup_component;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size {
+ uint8_t type;
+ uint8_t const_offset;
+ uint8_t binding_index;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_cond_render {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint32_t cond_render_pred_temp;
+} PVR_ALIGNED;
+
+struct pvr_pds_info {
+ uint32_t temps_required;
+ uint32_t code_size_in_dwords;
+ uint32_t data_size_in_dwords;
+
+ uint32_t entry_count;
+ size_t entries_size_in_bytes;
+ size_t entries_written_size_in_bytes;
+ struct 
pvr_const_map_entry *entries; +}; + +void pvr_pds_generate_descriptor_upload_program( + struct pvr_descriptor_program_input *input_program, + uint32_t *code_section, + struct pvr_pds_info *info); +void pvr_pds_generate_vertex_primary_program( + struct pvr_pds_vertex_primary_program_input *input_program, + uint32_t *code, + struct pvr_pds_info *info, + bool use_robust_vertex_fetch, + const struct pvr_device_info *dev_info); + +/** + * Generate USC address. + * + * \param doutu Location to write the generated address. + * \param execution_address Address to generate from. + */ +static ALWAYS_INLINE void +pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address) +{ + doutu[0] |= (((execution_address >> + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT) + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) & + ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK); +} + +#endif /* PVR_PDS_H */ diff --git a/src/imagination/vulkan/pds/pvr_pds_disasm.c b/src/imagination/vulkan/pds/pvr_pds_disasm.c new file mode 100644 index 00000000000..66ccb3e2409 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_disasm.c @@ -0,0 +1,1134 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_encode.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+static void pvr_error_check(PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error)
+{
+ if (err_callback)
+ err_callback(error);
+ else
+ fprintf(stderr, "ERROR: %s\n", error.text);
+}
+
+#define X(a) #a,
+static const char *const instructions[] = { PVR_INSTRUCTIONS };
+#undef X
+
+static void error_reg_range(uint32_t raw,
+ void *context,
+ PVR_ERR_CALLBACK err_callback,
+ uint32_t parameter,
+ struct pvr_dissassembler_error error)
+{
+ char param[32];
+
+ error.type = PVR_PDS_ERR_PARAM_RANGE;
+ error.parameter = parameter;
+ error.raw = raw;
+
+ if (parameter == 0)
+ snprintf(param, sizeof(param), "dst");
+ else
+ snprintf(param, sizeof(param), "src%u", parameter - 1);
+
+ error.text = malloc(PVR_PDS_MAX_INST_STR_LEN);
+ assert(error.text);
+
+ snprintf(error.text,
+ PVR_PDS_MAX_INST_STR_LEN,
+ "Register out of range, instruction: %s, operand: %s, value: %u",
+ instructions[error.instruction],
+ param,
+ raw);
+ pvr_error_check(err_callback, error);
+}
+
+static struct pvr_operand *
+pvr_pds_disassemble_regs32(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction,
+ uint32_t parameter)
+{
+ struct pvr_operand *op = calloc(1, sizeof(*op));
+ assert(op);
+
+ op->type = UNRESOLVED;
+ instruction &= PVR_ROGUE_PDSINST_REGS32_MASK;
+ switch (pvr_pds_inst_decode_field_range_regs32(instruction)) {
+ case PVR_ROGUE_PDSINST_REGS32_CONST32:
+ op->type = CONST32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ case PVR_ROGUE_PDSINST_REGS32_TEMP32:
+ op->type = TEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ case PVR_ROGUE_PDSINST_REGS32_PTEMP32:
+ op->type = PTEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ default:
+ error_reg_range(instruction, context, err_callback, parameter, error);
+ }
+ return op;
+}
+
+static struct pvr_operand *
+pvr_pds_disassemble_regs32tp(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction,
+ uint32_t parameter)
+{
+ struct pvr_operand *op = calloc(1, sizeof(*op));
+ assert(op);
+
+ op->type = UNRESOLVED;
+ instruction &= PVR_ROGUE_PDSINST_REGS32TP_MASK;
+ switch (pvr_pds_inst_decode_field_range_regs32tp(instruction)) {
+ case PVR_ROGUE_PDSINST_REGS32TP_TEMP32:
+ op->type = TEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ case PVR_ROGUE_PDSINST_REGS32TP_PTEMP32:
+ op->type = PTEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ default:
+ error_reg_range(instruction, context, err_callback, parameter, error);
+ }
+ return op;
+}
+
+static struct pvr_operand *
+pvr_pds_disassemble_regs32t(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction,
+ uint32_t parameter)
+{
+ struct pvr_operand *op = calloc(1, sizeof(*op));
+ assert(op);
+
+ op->type = UNRESOLVED;
+ instruction &= PVR_ROGUE_PDSINST_REGS32T_MASK;
+ switch (pvr_pds_inst_decode_field_range_regs32t(instruction)) {
+ case 
PVR_ROGUE_PDSINST_REGS32T_TEMP32: + op->type = TEMP32; + op->address = instruction - PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER; + op->absolute_address = op->address; + break; + default: + error_reg_range(instruction, context, err_callback, parameter, error); + } + return op; +} + +static struct pvr_operand * +pvr_pds_disassemble_regs64(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction, + uint32_t parameter) +{ + struct pvr_operand *op = calloc(1, sizeof(*op)); + assert(op); + + op->type = UNRESOLVED; + instruction &= PVR_ROGUE_PDSINST_REGS64_MASK; + switch (pvr_pds_inst_decode_field_range_regs64(instruction)) { + case PVR_ROGUE_PDSINST_REGS64_CONST64: + op->type = CONST64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER; + op->absolute_address = op->address * 2; + break; + case PVR_ROGUE_PDSINST_REGS64_TEMP64: + op->type = TEMP64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER; + op->absolute_address = op->address * 2; + break; + case PVR_ROGUE_PDSINST_REGS64_PTEMP64: + op->type = PTEMP64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER; + op->absolute_address = op->address * 2; + break; + default: + error_reg_range(instruction, context, err_callback, parameter, error); + } + + return op; +} +static struct pvr_operand * +pvr_pds_disassemble_regs64t(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction, + uint32_t parameter) +{ + struct pvr_operand *op = calloc(1, sizeof(*op)); + assert(op); + + op->type = UNRESOLVED; + instruction &= PVR_ROGUE_PDSINST_REGS64T_MASK; + switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) { + case PVR_ROGUE_PDSINST_REGS64T_TEMP64: + op->type = TEMP64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER; + op->absolute_address = op->address * 2; + break; + default: + error_reg_range(instruction, context, err_callback, parameter, error); + } + return op; +} + +static struct pvr_operand * +pvr_pds_disassemble_regs64C(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction, + uint32_t parameter) +{ + struct pvr_operand *op = calloc(1, sizeof(*op)); + assert(op); + + op->type = UNRESOLVED; + instruction &= PVR_ROGUE_PDSINST_REGS64C_MASK; + switch (pvr_rogue_pds_inst_decode_field_range_regs64c(instruction)) { + case PVR_ROGUE_PDSINST_REGS64C_CONST64: + op->type = CONST64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER; + op->absolute_address = op->address * 2; + break; + default: + error_reg_range(instruction, context, err_callback, parameter, error); + } + return op; +} + +static struct pvr_operand * +pvr_pds_disassemble_regs64tp(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction, + uint32_t parameter) +{ + struct pvr_operand *op = calloc(1, sizeof(*op)); + assert(op); + + op->type = UNRESOLVED; + instruction &= PVR_ROGUE_PDSINST_REGS64TP_MASK; + switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) { + case PVR_ROGUE_PDSINST_REGS64TP_TEMP64: + op->type = TEMP64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER; + op->absolute_address = op->address * 2; + break; + case PVR_ROGUE_PDSINST_REGS64TP_PTEMP64: + op->type = PTEMP64; + op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER; + op->absolute_address = op->address * 2; + break; + default: + error_reg_range(instruction, context, 
err_callback, parameter, error); + } + return op; +} + +#define PVR_TYPE_OPCODE BITFIELD_BIT(31U) +#define PVR_TYPE_OPCODE_SP BITFIELD_BIT(27U) +#define PVR_TYPE_OPCODEB BITFIELD_BIT(30U) + +#define PVR_TYPE_OPCODE_SHIFT 28U +#define PVR_TYPE_OPCODE_SP_SHIFT 23U +#define PVR_TYPE_OPCODEB_SHIFT 29U + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_add64(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_add *add = malloc(sizeof(*add)); + assert(add); + + add->instruction.type = INS_ADD64; + add->instruction.next = NULL; + + add->cc = instruction & PVR_ROGUE_PDSINST_ADD64_CC_ENABLE; + add->alum = instruction & PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED; + add->sna = instruction & PVR_ROGUE_PDSINST_ADD64_SNA_SUB; + + add->src0 = pvr_pds_disassemble_regs64(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT, + 1); + add->src0->instruction = &add->instruction; + add->src1 = pvr_pds_disassemble_regs64(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT, + 2); + add->src1->instruction = &add->instruction; + add->dst = pvr_pds_disassemble_regs64tp(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD64_DST_SHIFT, + 0); + add->dst->instruction = &add->instruction; + + return &add->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_add32(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_add *add = malloc(sizeof(*add)); + assert(add); + + add->instruction.type = INS_ADD32; + add->instruction.next = NULL; + + add->cc = instruction & PVR_ROGUE_PDSINST_ADD32_CC_ENABLE; + add->alum = instruction & PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED; + add->sna = instruction & PVR_ROGUE_PDSINST_ADD32_SNA_SUB; + + add->src0 = pvr_pds_disassemble_regs32(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT, + 1); + add->src0->instruction = &add->instruction; + add->src1 = pvr_pds_disassemble_regs32(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT, + 2); + add->src1->instruction = &add->instruction; + add->dst = pvr_pds_disassemble_regs32tp(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_ADD32_DST_SHIFT, + 0); + add->dst->instruction = &add->instruction; + + return &add->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_stm(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_stm *stm = malloc(sizeof(*stm)); + assert(stm); + + stm->instruction.next = NULL; + stm->instruction.type = INS_STM; + + stm->cc = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT); + stm->ccs_global = instruction & + (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT); + stm->ccs_so = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT); + stm->tst = instruction & (1 << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT); + + stm->stream_out = (instruction >> PVR_ROGUE_PDSINST_STM_SO_SHIFT) & + PVR_ROGUE_PDSINST_SO_MASK; + + stm->src0 = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT, + 1); + stm->src0->instruction = &stm->instruction; + + stm->src1 = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT, + 2); + 
stm->src1->instruction = &stm->instruction; + + stm->src2 = pvr_pds_disassemble_regs32( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT, + 3); + stm->src2->instruction = &stm->instruction; + + stm->src3 = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT, + 4); + stm->src3->instruction = &stm->instruction; + + return &stm->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sftlp32(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_sftlp *ins = malloc(sizeof(*ins)); + assert(ins); + + ins->instruction.next = NULL; + ins->instruction.type = INS_SFTLP32; + + ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE; + ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE; + ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT) & + PVR_ROGUE_PDSINST_LOP_MASK; + ins->src0 = pvr_pds_disassemble_regs32t( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT, + 1); + ins->src0->instruction = &ins->instruction; + ins->src1 = pvr_pds_disassemble_regs32( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT, + 2); + ins->src1->instruction = &ins->instruction; + ins->dst = pvr_pds_disassemble_regs32t( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT, + 0); + ins->dst->instruction = &ins->instruction; + + if (ins->IM) { + signed char cImmediate = + ((instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT) & + PVR_ROGUE_PDSINST_REGS32_MASK) + << 2; + ins->src2 = calloc(1, sizeof(*ins->src2)); + assert(ins->src2); + + ins->src2->literal = abs((cImmediate / 4)); + ins->src2->negate = cImmediate < 0; + ins->src2->instruction = &ins->instruction; + } else { + ins->src2 = pvr_pds_disassemble_regs32tp( + context, + err_callback, + error, + (instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT), + 3); + ins->src2->instruction = &ins->instruction; + } + + return &ins->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sftlp64(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_sftlp *ins = malloc(sizeof(*ins)); + assert(ins); + + ins->instruction.next = NULL; + ins->instruction.type = INS_SFTLP64; + + ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE; + ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE; + ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT) & + PVR_ROGUE_PDSINST_LOP_MASK; + ins->src0 = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT, + 1); + ins->src0->instruction = &ins->instruction; + ins->src1 = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT, + 2); + ins->src1->instruction = &ins->instruction; + ins->dst = pvr_pds_disassemble_regs64tp( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT, + 0); + ins->dst->instruction = &ins->instruction; + + if (ins->IM) { + signed char cImmediate = + (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT) & + PVR_ROGUE_PDSINST_REGS32_MASK; + ins->src2 = calloc(1, sizeof(*ins->src2)); + assert(ins->src2); + + ins->src2->literal = (abs(cImmediate) > 63) ? 
63 : abs(cImmediate); + ins->src2->negate = (cImmediate < 0); + ins->src2->instruction = &ins->instruction; + } else { + ins->src2 = pvr_pds_disassemble_regs32( + context, + err_callback, + error, + (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT), + 3); + ins->src2->instruction = &ins->instruction; + } + + return &ins->instruction; +} +static struct pvr_instruction * +pvr_pds_disassemble_instruction_cmp(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_cmp *cmp = malloc(sizeof(*cmp)); + assert(cmp); + + cmp->instruction.next = NULL; + cmp->instruction.type = INS_CMP; + cmp->cc = instruction & PVR_ROGUE_PDSINST_CMP_CC_ENABLE; + cmp->IM = instruction & PVR_ROGUE_PDSINST_CMP_IM_ENABLE; + cmp->cop = instruction >> PVR_ROGUE_PDSINST_CMP_COP_SHIFT & + PVR_ROGUE_PDSINST_COP_MASK; + cmp->src0 = pvr_pds_disassemble_regs64tp(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT, + 1); + cmp->src0->instruction = &cmp->instruction; + + if (cmp->IM) { + uint32_t immediate = (instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT) & + PVR_ROGUE_PDSINST_IMM16_MASK; + cmp->src1 = calloc(1, sizeof(*cmp->src1)); + assert(cmp->src1); + + cmp->src1->type = LITERAL_NUM; + cmp->src1->literal = immediate; + } else { + cmp->src1 = pvr_pds_disassemble_regs64( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT, + 2); + } + cmp->src1->instruction = &cmp->instruction; + + return &cmp->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sp_ld_st(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + bool ld, + uint32_t instruction, + bool cc) +{ + struct pvr_ldst *ins = malloc(sizeof(*ins)); + assert(ins); + + ins->instruction.next = NULL; + ins->instruction.type = ld ? 
INS_LD : INS_ST; + + ins->cc = cc; + ins->src0 = + pvr_pds_disassemble_regs64(context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_LD_SRC0_SHIFT, + 1); + ins->src0->instruction = &ins->instruction; + ins->st = !ld; + + return &ins->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sp_stmc(uint32_t instruction, bool cc) +{ + struct pvr_stmc *stmc = malloc(sizeof(*stmc)); + assert(stmc); + + stmc->instruction.next = NULL; + stmc->instruction.type = INS_STMC; + + stmc->cc = cc; + stmc->src0 = calloc(1, sizeof(*stmc->src0)); + assert(stmc->src0); + + stmc->src0->type = LITERAL_NUM; + stmc->src0->literal = (instruction >> PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT) & + PVR_ROGUE_PDSINST_SOMASK_MASK; + stmc->src0->instruction = &stmc->instruction; + + return &stmc->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sp_limm(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction, + bool cc) +{ + struct pvr_limm *limm = malloc(sizeof(*limm)); + assert(limm); + limm->instruction.next = NULL; + limm->instruction.type = INS_LIMM; + + limm->cc = cc; + limm->GR = (instruction & PVR_ROGUE_PDSINST_LIMM_GR_ENABLE) != 0; + limm->src0 = calloc(1, sizeof(*limm->src0)); + assert(limm->src0); + + limm->src0->type = LITERAL_NUM; + limm->src0->literal = (instruction >> PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT) & + PVR_ROGUE_PDSINST_IMM16_MASK; + limm->src0->instruction = &limm->instruction; + limm->dst = pvr_pds_disassemble_regs32t(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT, + 0); + limm->dst->instruction = &limm->instruction; + + return &limm->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_simple(enum pvr_instruction_type type, bool cc) +{ + struct pvr_simple *ins = malloc(sizeof(*ins)); + assert(ins); + + ins->instruction.next = NULL; + ins->instruction.type = type; + ins->cc = cc; + + return &ins->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_bra(uint32_t instruction) +{ + uint32_t branch_addr; + struct pvr_bra *bra = (struct pvr_bra *)malloc(sizeof(*bra)); + assert(bra); + + bra->instruction.type = INS_BRA; + bra->instruction.next = NULL; + + branch_addr = (instruction >> PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT) & + PVR_ROGUE_PDSINST_BRAADDR_MASK; + bra->address = (branch_addr & 0x40000U) ? 
((int)branch_addr) - 0x80000 + : (int)branch_addr; + + bra->srcc = malloc(sizeof(*bra->srcc)); + assert(bra->srcc); + + bra->srcc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT) & + PVR_ROGUE_PDSINST_PREDICATE_MASK; + bra->srcc->negate = instruction & PVR_ROGUE_PDSINST_BRA_NEG_ENABLE; + + bra->setc = malloc(sizeof(*bra->setc)); + assert(bra->setc); + + bra->setc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SETC_SHIFT) & + PVR_ROGUE_PDSINST_PREDICATE_MASK; + + bra->target = NULL; + + return &bra->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_sp(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + uint32_t op = (instruction >> PVR_TYPE_OPCODE_SP_SHIFT) & + PVR_ROGUE_PDSINST_OPCODESP_MASK; + bool cc = instruction & PVR_TYPE_OPCODE_SP; + + switch (op) { + case PVR_ROGUE_PDSINST_OPCODESP_LD: + error.instruction = INS_LD; + return pvr_pds_disassemble_instruction_sp_ld_st( + context, + err_callback, + error, + true, + instruction, + instruction & (1 << PVR_ROGUE_PDSINST_LD_CC_SHIFT)); + case PVR_ROGUE_PDSINST_OPCODESP_ST: + error.instruction = INS_ST; + return pvr_pds_disassemble_instruction_sp_ld_st( + context, + err_callback, + error, + false, + instruction, + instruction & (1 << PVR_ROGUE_PDSINST_ST_CC_SHIFT)); + case PVR_ROGUE_PDSINST_OPCODESP_STMC: + error.instruction = INS_STMC; + return pvr_pds_disassemble_instruction_sp_stmc(instruction, cc); + case PVR_ROGUE_PDSINST_OPCODESP_LIMM: + error.instruction = INS_LIMM; + return pvr_pds_disassemble_instruction_sp_limm(context, + err_callback, + error, + instruction, + cc); + case PVR_ROGUE_PDSINST_OPCODESP_WDF: + error.instruction = INS_WDF; + return pvr_pds_disassemble_simple(INS_WDF, cc); + case PVR_ROGUE_PDSINST_OPCODESP_LOCK: + error.instruction = INS_LOCK; + return pvr_pds_disassemble_simple(INS_LOCK, cc); + case PVR_ROGUE_PDSINST_OPCODESP_RELEASE: + error.instruction = INS_RELEASE; + return pvr_pds_disassemble_simple(INS_RELEASE, cc); + case PVR_ROGUE_PDSINST_OPCODESP_HALT: + error.instruction = INS_HALT; + return pvr_pds_disassemble_simple(INS_HALT, cc); + case PVR_ROGUE_PDSINST_OPCODESP_NOP: + error.instruction = INS_NOP; + return pvr_pds_disassemble_simple(INS_NOP, cc); + default: + error.type = PVR_PDS_ERR_SP_UNKNOWN; + error.text = "opcode unknown for special instruction"; + pvr_error_check(err_callback, error); + return NULL; + } +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_ddmad(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_ddmad *ddmad = malloc(sizeof(*ddmad)); + assert(ddmad); + + ddmad->instruction.next = NULL; + ddmad->instruction.type = INS_DDMAD; + + ddmad->cc = instruction & PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE; + ddmad->END = instruction & PVR_ROGUE_PDSINST_DDMAD_END_ENABLE; + + ddmad->src0 = pvr_pds_disassemble_regs32( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT, + 1); + ddmad->src0->instruction = &ddmad->instruction; + + ddmad->src1 = pvr_pds_disassemble_regs32t( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT, + 2); + ddmad->src1->instruction = &ddmad->instruction; + + ddmad->src2 = pvr_pds_disassemble_regs64( + context, + err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT, + 3); + ddmad->src2->instruction = &ddmad->instruction; + + ddmad->src3 = pvr_pds_disassemble_regs64C( + context, + 
err_callback, + error, + instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT, + 4); + ddmad->src3->instruction = &ddmad->instruction; + + return &ddmad->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_mad(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_mad *mad = malloc(sizeof(*mad)); + assert(mad); + + mad->instruction.next = NULL; + mad->instruction.type = INS_MAD; + + mad->cc = instruction & PVR_ROGUE_PDSINST_MAD_CC_ENABLE; + mad->sna = instruction & PVR_ROGUE_PDSINST_MAD_SNA_SUB; + mad->alum = (instruction & PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED); + + mad->src0 = pvr_pds_disassemble_regs32(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT, + 1); + mad->src0->instruction = &mad->instruction; + + mad->src1 = pvr_pds_disassemble_regs32(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT, + 2); + mad->src1->instruction = &mad->instruction; + + mad->src2 = pvr_pds_disassemble_regs64(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT, + 3); + mad->src2->instruction = &mad->instruction; + + mad->dst = pvr_pds_disassemble_regs64t(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_MAD_DST_SHIFT, + 0); + mad->dst->instruction = &mad->instruction; + + return &mad->instruction; +} + +static struct pvr_instruction * +pvr_pds_disassemble_instruction_dout(void *context, + PVR_ERR_CALLBACK err_callback, + struct pvr_dissassembler_error error, + uint32_t instruction) +{ + struct pvr_dout *dout = malloc(sizeof(*dout)); + assert(dout); + + dout->instruction.next = NULL; + dout->instruction.type = INS_DOUT; + + dout->END = instruction & PVR_ROGUE_PDSINST_DOUT_END_ENABLE; + dout->cc = instruction & PVR_ROGUE_PDSINST_DOUT_CC_ENABLE; + dout->dst = (instruction >> PVR_ROGUE_PDSINST_DOUT_DST_SHIFT) & + PVR_ROGUE_PDSINST_DSTDOUT_MASK; + + dout->src0 = pvr_pds_disassemble_regs64(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT, + 1); + dout->src0->instruction = &dout->instruction; + + dout->src1 = pvr_pds_disassemble_regs32(context, + err_callback, + error, + instruction >> + PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT, + 2); + dout->src1->instruction = &dout->instruction; + + return &dout->instruction; +} + +static void pvr_pds_free_instruction_limm(struct pvr_limm *inst) +{ + free(inst->dst); + free(inst->src0); + free(inst); +} + +static void pvr_pds_free_instruction_add(struct pvr_add *inst) +{ + free(inst->dst); + free(inst->src0); + free(inst->src1); + free(inst); +} + +static void pvr_pds_free_instruction_cmp(struct pvr_cmp *inst) +{ + free(inst->src0); + free(inst->src1); + free(inst); +} + +static void pvr_pds_free_instruction_mad(struct pvr_mad *inst) +{ + free(inst->dst); + free(inst->src0); + free(inst->src1); + free(inst->src2); + free(inst); +} + +static void pvr_pds_free_instruction_bra(struct pvr_bra *inst) +{ + free(inst->setc); + free(inst->srcc); + free(inst); +} + +static void pvr_pds_free_instruction_ddmad(struct pvr_ddmad *inst) +{ + free(inst->src0); + free(inst->src1); + free(inst->src2); + free(inst->src3); + free(inst); +} + +static void pvr_pds_free_instruction_dout(struct pvr_dout *inst) +{ + free(inst->src0); + free(inst->src1); + free(inst); +} + +static void pvr_pds_free_instruction_ldst(struct pvr_ldst *inst) +{ + free(inst->src0); + free(inst); +} + +static void pvr_pds_free_instruction_simple(struct pvr_simple *inst) +{ 
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_sftlp(struct pvr_sftlp *inst)
+{
+ free(inst->dst);
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_stm(struct pvr_stm *inst)
+{
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst->src3);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_stmc(struct pvr_stmc *inst)
+{
+ free(inst->src0);
+ free(inst);
+}
+
+void pvr_pds_free_instruction(struct pvr_instruction *instruction)
+{
+ if (!instruction)
+ return;
+
+ switch (instruction->type) {
+ case INS_LIMM:
+ pvr_pds_free_instruction_limm((struct pvr_limm *)instruction);
+ break;
+ case INS_ADD64:
+ case INS_ADD32:
+ pvr_pds_free_instruction_add((struct pvr_add *)instruction);
+ break;
+ case INS_CMP:
+ pvr_pds_free_instruction_cmp((struct pvr_cmp *)instruction);
+ break;
+ case INS_MAD:
+ pvr_pds_free_instruction_mad((struct pvr_mad *)instruction);
+ break;
+ case INS_BRA:
+ pvr_pds_free_instruction_bra((struct pvr_bra *)instruction);
+ break;
+ case INS_DDMAD:
+ pvr_pds_free_instruction_ddmad((struct pvr_ddmad *)instruction);
+ break;
+ case INS_DOUT:
+ pvr_pds_free_instruction_dout((struct pvr_dout *)instruction);
+ break;
+ case INS_LD:
+ case INS_ST:
+ pvr_pds_free_instruction_ldst((struct pvr_ldst *)instruction);
+ break;
+ case INS_WDF:
+ case INS_LOCK:
+ case INS_RELEASE:
+ case INS_HALT:
+ case INS_NOP:
+ pvr_pds_free_instruction_simple((struct pvr_simple *)instruction);
+ break;
+ case INS_SFTLP64:
+ case INS_SFTLP32:
+ pvr_pds_free_instruction_sftlp((struct pvr_sftlp *)instruction);
+ break;
+ case INS_STM:
+ pvr_pds_free_instruction_stm((struct pvr_stm *)instruction);
+ break;
+ case INS_STMC:
+ pvr_pds_free_instruction_stmc((struct pvr_stmc *)instruction);
+ break;
+ }
+}
+
+struct pvr_instruction *
+pvr_pds_disassemble_instruction2(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ uint32_t instruction)
+{
+ struct pvr_dissassembler_error error = { .context = context };
+
+ /* First we need to find out what type of OPCODE we are dealing with.
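+ * The encoding is priority-ordered, matching the if/else chain below: if
+ * bit 31 (PVR_TYPE_OPCODE) is set, this is an OPCODEC encoding with the
+ * opcode in bits [31:28]; otherwise, if bit 30 (PVR_TYPE_OPCODEB) is set,
+ * it is an OPCODEB encoding with the opcode in bits [30:29]; otherwise it
+ * is the OPCODEA MAD encoding.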
*/ + if (instruction & PVR_TYPE_OPCODE) { + uint32_t opcode_C = (instruction >> PVR_TYPE_OPCODE_SHIFT) & + PVR_ROGUE_PDSINST_OPCODEC_MASK; + switch (opcode_C) { + case PVR_ROGUE_PDSINST_OPCODEC_ADD64: + error.instruction = INS_ADD64; + return pvr_pds_disassemble_instruction_add64(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_ADD32: + error.instruction = INS_ADD32; + return pvr_pds_disassemble_instruction_add32(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_SFTLP64: + error.instruction = INS_SFTLP64; + return pvr_pds_disassemble_instruction_sftlp64(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_CMP: + error.instruction = INS_CMP; + return pvr_pds_disassemble_instruction_cmp(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_BRA: + error.instruction = INS_BRA; + return pvr_pds_disassemble_instruction_bra(instruction); + case PVR_ROGUE_PDSINST_OPCODEC_SP: + return pvr_pds_disassemble_instruction_sp(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_DDMAD: + error.instruction = INS_DDMAD; + return pvr_pds_disassemble_instruction_ddmad(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEC_DOUT: + error.instruction = INS_DOUT; + return pvr_pds_disassemble_instruction_dout(context, + err_callback, + error, + instruction); + } + } else if (instruction & PVR_TYPE_OPCODEB) { + uint32_t opcode_B = (instruction >> PVR_TYPE_OPCODEB_SHIFT) & + PVR_ROGUE_PDSINST_OPCODEB_MASK; + switch (opcode_B) { + case PVR_ROGUE_PDSINST_OPCODEB_SFTLP32: + error.instruction = INS_SFTLP32; + return pvr_pds_disassemble_instruction_sftlp32(context, + err_callback, + error, + instruction); + case PVR_ROGUE_PDSINST_OPCODEB_STM: + error.instruction = INS_STM; + return pvr_pds_disassemble_instruction_stm(context, + err_callback, + error, + instruction); + } + } else { /* Opcode A - MAD instruction. */ + error.instruction = INS_MAD; + return pvr_pds_disassemble_instruction_mad(context, + err_callback, + error, + instruction); + } + return NULL; +} diff --git a/src/imagination/vulkan/pds/pvr_pds_printer.c b/src/imagination/vulkan/pds/pvr_pds_printer.c new file mode 100644 index 00000000000..59730c96227 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_printer.c @@ -0,0 +1,666 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+
+#define X(lop, str) #str,
+static const char *const LOP[] = { PVR_PDS_LOP };
+#undef X
+
+static void pvr_pds_disassemble_operand(struct pvr_operand *op,
+ char *instr_str,
+ size_t instr_len)
+{
+#define X(enum, str, size) { #str, #size },
+ static const char *const regs[][2] = { PVR_PDS_OPERAND_TYPES };
+#undef X
+
+ if (op->type == LITERAL_NUM) {
+ snprintf(instr_str,
+ instr_len,
+ "%s (%llu)",
+ regs[op->type][0],
+ (unsigned long long)op->literal);
+ } else if (op->type == UNRESOLVED) {
+ snprintf(instr_str, instr_len, "UNRESOLVED");
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%s[%u].%s",
+ regs[op->type][0],
+ op->absolute_address,
+ regs[op->type][1]);
+ }
+}
+
+static void pvr_pds_disassemble_instruction_add64(struct pvr_add *add,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+
+ pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s %s",
+ "ADD64",
+ add->cc ? "? " : "",
+ dst,
+ src0,
+ add->sna ? "-" : "+",
+ src1,
+ add->alum ? "[signed]" : "");
+}
+
+static void pvr_pds_disassemble_instruction_add32(struct pvr_add *add,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+
+ pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s %s",
+ "ADD32",
+ add->cc ? "? " : "",
+ dst,
+ src0,
+ add->sna ? "-" : "+",
+ src1,
+ add->alum ? "[signed]" : "");
+}
+
+static void
+pvr_pds_disassemble_instruction_sftlp32(struct pvr_sftlp *instruction,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+ char src2[32];
+
+ pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst));
+
+ if (instruction->IM)
+ snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal);
+ else
+ pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+
+ if (instruction->lop == LOP_NONE) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else if (instruction->lop == LOP_NOT) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (~%s) %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (%s %s %s) %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ LOP[instruction->lop],
+ src1,
+ instruction->IM ? instruction->src2->negate ? 
">>" : "<<" : "<<", + src2); + } +} + +static void pvr_pds_disassemble_instruction_stm(struct pvr_stm *instruction, + char *instr_str, + size_t instr_len) +{ + char src0[32]; + char src1[32]; + char src2[32]; + char src3[32]; + + char stm_pred[64]; + + pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0)); + pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1)); + pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2)); + pvr_pds_disassemble_operand(instruction->src3, src3, sizeof(src3)); + + if (instruction->ccs_global) + snprintf(stm_pred, sizeof(stm_pred), "overflow_any"); + else if (instruction->ccs_so) + snprintf(stm_pred, sizeof(stm_pred), "overflow_current"); + else + stm_pred[0] = 0; + + snprintf(instr_str, + instr_len, + "%-16s%s%s%s stm%u = %s, %s, %s, %s", + "STM", + instruction->cc ? "? " : "", + stm_pred, + instruction->tst ? " (TST only)" : "", + instruction->stream_out, + src0, + src1, + src2, + src3); +} + +static void pds_disassemble_instruction_stmc(struct pvr_stmc *instruction, + char *instr_str, + size_t instr_len) +{ + char src0[32]; + + pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0)); + + snprintf(instr_str, + instr_len, + "%-16s%s %s", + "STMC", + instruction->cc ? "? " : "", + src0); +} + +static void +pvr_pds_disassemble_instruction_sftlp64(struct pvr_sftlp *instruction, + char *instr_str, + size_t instr_len) +{ + char dst[32]; + char src0[32]; + char src1[32]; + char src2[32]; + + pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0)); + pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1)); + pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst)); + + if (instruction->IM) + snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal); + else + pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2)); + + if (instruction->lop == LOP_NONE) { + snprintf(instr_str, + instr_len, + "%-16s%s%s = %s %s %s", + "SFTLP64", + instruction->cc ? "? " : "", + dst, + src0, + instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<", + src2); + } else if (instruction->lop == LOP_NOT) { + snprintf(instr_str, + instr_len, + "%-16s%s%s = (~%s) %s %s", + "SFTLP64", + instruction->cc ? "? " : "", + dst, + src0, + instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<", + src2); + } else { + snprintf(instr_str, + instr_len, + "%-16s%s%s = (%s %s %s) %s %s", + "SFTLP64", + instruction->cc ? "? " : "", + dst, + src0, + LOP[instruction->lop], + src1, + instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<", + src2); + } +} + +static void pvr_pds_disassemble_instruction_cmp(struct pvr_cmp *cmp, + char *instr_str, + size_t instr_len) +{ + char src0[32]; + char src1[32]; + static const char *const COP[] = { "=", ">", "<", "!=" }; + + pvr_pds_disassemble_operand(cmp->src0, src0, sizeof(src0)); + + if (cmp->IM) { + snprintf(src1, + sizeof(src1), + "%#04llx", + (unsigned long long)cmp->src1->literal); + } else { + pvr_pds_disassemble_operand(cmp->src1, src1, sizeof(src1)); + } + + snprintf(instr_str, + instr_len, + "%-16s%sP0 = (%s %s %s)", + "CMP", + cmp->cc ? "? " : "", + src0, + COP[cmp->cop], + src1); +} + +static void pvr_pds_disassemble_instruction_ldst(struct pvr_ldst *ins, + char *instr_str, + size_t instr_len) +{ + char src0[PVR_PDS_MAX_INST_STR_LEN]; + + pvr_pds_disassemble_operand(ins->src0, src0, sizeof(src0)); + + if (ins->st) { + snprintf(instr_str, + instr_len, + "%-16s%s%s: mem(%s) <= src(%s)", + "ST", + ins->cc ? "? 
" : "", + src0, + "?", + "?"); + } else { + snprintf(instr_str, + instr_len, + "%-16s%s%s: dst(%s) <= mem(%s)", + "ld", + ins->cc ? "? " : "", + src0, + "?", + "?"); + } +} + +static void pvr_pds_disassemble_simple(struct pvr_simple *simple, + const char *type, + char *instr_str, + size_t instr_len) +{ + snprintf(instr_str, instr_len, "%-16s%s", type, simple->cc ? "? " : ""); +} + +static void pvr_pds_disassemble_instruction_limm(struct pvr_limm *limm, + char *instr_str, + size_t instr_len) +{ + int32_t imm = (uint32_t)limm->src0->literal; + char dst[PVR_PDS_MAX_INST_STR_LEN]; + + pvr_pds_disassemble_operand(limm->dst, dst, sizeof(dst)); + + if (limm->GR) { + char *pchGReg; + + switch (imm) { + case 0: + pchGReg = "cluster"; + break; + case 1: + pchGReg = "instance"; + break; + default: + pchGReg = "unknown"; + } + + snprintf(instr_str, + instr_len, + "%-16s%s%s = G%d (%s)", + "LIMM", + limm->cc ? "? " : "", + dst, + imm, + pchGReg); + } else { + snprintf(instr_str, + instr_len, + "%-16s%s%s = %#04x", + "LIMM", + limm->cc ? "? " : "", + dst, + imm); + } +} + +static void pvr_pds_disassemble_instruction_ddmad(struct pvr_ddmad *ddmad, + char *instr_str, + size_t instr_len) +{ + char src0[PVR_PDS_MAX_INST_STR_LEN]; + char src1[PVR_PDS_MAX_INST_STR_LEN]; + char src2[PVR_PDS_MAX_INST_STR_LEN]; + char src3[PVR_PDS_MAX_INST_STR_LEN]; + + pvr_pds_disassemble_operand(ddmad->src0, src0, sizeof(src0)); + pvr_pds_disassemble_operand(ddmad->src1, src1, sizeof(src1)); + pvr_pds_disassemble_operand(ddmad->src2, src2, sizeof(src2)); + pvr_pds_disassemble_operand(ddmad->src3, src3, sizeof(src3)); + + snprintf(instr_str, + instr_len, + "%-16s%sdoutd = (%s * %s) + %s, %s%s", + "DDMAD", + ddmad->cc ? "? " : "", + src0, + src1, + src2, + src3, + ddmad->END ? "; HALT" : ""); +} + +static void pvr_pds_disassemble_predicate(uint32_t predicate, + char *buffer, + size_t buffer_length) +{ + switch (predicate) { + case PVR_ROGUE_PDSINST_PREDICATE_P0: + snprintf(buffer, buffer_length, "%s", "p0"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_IF0: + snprintf(buffer, buffer_length, "%s", "if0"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_IF1: + snprintf(buffer, buffer_length, "%s", "if1"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0: + snprintf(buffer, buffer_length, "%s", "so_overflow_0"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1: + snprintf(buffer, buffer_length, "%s", "so_overflow_1"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2: + snprintf(buffer, buffer_length, "%s", "so_overflow_2"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3: + snprintf(buffer, buffer_length, "%s", "so_overflow_3"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL: + snprintf(buffer, buffer_length, "%s", "so_overflow_any"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_KEEP: + snprintf(buffer, buffer_length, "%s", "keep"); + break; + case PVR_ROGUE_PDSINST_PREDICATE_OOB: + snprintf(buffer, buffer_length, "%s", "oob"); + break; + default: + snprintf(buffer, buffer_length, "%s", ""); + break; + } +} + +static void pvr_pds_disassemble_instruction_bra(struct pvr_bra *bra, + char *instr_str, + size_t instr_len) +{ + char setc_pred[32]; + char srcc_pred[32]; + + pvr_pds_disassemble_predicate(bra->srcc->predicate, + srcc_pred, + sizeof(srcc_pred)); + pvr_pds_disassemble_predicate(bra->setc->predicate, + setc_pred, + sizeof(setc_pred)); + + if (bra->setc->predicate != PVR_ROGUE_PDSINST_PREDICATE_KEEP) { + snprintf(instr_str, + 
instr_len, + "%-16sif %s%s %d ( setc = %s )", + "BRA", + bra->srcc->negate ? "! " : "", + srcc_pred, + bra->address, + setc_pred); + } else { + snprintf(instr_str, + instr_len, + "%-16sif %s%s %d", + "BRA", + bra->srcc->negate ? "! " : "", + srcc_pred, + bra->address); + } +} + +static void pvr_pds_disassemble_instruction_mad(struct pvr_mad *mad, + char *instr_str, + size_t instr_len) +{ + char src0[PVR_PDS_MAX_INST_STR_LEN]; + char src1[PVR_PDS_MAX_INST_STR_LEN]; + char src2[PVR_PDS_MAX_INST_STR_LEN]; + char dst[PVR_PDS_MAX_INST_STR_LEN]; + + pvr_pds_disassemble_operand(mad->src0, src0, sizeof(src0)); + pvr_pds_disassemble_operand(mad->src1, src1, sizeof(src1)); + pvr_pds_disassemble_operand(mad->src2, src2, sizeof(src2)); + pvr_pds_disassemble_operand(mad->dst, dst, sizeof(dst)); + + snprintf(instr_str, + instr_len, + "%-16s%s%s = (%s * %s) %s %s%s", + "MAD", + mad->cc ? "? " : "", + dst, + src0, + src1, + mad->sna ? "-" : "+", + src2, + mad->alum ? " [signed]" : ""); +} + +static void pvr_pds_disassemble_instruction_dout(struct pvr_dout *dout, + char *instr_str, + size_t instr_len) +{ + char src0[PVR_PDS_MAX_INST_STR_LEN]; + char src1[PVR_PDS_MAX_INST_STR_LEN]; + +#define X(dout_dst, str) #str, + static const char *const dst[] = { PVR_PDS_DOUT_DSTS }; +#undef X + + pvr_pds_disassemble_operand(dout->src0, src0, sizeof(src0)); + pvr_pds_disassemble_operand(dout->src1, src1, sizeof(src1)); + + { + snprintf(instr_str, + instr_len, + "%-16s%s%s = %s, %s%s", + "DOUT", + dout->cc ? "? " : "", + dst[dout->dst], + src0, + src1, + dout->END ? "; HALT" : ""); + } +} + +void pvr_pds_disassemble_instruction(char *instr_str, + size_t instr_len, + struct pvr_instruction *instruction) +{ + if (!instruction) { + snprintf(instr_str, + instr_len, + "Instruction was not disassembled properly\n"); + return; + } + + switch (instruction->type) { + case INS_LIMM: + pvr_pds_disassemble_instruction_limm((struct pvr_limm *)instruction, + instr_str, + instr_len); + break; + case INS_ADD64: + pvr_pds_disassemble_instruction_add64((struct pvr_add *)instruction, + instr_str, + instr_len); + break; + case INS_ADD32: + pvr_pds_disassemble_instruction_add32((struct pvr_add *)instruction, + instr_str, + instr_len); + break; + case INS_CMP: + pvr_pds_disassemble_instruction_cmp((struct pvr_cmp *)instruction, + instr_str, + instr_len); + break; + case INS_MAD: + pvr_pds_disassemble_instruction_mad((struct pvr_mad *)instruction, + instr_str, + instr_len); + break; + case INS_BRA: + pvr_pds_disassemble_instruction_bra((struct pvr_bra *)instruction, + instr_str, + instr_len); + break; + case INS_DDMAD: + pvr_pds_disassemble_instruction_ddmad((struct pvr_ddmad *)instruction, + instr_str, + instr_len); + break; + case INS_DOUT: + pvr_pds_disassemble_instruction_dout((struct pvr_dout *)instruction, + instr_str, + instr_len); + break; + case INS_LD: + case INS_ST: + pvr_pds_disassemble_instruction_ldst((struct pvr_ldst *)instruction, + instr_str, + instr_len); + break; + case INS_WDF: + pvr_pds_disassemble_simple((struct pvr_simple *)instruction, + "WDF", + instr_str, + instr_len); + break; + case INS_LOCK: + pvr_pds_disassemble_simple((struct pvr_simple *)instruction, + "LOCK", + instr_str, + instr_len); + break; + case INS_RELEASE: + pvr_pds_disassemble_simple((struct pvr_simple *)instruction, + "RELEASE", + instr_str, + instr_len); + break; + case INS_HALT: + pvr_pds_disassemble_simple((struct pvr_simple *)instruction, + "HALT", + instr_str, + instr_len); + break; + case INS_NOP: + pvr_pds_disassemble_simple((struct pvr_simple 
*)instruction, + "NOP", + instr_str, + instr_len); + break; + case INS_SFTLP32: + pvr_pds_disassemble_instruction_sftlp32((struct pvr_sftlp *)instruction, + instr_str, + instr_len); + break; + case INS_SFTLP64: + pvr_pds_disassemble_instruction_sftlp64((struct pvr_sftlp *)instruction, + instr_str, + instr_len); + break; + case INS_STM: + pvr_pds_disassemble_instruction_stm((struct pvr_stm *)instruction, + instr_str, + instr_len); + break; + case INS_STMC: + pds_disassemble_instruction_stmc((struct pvr_stmc *)instruction, + instr_str, + instr_len); + break; + default: + snprintf(instr_str, instr_len, "Printing not implemented\n"); + break; + } +} + +#if defined(DUMP_PDS) +void pvr_pds_print_instruction(uint32_t instr) +{ + char instruction_str[1024]; + struct pvr_instruction *decoded = + pvr_pds_disassemble_instruction2(0, 0, instr); + + if (!decoded) { + mesa_logd("%X\n", instr); + } else { + pvr_pds_disassemble_instruction(instruction_str, + sizeof(instruction_str), + decoded); + mesa_logd("\t0x%08x, /* %s */\n", instr, instruction_str); + } +} +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h new file mode 100644 index 00000000000..ac1d5e3d2e2 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS0_H +#define PVR_DRAW_INDIRECTARRAYS0_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays0_code[15] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1940000, /* LIMM temp[5].32 = 0000 */ + 0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */ + 0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */ + 0xd9800000, /* LIMM ? temp[0].32 = 0000 */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) + + const[4].64 */ + 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */ + 0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */ + 0xd0800003, /* ST const[6].64: mem(?) <= src(?) */ + 0xd0000004, /* LD const[8].64: dst(?) <= mem(?) 
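; the WDF that follows blocks until this load completes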
*/ + 0xd1000000, /* WDF */ + 0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_arrays0_program = { + pvr_draw_indirect_arrays0_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 16, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 11, /* size of data segment, in dwords */ + 15, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_arrays0_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x40000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays0_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays0_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays0_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays0_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h new file mode 100644 index 00000000000..19678dc8c67 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS1_H +#define PVR_DRAW_INDIRECTARRAYS1_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays1_code[15] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1940000, /* LIMM temp[5].32 = 0000 */ + 0x50141004, /* SFTLP32 temp[4].32 = temp[2].32 << 0 */ + 0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0x04101023, /* MAD temp[6].64 = (temp[2].32 * const[2].32) + + const[4].64 */ + 0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */ + 0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */ + 0xd0800003, /* ST const[6].64: mem(?) <= src(?) */ + 0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_arrays1_program = { + pvr_draw_indirect_arrays1_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 16, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 11, /* size of data segment, in dwords */ + 15, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_arrays1_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x40000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays1_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x430000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays1_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays1_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays1_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h new file mode 100644 index 
00000000000..ae75d9a38e9 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS2_H +#define PVR_DRAW_INDIRECTARRAYS2_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays2_code[15] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) + + const[4].64 */ + 0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */ + 0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */ + 0xd0800003, /* ST const[6].64: mem(?) <= src(?) */ + 0xd0000004, /* LD const[8].64: dst(?) <= mem(?) 
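; fenced by the WDF on the next line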
*/ + 0xd1000000, /* WDF */ + 0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_arrays2_program = { + pvr_draw_indirect_arrays2_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 16, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 11, /* size of data segment, in dwords */ + 15, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_arrays2_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays2_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x830000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays2_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays2_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays2_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h new file mode 100644 index 00000000000..c3df11ed070 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS3_H +#define PVR_DRAW_INDIRECTARRAYS3_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays3_code[15] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0xd9900000, /* LIMM ? temp[4].32 = 0000 */ + 0x04201023, /* MAD temp[6].64 = (temp[4].32 * const[2].32) + + const[4].64 */ + 0x50343004, /* SFTLP32 temp[4].32 = temp[6].32 << 0 */ + 0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */ + 0xd0800003, /* ST const[6].64: mem(?) <= src(?) */ + 0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_arrays3_program = { + pvr_draw_indirect_arrays3_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 16, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 11, /* size of data segment, in dwords */ + 15, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_arrays3_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays3_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0xc30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays3_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays3_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays3_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h 
new file mode 100644 index 00000000000..6513876fdd5 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance0_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1940000, /* LIMM temp[5].32 = 0000 */ + 0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */ + 0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */ + 0xd9800000, /* LIMM ? temp[0].32 = 0000 */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) + + const[4].64 */ + 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */ + 0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */ + 0x9001a0a0, /* ADD32 ptemp[0].32 = const[6].32 + temp[2].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a0e1, /* ADD32 ptemp[1].32 = const[6].32 + temp[3].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) 
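; the following WDF waits for this read before the closing DOUTV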
*/ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance0_program = { + pvr_draw_indirect_arrays_base_instance0_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance0_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x40000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance0_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance0_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h new file mode 100644 index 00000000000..aa5f67592f1 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance1_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0x04101024, /* MAD temp[8].64 = (temp[2].32 * const[2].32) + + const[4].64 */ + 0x50444002, /* SFTLP32 temp[2].32 = temp[8].32 << 0 */ + 0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */ + 0x9001a0e0, /* ADD32 ptemp[0].32 = const[6].32 + temp[3].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a121, /* ADD32 ptemp[1].32 = const[6].32 + temp[4].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance1_program = { + pvr_draw_indirect_arrays_base_instance1_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 12, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance1_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance1_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x430000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x2140000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance1_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + 
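/* fixed immediates: this 64-bit zero fills data dwords 4..5 (dword-indexed buffer assumed) */ \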
PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h new file mode 100644 index 00000000000..588c0cb7be2 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance2_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) + + const[4].64 */ + 0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */ + 0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */ + 0x9001a120, /* ADD32 ptemp[0].32 = const[6].32 + temp[4].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a161, /* ADD32 ptemp[1].32 = const[6].32 + temp[5].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) 
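; drained by the WDF below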
*/ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance2_program = { + pvr_draw_indirect_arrays_base_instance2_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance2_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance2_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x830000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance2_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h new file mode 100644 index 00000000000..2f36afa298d --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance3_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0xd9900000, /* LIMM ? temp[4].32 = 0000 */ + 0x04201024, /* MAD temp[8].64 = (temp[4].32 * const[2].32) + + const[4].64 */ + 0x50444004, /* SFTLP32 temp[4].32 = temp[8].32 << 0 */ + 0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */ + 0x9001a160, /* ADD32 ptemp[0].32 = const[6].32 + temp[5].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a1a1, /* ADD32 ptemp[1].32 = const[6].32 + temp[6].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance3_program = { + pvr_draw_indirect_arrays_base_instance3_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 12, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance3_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance3_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0xc30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x2140000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance3_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + 
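/* seeds dwords 4..5 with zero before the dword 3 and dword 6 writes below (dword indexing assumed) */ \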
PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h new file mode 100644 index 00000000000..62c36fd40df --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h @@ -0,0 +1,115 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid0_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1940000, /* LIMM temp[5].32 = 0000 */ + 0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */ + 0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */ + 0xd9800000, /* LIMM ? temp[0].32 = 0000 */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) + + const[4].64 */ + 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */ + 0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */ + 0x9001a0a0, /* ADD32 ptemp[0].32 = const[6].32 + temp[2].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a0e1, /* ADD32 ptemp[1].32 = const[6].32 + temp[3].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) 
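; the next WDF stalls until this data lands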
*/ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance_drawid0_program = { + pvr_draw_indirect_arrays_base_instance_drawid0_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x40000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates( \ + buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h new file mode 100644 index 00000000000..2207ac79e29 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h @@ -0,0 +1,115 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid1_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9840000, /* LIMM ? temp[1].32 = 0000 */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0x04101024, /* MAD temp[8].64 = (temp[2].32 * const[2].32) + + const[4].64 */ + 0x50444002, /* SFTLP32 temp[2].32 = temp[8].32 << 0 */ + 0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */ + 0x9001a0e0, /* ADD32 ptemp[0].32 = const[6].32 + temp[3].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a121, /* ADD32 ptemp[1].32 = const[6].32 + temp[4].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance_drawid1_program = { + pvr_draw_indirect_arrays_base_instance_drawid1_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 12, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x430000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2140000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define 
pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates( \ + buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h new file mode 100644 index 00000000000..daf4df6c02d --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h @@ -0,0 +1,115 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid2_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9880000, /* LIMM ? temp[2].32 = 0000 */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) + + const[4].64 */ + 0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */ + 0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */ + 0x9001a120, /* ADD32 ptemp[0].32 = const[6].32 + temp[4].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a161, /* ADD32 ptemp[1].32 = const[6].32 + temp[5].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) 
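+ (word pair patched into the data segment at buffer + 10 by the
+ _flush_vdm macro below)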
*/ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance_drawid2_program = { + pvr_draw_indirect_arrays_base_instance_drawid2_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 10, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x830000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x1940000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates( \ + buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h new file mode 100644 index 00000000000..56e773bd9a3 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h @@ -0,0 +1,115 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H +#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H + +/* Initially generated from ARB_draw_indirect_arrays.pds */ + +static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid3_code[18] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd98c0000, /* LIMM ? temp[3].32 = 0000 */ + 0xd9900000, /* LIMM ? temp[4].32 = 0000 */ + 0x04201024, /* MAD temp[8].64 = (temp[4].32 * const[2].32) + + const[4].64 */ + 0x50444004, /* SFTLP32 temp[4].32 = temp[8].32 << 0 */ + 0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */ + 0x9001a160, /* ADD32 ptemp[0].32 = const[6].32 + temp[5].32 */ + 0xd0800004, /* ST const[8].64: mem(?) <= src(?) */ + 0x9001a1a1, /* ADD32 ptemp[1].32 = const[6].32 + temp[6].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_arrays_base_instance_drawid3_program = { + pvr_draw_indirect_arrays_base_instance_drawid3_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 4, /* number of constant mappings */ + + 12, /* size of data segment, in dwords, aligned to 4 */ + 20, /* size of code segment, in dwords, aligned to 4 */ + 12, /* size of temp segment, in dwords, aligned to 4 */ + 12, /* size of data segment, in dwords */ + 18, /* size of code segment, in dwords */ + 12, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0xc30000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2140000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } while (0) +#define 
pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates( \ + buffer) \ + do { \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h new file mode 100644 index 00000000000..d2f00a3f276 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS0_H +#define PVR_DRAW_INDIRECTELEMENTS0_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements0_code[21] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a0e0, /* ADD32 ptemp[0].32 = const[2].32 + temp[3].32 */ + 0x04101822, /* MAD temp[4].64 = (temp[2].32 * const[3].32) + + const[4].64 */ + 0x53283006, /* SFTLP32 temp[6].32 = (temp[5].32 | const[6].32) << 0 + */ + 0x50242007, /* SFTLP32 temp[7].32 = temp[4].32 << 0 */ + 0x04083842, /* MAD temp[4].64 = (temp[1].32 * const[7].32) + + const[8].64 */ + 0x50242001, /* SFTLP32 temp[1].32 = temp[4].32 << 0 */ + 0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */ + 0x91204289, /* ADD32 temp[9].32 = temp[1].32 - const[10].32 */ + 0x501c180a, /* SFTLP32 temp[10].32 = temp[3].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1940000, /* LIMM temp[5].32 = 0000 */ + 0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */ + 0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */ + 0xd9a00000, /* LIMM ? temp[8].32 = 0000 */ + 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
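+ (loads the flush words that the _flush_vdm macro patches at
+ buffer + 14)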
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_elements0_program = { + pvr_draw_indirect_elements0_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 21, /* size of code segment, in dwords */ + 18, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_elements0_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x40000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1850000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x3160000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_idx_stride(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_idx_base(buffer, value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_idx_header(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements0_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h new file mode 100644 index 00000000000..6ce4d636541 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS1_H +#define PVR_DRAW_INDIRECTELEMENTS1_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements1_code[21] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a120, /* ADD32 ptemp[0].32 = const[2].32 + temp[4].32 */ + 0x04181823, /* MAD temp[6].64 = (temp[3].32 * const[3].32) + + const[4].64 */ + 0x53383008, /* SFTLP32 temp[8].32 = (temp[7].32 | const[6].32) << 0 + */ + 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */ + 0x04103843, /* MAD temp[6].64 = (temp[2].32 * const[7].32) + + const[8].64 */ + 0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */ + 0x500c080a, /* SFTLP32 temp[10].32 = temp[1].32 << 0 */ + 0x9120828b, /* ADD32 temp[11].32 = temp[2].32 - const[10].32 */ + 0x5024200c, /* SFTLP32 temp[12].32 = temp[4].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_elements1_program = { + pvr_draw_indirect_elements1_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 21, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_elements1_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_idx_stride(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_idx_base(buffer, value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_idx_header(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements1_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h new file mode 100644 index 00000000000..d7823eb9a27 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS2_H +#define PVR_DRAW_INDIRECTELEMENTS2_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements2_code[21] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a160, /* ADD32 ptemp[0].32 = const[2].32 + temp[5].32 */ + 0x04201820, /* MAD temp[0].64 = (temp[4].32 * const[3].32) + + const[4].64 */ + 0x53083006, /* SFTLP32 temp[6].32 = (temp[1].32 | const[6].32) << 0 + */ + 0x50040007, /* SFTLP32 temp[7].32 = temp[0].32 << 0 */ + 0x04183840, /* MAD temp[0].64 = (temp[3].32 * const[7].32) + + const[8].64 */ + 0x50040003, /* SFTLP32 temp[3].32 = temp[0].32 << 0 */ + 0x50141008, /* SFTLP32 temp[8].32 = temp[2].32 << 0 */ + 0x9120c289, /* ADD32 temp[9].32 = temp[3].32 - const[10].32 */ + 0x502c280a, /* SFTLP32 temp[10].32 = temp[5].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a00000, /* LIMM ? temp[8].32 = 0000 */ + 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
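+ (the ST above goes through const[12].64, patched by _write_vdm;
+ this load reads const[14].64, patched by _flush_vdm)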
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_elements2_program = { + pvr_draw_indirect_elements2_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 21, /* size of code segment, in dwords */ + 18, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_elements2_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1850000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x3160000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_idx_stride(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_idx_base(buffer, value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_idx_header(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements2_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h new file mode 100644 index 00000000000..27f59e3c68d --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS3_H +#define PVR_DRAW_INDIRECTELEMENTS3_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements3_code[21] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a1a0, /* ADD32 ptemp[0].32 = const[2].32 + temp[6].32 */ + 0x04281820, /* MAD temp[0].64 = (temp[5].32 * const[3].32) + + const[4].64 */ + 0x53083007, /* SFTLP32 temp[7].32 = (temp[1].32 | const[6].32) << 0 + */ + 0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */ + 0x04203840, /* MAD temp[0].64 = (temp[4].32 * const[7].32) + + const[8].64 */ + 0x50040004, /* SFTLP32 temp[4].32 = temp[0].32 << 0 */ + 0x501c1809, /* SFTLP32 temp[9].32 = temp[3].32 << 0 */ + 0x9121028a, /* ADD32 temp[10].32 = temp[4].32 - const[10].32 */ + 0x5034300b, /* SFTLP32 temp[11].32 = temp[6].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output pvr_draw_indirect_elements3_program = { + pvr_draw_indirect_elements3_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 21, /* size of code segment, in dwords */ + 18, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ +}; + +#define pvr_write_draw_indirect_elements3_di_data(buffer, addr, device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_write_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x1c50000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_flush_vdm(buffer, addr) \ + do { \ + uint64_t data = ((addr) | (0x3160000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_idx_stride(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_idx_base(buffer, value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_idx_header(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_num_views(buffer, value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements3_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h new file mode 100644 index 00000000000..6ac6d8bc2a8 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h @@ -0,0 +1,142 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements_base_instance0_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a0e0, /* ADD32 ptemp[0].32 = const[2].32 + temp[3].32 */ + 0x9000a121, /* ADD32 ptemp[1].32 = const[2].32 + temp[4].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0x04101823, /* MAD temp[6].64 = (temp[2].32 * const[3].32) + + const[4].64 */ + 0x53383008, /* SFTLP32 temp[8].32 = (temp[7].32 | const[6].32) << 0 + */ + 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */ + 0x04083843, /* MAD temp[6].64 = (temp[1].32 * const[7].32) + + const[8].64 */ + 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */ + 0x5004000a, /* SFTLP32 temp[10].32 = temp[0].32 << 0 */ + 0x9120428b, /* ADD32 temp[11].32 = temp[1].32 - const[10].32 */ + 0x501c180c, /* SFTLP32 temp[12].32 = temp[3].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x500c0806, /* SFTLP32 temp[6].32 = temp[1].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
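+ (the WDF below waits for this load to complete before the closing
+ doutv; _flush_vdm patches the source words at buffer + 14)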
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance0_program = { + pvr_draw_indirect_elements_base_instance0_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance0_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_idx_stride(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_idx_base(buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_idx_header(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance0_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h new file mode 100644 index 00000000000..ba91dc8e675 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h @@ -0,0 +1,142 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements_base_instance1_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a120, /* ADD32 ptemp[0].32 = const[2].32 + temp[4].32 */ + 0x9000a161, /* ADD32 ptemp[1].32 = const[2].32 + temp[5].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0x04181823, /* MAD temp[6].64 = (temp[3].32 * const[3].32) + + const[4].64 */ + 0x53383008, /* SFTLP32 temp[8].32 = (temp[7].32 | const[6].32) << 0 + */ + 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */ + 0x04103843, /* MAD temp[6].64 = (temp[2].32 * const[7].32) + + const[8].64 */ + 0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */ + 0x500c080a, /* SFTLP32 temp[10].32 = temp[1].32 << 0 */ + 0x9120828b, /* ADD32 temp[11].32 = temp[2].32 - const[10].32 */ + 0x5024200c, /* SFTLP32 temp[12].32 = temp[4].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance1_program = { + pvr_draw_indirect_elements_base_instance1_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance1_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_idx_stride(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_idx_base(buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_idx_header(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance1_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h new file mode 100644 index 00000000000..a6515b3dcd9 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h @@ -0,0 +1,142 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements_base_instance2_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a160, /* ADD32 ptemp[0].32 = const[2].32 + temp[5].32 */ + 0x9000a1a1, /* ADD32 ptemp[1].32 = const[2].32 + temp[6].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0x04201820, /* MAD temp[0].64 = (temp[4].32 * const[3].32) + + const[4].64 */ + 0x53083007, /* SFTLP32 temp[7].32 = (temp[1].32 | const[6].32) << 0 + */ + 0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */ + 0x04183840, /* MAD temp[0].64 = (temp[3].32 * const[7].32) + + const[8].64 */ + 0x50040003, /* SFTLP32 temp[3].32 = temp[0].32 << 0 */ + 0x50141009, /* SFTLP32 temp[9].32 = temp[2].32 << 0 */ + 0x9120c28a, /* ADD32 temp[10].32 = temp[3].32 - const[10].32 */ + 0x502c280b, /* SFTLP32 temp[11].32 = temp[5].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance2_program = { + pvr_draw_indirect_elements_base_instance2_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 18, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance2_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x1c50000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3160000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_idx_stride(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_idx_base(buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_idx_header(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance2_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h new file mode 100644 index 00000000000..7a188884ee3 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h @@ -0,0 +1,142 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t pvr_draw_indirect_elements_base_instance3_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a1a0, /* ADD32 ptemp[0].32 = const[2].32 + temp[6].32 */ + 0x9000a1e1, /* ADD32 ptemp[1].32 = const[2].32 + temp[7].32 */ + 0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */ + 0x04281820, /* MAD temp[0].64 = (temp[5].32 * const[3].32) + + const[4].64 */ + 0x53083008, /* SFTLP32 temp[8].32 = (temp[1].32 | const[6].32) << 0 + */ + 0x50040009, /* SFTLP32 temp[9].32 = temp[0].32 << 0 */ + 0x04203840, /* MAD temp[0].64 = (temp[4].32 * const[7].32) + + const[8].64 */ + 0x50040004, /* SFTLP32 temp[4].32 = temp[0].32 << 0 */ + 0x501c180a, /* SFTLP32 temp[10].32 = temp[3].32 << 0 */ + 0x9121028b, /* ADD32 temp[11].32 = temp[4].32 - const[10].32 */ + 0x5034300c, /* SFTLP32 temp[12].32 = temp[6].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ +}; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance3_program = { + pvr_draw_indirect_elements_base_instance3_code, /* code segment */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance3_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_write_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_flush_vdm(buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_idx_stride(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_idx_base(buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_idx_header(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_num_views(buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 7, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance3_immediates(buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h new file mode 100644 index 00000000000..885c00cde4d --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h @@ -0,0 +1,152 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t + pvr_draw_indirect_elements_base_instance_drawid0_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a0e0, /* ADD32 ptemp[0].32 = const[2].32 + temp[3].32 */ + 0x9000a121, /* ADD32 ptemp[1].32 = const[2].32 + temp[4].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0x04102033, /* MAD temp[6].64 = (temp[2].32 * const[4].32) + + const[6].64 */ + 0x53382808, /* SFTLP32 temp[8].32 = (temp[7].32 | const[5].32) << + * 0 + */ + 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */ + 0x04084053, /* MAD temp[6].64 = (temp[1].32 * const[8].32) + + const[10].64 */ + 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */ + 0x5004000a, /* SFTLP32 temp[10].32 = temp[0].32 << 0 */ + 0x912040cb, /* ADD32 temp[11].32 = temp[1].32 - const[3].32 */ + 0x501c180c, /* SFTLP32 temp[12].32 = temp[3].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x500c0806, /* SFTLP32 temp[6].32 = temp[1].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ + }; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance_drawid0_program = { + pvr_draw_indirect_elements_base_instance_drawid0_code, /* code segment + */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base( \ + buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 5, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_num_views( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid0_immediates( \ + buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h new file mode 100644 index 00000000000..aa4dae77cef --- /dev/null +++ 
b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h @@ -0,0 +1,152 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t + pvr_draw_indirect_elements_base_instance_drawid1_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a120, /* ADD32 ptemp[0].32 = const[2].32 + temp[4].32 */ + 0x9000a161, /* ADD32 ptemp[1].32 = const[2].32 + temp[5].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0x04182033, /* MAD temp[6].64 = (temp[3].32 * const[4].32) + + const[6].64 */ + 0x53382808, /* SFTLP32 temp[8].32 = (temp[7].32 | const[5].32) << + * 0 + */ + 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */ + 0x04104053, /* MAD temp[6].64 = (temp[2].32 * const[8].32) + + const[10].64 */ + 0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */ + 0x500c080a, /* SFTLP32 temp[10].32 = temp[1].32 << 0 */ + 0x912080cb, /* ADD32 temp[11].32 = temp[2].32 - const[3].32 */ + 0x5024200c, /* SFTLP32 temp[12].32 = temp[4].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd19c0000, /* LIMM temp[7].32 = 0000 */ + 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */ + 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ + }; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance_drawid1_program = { + pvr_draw_indirect_elements_base_instance_drawid1_code, /* code segment + */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x60000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base( \ + buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 5, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_num_views( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid1_immediates( \ + buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h new file mode 100644 index 00000000000..a6449f7ea57 --- /dev/null +++ 
b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h @@ -0,0 +1,152 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t + pvr_draw_indirect_elements_base_instance_drawid2_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a160, /* ADD32 ptemp[0].32 = const[2].32 + temp[5].32 */ + 0x9000a1a1, /* ADD32 ptemp[1].32 = const[2].32 + temp[6].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0x04202030, /* MAD temp[0].64 = (temp[4].32 * const[4].32) + + const[6].64 */ + 0x53082807, /* SFTLP32 temp[7].32 = (temp[1].32 | const[5].32) << + * 0 + */ + 0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */ + 0x04184050, /* MAD temp[0].64 = (temp[3].32 * const[8].32) + + const[10].64 */ + 0x50040003, /* SFTLP32 temp[3].32 = temp[0].32 << 0 */ + 0x50141009, /* SFTLP32 temp[9].32 = temp[2].32 << 0 */ + 0x9120c0ca, /* ADD32 temp[10].32 = temp[3].32 - const[3].32 */ + 0x502c280b, /* SFTLP32 temp[11].32 = temp[5].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ + }; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance_drawid2_program = { + pvr_draw_indirect_elements_base_instance_drawid2_code, /* code segment + */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 18, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x1c50000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3160000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base( \ + buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 5, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_num_views( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid2_immediates( \ + buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h new file mode 100644 index 00000000000..73ae0ee2403 --- /dev/null +++ 
b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h @@ -0,0 +1,152 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H +#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H + +/* Initially generated from ARB_draw_indirect_elements.pds */ + +static const uint32_t + pvr_draw_indirect_elements_base_instance_drawid3_code[23] = { + 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */ + 0xd1000000, /* WDF */ + 0x9000a1a0, /* ADD32 ptemp[0].32 = const[2].32 + temp[6].32 */ + 0x9000a1e1, /* ADD32 ptemp[1].32 = const[2].32 + temp[7].32 */ + 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */ + 0x04282030, /* MAD temp[0].64 = (temp[5].32 * const[4].32) + + const[6].64 */ + 0x53082808, /* SFTLP32 temp[8].32 = (temp[1].32 | const[5].32) << + * 0 + */ + 0x50040009, /* SFTLP32 temp[9].32 = temp[0].32 << 0 */ + 0x04204050, /* MAD temp[0].64 = (temp[4].32 * const[8].32) + + const[10].64 */ + 0x50040004, /* SFTLP32 temp[4].32 = temp[0].32 << 0 */ + 0x501c180a, /* SFTLP32 temp[10].32 = temp[3].32 << 0 */ + 0x912100cb, /* ADD32 temp[11].32 = temp[4].32 - const[3].32 */ + 0x5034300c, /* SFTLP32 temp[12].32 = temp[6].32 << 0 */ + 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */ + 0xd1840000, /* LIMM temp[1].32 = 0000 */ + 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */ + 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */ + 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */ + 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */ + 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */ + 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) 
*/ + 0xd1000000, /* WDF */ + 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */ + }; + +static const struct pvr_psc_program_output + pvr_draw_indirect_elements_base_instance_drawid3_program = { + pvr_draw_indirect_elements_base_instance_drawid3_code, /* code segment + */ + 0, /* constant mappings, zeroed since we use the macros below */ + 7, /* number of constant mappings */ + + 16, /* size of data segment, in dwords, aligned to 4 */ + 24, /* size of code segment, in dwords, aligned to 4 */ + 20, /* size of temp segment, in dwords, aligned to 4 */ + 16, /* size of data segment, in dwords */ + 23, /* size of code segment, in dwords */ + 20, /* size of temp segment, in dwords */ + NULL /* function pointer to write data segment */ + }; + +#define pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(buffer, \ + addr, \ + device) \ + do { \ + uint64_t data = ((addr) | (0x80000000000ULL) | \ + ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 0, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x2050000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 12, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm( \ + buffer, \ + addr) \ + do { \ + uint64_t data = ((addr) | (0x3960000000000ULL)); \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 14, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 4, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base( \ + buffer, \ + value) \ + do { \ + uint64_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 6, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 5, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_num_views( \ + buffer, \ + value) \ + do { \ + uint32_t data = value; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 8, &data, sizeof(data)); \ + } while (0) +#define pvr_write_draw_indirect_elements_base_instance_drawid3_immediates( \ + buffer) \ + do { \ + { \ + uint32_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 2, &data, sizeof(data)); \ + } \ + { \ + uint32_t data = 0x1; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 3, &data, sizeof(data)); \ + } \ + { \ + uint64_t data = 0x0; \ + PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \ + memcpy(buffer + 10, &data, sizeof(data)); \ + } \ + } while (0) +#endif diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_defs.h b/src/imagination/vulkan/pds/pvr_rogue_pds_defs.h new file mode 100644 index 00000000000..249b688d559 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_rogue_pds_defs.h @@ -0,0 +1,1661 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DEFS_H
+#define PVR_ROGUE_PDS_DEFS_H
+
+#include <stdint.h>
+
+/* Instruction type C */
+#define PVR_ROGUE_PDSINST_OPCODEC_MASK (0x0000000FU)
+/* 64 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD64 UINT32_C(0x00000008)
+/* 32 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD32 UINT32_C(0x00000009)
+/* Shift and/or Logic Operation (64 bit)*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SFTLP64 UINT32_C(0x0000000a)
+/* Compare and set predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_CMP UINT32_C(0x0000000b)
+/* Branch and/or select predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_BRA UINT32_C(0x0000000c)
+/* Umbrella OpcodeSP instructions*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SP UINT32_C(0x0000000d)
+/* Multiply Accumulate with DOUD*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DDMAD UINT32_C(0x0000000e)
+/* DOUT Command*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DOUT UINT32_C(0x0000000f)
+
+/* Logical Operation */
+#define PVR_ROGUE_PDSINST_LOP_MASK (0x00000007U)
+#define PVR_ROGUE_PDSINST_LOP_NONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOP_NOT (0x00000001U)
+#define PVR_ROGUE_PDSINST_LOP_AND (0x00000002U)
+#define PVR_ROGUE_PDSINST_LOP_OR (0x00000003U)
+#define PVR_ROGUE_PDSINST_LOP_XOR (0x00000004U)
+#define PVR_ROGUE_PDSINST_LOP_XNOR (0x00000005U)
+#define PVR_ROGUE_PDSINST_LOP_NAND (0x00000006U)
+#define PVR_ROGUE_PDSINST_LOP_NOR (0x00000007U)
+
+/* 64-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS64TP_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER (15U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER (16U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER (31U)
+
+/* 32-bit Registers - 32-bit aligned. */
+#define PVR_ROGUE_PDSINST_REGS32_MASK (0x000000FFU)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER (127U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER (128U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER (159U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32 (2U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER (192U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER (223U)
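The three 32-bit banks above share one flat operand space: constants at 0-127, temps at 128-159, persistent temps at 192-223. A minimal illustrative sketch of that mapping (the helper names are ours, not from this patch):

```c
#include <stdint.h>

/* Illustrative only: bank-local register number -> flat REGS32 operand. */
static inline uint32_t pvr_regs32_const(uint32_t i)
{
   return PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER + i; /* const[i]:   0..127 */
}

static inline uint32_t pvr_regs32_temp(uint32_t i)
{
   return PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER + i; /* temp[i]:  128..159 */
}

static inline uint32_t pvr_regs32_ptemp(uint32_t i)
{
   return PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + i; /* ptemp[i]: 192..223 */
}
```

For example, temp[6] encodes as 134, the value visible in the SRC1 field of the 0x9000a1a0 ADD32 word in the generated draw-indirect programs earlier in this patch.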
+
+/* cc ? if im then
+ * cc ? dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ? dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 64 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set use SFT as a direct shift
+ * value, otherwise use an address to obtain the shift value. The shift value
+ * (SRC2) is treated as a 2's complement encoded signed value. A negative value
+ * encodes a right shift. Values are clamped to the range [-63,63].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_DEFAULT (0xA0000000U) /* SFTLP64 */
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT (0U)
+
+/* Instruction type B */
+#define PVR_ROGUE_PDSINST_OPCODEB_MASK (0x00000007U)
+/* Shift and/or Logic Operation (32 bit) */
+#define PVR_ROGUE_PDSINST_OPCODEB_SFTLP32 UINT32_C(0x00000002)
+/* Vertex Stream Out DMA Command */
+#define PVR_ROGUE_PDSINST_OPCODEB_STM UINT32_C(0x00000003)
+
+/* 32-bit Source Temps. */
+#define PVR_ROGUE_PDSINST_REGS32T_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER (31U)
+
+/* 32-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS32TP_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER (31U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER (32U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER (63U)
+
+/* cc ? if im then
+ * cc ? dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ? dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 32 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set, use SFT (SRC2) as a
+ * direct shift value, otherwise use an address to obtain the shift value. SFT
+ * (SRC2) is treated as a 2's complement encoded signed value. A negative value
+ * encodes a right shift. Values are clamped to the range [-31,31].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_CLRMSK (0x1FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_DEFAULT (0x40000000U) /* SFTLP32 */
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE (0x10000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT (11U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT (0U)
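A minimal illustrative sketch (helper name ours, not from this patch) of packing an immediate-shift SFTLP32 word from the fields above:

```c
#include <stdint.h>

/* Illustrative only: dst = (*src0 lop *src1) << sft, with SFT taken as
 * an immediate (IM = 1). Operands use the register encodings defined
 * earlier; no field-range validation is shown.
 */
static inline uint32_t pvr_pds_encode_sftlp32_im(uint32_t lop, uint32_t src0,
                                                 uint32_t src1, uint32_t sft,
                                                 uint32_t dst)
{
   return PVR_ROGUE_PDSINST_SFTLP32_OPCODE_DEFAULT |
          PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE |
          (lop << PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT) |
          (src0 << PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT) |
          (src1 << PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT) |
          (sft << PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT) |
          (dst << PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT);
}
```

With lop = PVR_ROGUE_PDSINST_LOP_OR, src0 = 1 (temp[1]), src1 = 6 (const[6]), sft = 0 and dst = 8, this reproduces the 0x53083008 word from the generated programs above.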
+
+/* The stream being processed within the vertex, selects 1 of 4 streams. */
+#define PVR_ROGUE_PDSINST_SO_MASK (0x00000003U)
+
+/* An instruction to enable the 'Streaming Out' of data to memory.
+ *
+ * This instruction can only be used when called from a Stream Output Program
+ * (see the stream output configuration words), as it reads its source data
+ * from the unified vertex store within the TA.
+ *
+ * Stream Out programs use the vertex data master, but are called from the TA.
+ * They do not execute on the USC. If synchronization is required with the
+ * control stream to the next draw call, a DOUTV command must be used when
+ * stream out finishes for the current draw call. The VDM must have a
+ * corresponding entry in the control stream indicating when it should wait for
+ * the PDS.
+ *
+ * As SRC0 and SRC1 need to be held from program to program, it is assumed
+ * these are in persistent temps. There are 32 (dword) persistent temps, 8 of
+ * which are required to support 4 streams. The driver needs to manage the
+ * allocation of these. If the value needs to be carried from one geometry job
+ * to another, it will need to be loaded from memory at the start of the
+ * geometry job, and stored at the end of it (using a state program in the
+ * input control stream).
+ *
+ * When a buffer which was in use is altered, the driver will need to fence in
+ * order to make sure that the preceding operations have completed before the
+ * persistent temps are updated.
+ *
+ * It is assumed that the USC compiler will optimize the stream order to keep
+ * data which is contiguous in the output vertex (going to memory)
+ * together. This will enable multiple words to be streamed out in a single
+ * DMA. This will reduce the processing load on the TA.
+ *
+ * The sources are read from within the constant and temporary stores of the
+ * PDS, and have the following meaning.
+ *
+ * If the buffer is being appended to, then persistent constants need to be
+ * stored to memory at the end of the geometry job, and reloaded at the start
+ * of the next job (as another context may be run).
+ *
+ * ccs ? if (so_address + (so_vosize * so_primtype)) <= so_limit then
+ *
+ *           dma the data from the vbg, and write it into memory. so_vioff is
+ *           an offset into the current vertex.
+ * ccs ?     for (so_vertex=0 ; so_vertex < so_primtype; so_vertex++)
+ * ccs ?       for (i=0 ; i < so_dmasize; i++)
+ * ccs ?         *(so_address + so_vooff + i + (so_vertex * so_vosize)) =
+ *                  readvertexvbg(so_vioff + i + (so_vertex * stream_size))
+ *
+ * ccs ?     if so_eop then
+ * ccs ?       so_address = so_address + (so_vosize * so_primtype)
+ * ccs ?       so_primwritten = so_primwritten + 1
+ * ccs ?     end if
+ *
+ * ccs ? else
+ *
+ * ccs ?     setp(so_overflow_predicate[so])
+ * ccs ?     [so_overflow_predicate[global]]
+ *
+ * ccs ? end if
+ *
+ * if so_eop then
+ *   so_primneeded = so_primneeded + 1
+ * end if
+ *
+ * The VBG presents a stream when outputted from the shader. A bit is set in the
+ * input register indicating which stream is present. The PDS is called on a per
+ * primitive basis. In simple geometry this is per input triangle, strip etc.;
+ * in geometry shader land this is per output primitive from the geometry
+ * shader. Primitives are unraveled to remove vertex sharing. The PDS is called
+ * in submission order. The PDS program needs to be written for the primitive
+ * which is being emitted.
+ *
+ * Example
+ *
+ * Data is actually going into three buffers (this is defined elsewhere).
+ * SO_VERTEX0.Pos.XY -> buffer0
+ * SO_VERTEX0.Mult.XY -> buffer0
+ * SO_VERTEX1.Add.XY -> buffer1
+ *
+ * SO_VERTEX0.Pos.ZW -> buffer2
+ *
+ * Persistent temps:
+ * pt0 = Buffer0 start address;
+ * pt1 = Buffer1 start address;
+ * pt2 = Buffer2 start address;
+ * pt3 = 0 (buffer0 primwritten/needed)
+ * pt4 = 0 (buffer1 primwritten/needed)
+ * pt5 = 0 (buffer2 primwritten/needed)
+ *
+ * Constants:
+ * c0 = Buffer 0 top
+ * c1 = Buffer 1 top
+ * c2 = Buffer 2 top
+ * c3 = SRC2,3 for Pos.XY: VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0, EOP = 0
+ * c4 = SRC2,3 for Mult: VOSIZE = 4, VOOFF = 2, DMASIZE = 2, SO_VIOFF = 2,
+ *      EOP = 1
+ * c5 = SRC2,3 for Pos.ZW: VOSIZE = 2, VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0,
+ *      EOP = 1
+ * c6 = SRC2,3 for Add: VOSIZE = 2, VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0,
+ *      EOP = 1
+ *
+ * if stream0 {
+ *
+ *   # Write Pos.XY
+ *   STM SO=0, SRC3=c0, SRC2=c3, SRC1=pt3, SRC0=pt0
+ *   STM SO=0, SRC3=c0, SRC2=c4, SRC1=pt3, SRC0=pt0
+ *   # Write Pos.ZW to buffer 2 and advance
+ *   STM SO=0, SRC3=c2, SRC2=c5, SRC1=pt5, SRC0=pt2
+ *
+ * }
+ *
+ * else if stream1 {
+ *
+ *   # Write Add to buffer 1 and advance
+ *   STM SO=1, SRC3=c1, SRC2=c6, SRC1=pt4, SRC0=pt1
+ *
+ * }
+ */
+#define PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_STM_SO_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT (0U)
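As a minimal illustrative sketch (helper name ours, not from this patch), an STM word for stream `so` can be packed from the field shifts above; the CCS and TST bits are left clear here:

```c
#include <stdint.h>

/* Illustrative only: STM for stream `so`, with the four register
 * arguments laid out per the field shifts above. src0/src1 hold the
 * persistent-temp state, src2/src3 the constant descriptors.
 */
static inline uint32_t pvr_pds_encode_stm(uint32_t so, uint32_t src0,
                                          uint32_t src1, uint32_t src2,
                                          uint32_t src3)
{
   return (PVR_ROGUE_PDSINST_OPCODEB_STM
           << PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT) |
          (so << PVR_ROGUE_PDSINST_STM_SO_SHIFT) |
          (src0 << PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT) |
          (src1 << PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT) |
          (src2 << PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT) |
          (src3 << PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT);
}
```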
+
+/* Multiply Accumulate */
+#define PVR_ROGUE_PDSINST_OPCODEA_MAD UINT32_C(0x00000000)
+
+/* ALU Mode */
+
+/* ALU will perform unsigned math.*/
+#define PVR_ROGUE_PDSINST_ALUM_UNSIGNED (0x00000000U)
+
+/* 64-bit Registers - 64-bit aligned */
+#define PVR_ROGUE_PDSINST_REGS64_MASK (0x0000007FU)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER (63U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER (64U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER (79U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64 (2U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER (96U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER (111U)
+
+/* 64-bit Temps 0-15 Destination */
+#define PVR_ROGUE_PDSINST_REGS64T_MASK (0x0000000FU)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_UPPER (15U)
+
+/* cc ? dst = (src0 * src1) + (src2 * -1sna) + cin
+ *
+ * Multiply 2 source 32 bit numbers to generate a 64 bit result, then add or
+ * subtract a third source. Conditionally takes in a carry in. Always generates
+ * a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT (30U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_ADD (0x00000000U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SUB (0x20000000U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED (0x10000000U)
+#define PVR_ROGUE_PDSINST_MAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_MAD_CC_ENABLE (0x08000000U)
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT (19U)
+/* 32-bit source to multiply - 32-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT (11U)
+/* 64-bit source to add - 64-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT (4U)
+#define PVR_ROGUE_PDSINST_MAD_DST_SHIFT (0U)
+
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 64 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SUB (0x01000000U)
+
+/* 64-bit source to add. */
+#define PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT (12U)
+
+/* 64-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT (5U)
+
+/* 64-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD64_DST_SHIFT (0U)
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 32 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SUB (0x01000000U)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT (14U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_CLRMSK (0xFFC03FFFU)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_CLRMSK (0xFFFFC03FU)
+/* 32-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD32_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_ADD32_DST_CLRMSK (0xFFFFFFC0U)
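A minimal illustrative sketch (helper name ours, not from this patch) of packing an unconditional, unsigned ADD32 from the fields above:

```c
#include <stdint.h>

/* Illustrative only: dst = *src0 + *src1, no condition code, unsigned
 * ALU mode, no carry. Operands use the flat REGS32/REGS32TP encodings.
 */
static inline uint32_t pvr_pds_encode_add32(uint32_t src0, uint32_t src1,
                                            uint32_t dst)
{
   return (PVR_ROGUE_PDSINST_OPCODEC_ADD32
           << PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT) |
          (src0 << PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT) |
          (src1 << PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT) |
          (dst << PVR_ROGUE_PDSINST_ADD32_DST_SHIFT);
}
```

For instance, pvr_pds_encode_add32(2 /* const[2] */, 134 /* temp[6] */, 32 /* ptemp[0] */) yields 0x9000a1a0, the first ADD32 of the base_instance3 program earlier in this patch.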
+
+/* Comparison Operation */
+#define PVR_ROGUE_PDSINST_COP_MASK (0x00000003U)
+
+/* = */
+#define PVR_ROGUE_PDSINST_COP_EQ (0x00000000U)
+
+/* > */
+#define PVR_ROGUE_PDSINST_COP_GT (0x00000001U)
+
+/* < */
+#define PVR_ROGUE_PDSINST_COP_LT (0x00000002U)
+
+/* != */
+#define PVR_ROGUE_PDSINST_COP_NE (0x00000003U)
+
+/* Compare Instruction with 2 sources (IM=0)
+ *
+ * im = 0;
+ * cc ? dst = src0 op src1
+ *
+ * Test source 0 against source 1. The result is written to the destination
+ * predicate (P0). All arguments are treated as unsigned.
+ */ +#define PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT (28U) +#define PVR_ROGUE_PDSINST_CMP_OPCODE_CLRMSK (0x0FFFFFFFU) +#define PVR_ROGUE_PDSINST_CMP_OPCODE_DEFAULT (0xB0000000U) /* CMP */ +#define PVR_ROGUE_PDSINST_CMP_CC_SHIFT (27U) +#define PVR_ROGUE_PDSINST_CMP_CC_CLRMSK (0xF7FFFFFFU) +#define PVR_ROGUE_PDSINST_CMP_CC_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_CMP_CC_ENABLE (0x08000000U) +#define PVR_ROGUE_PDSINST_CMP_COP_SHIFT (25U) +#define PVR_ROGUE_PDSINST_CMP_COP_CLRMSK (0xF9FFFFFFU) +#define PVR_ROGUE_PDSINST_CMP_COP_EQ (0x00000000U) +#define PVR_ROGUE_PDSINST_CMP_COP_GT (0x02000000U) +#define PVR_ROGUE_PDSINST_CMP_COP_LT (0x04000000U) +#define PVR_ROGUE_PDSINST_CMP_COP_NE (0x06000000U) +#define PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT (24U) +#define PVR_ROGUE_PDSINST_CMP_SETCP_CLRMSK (0xFEFFFFFFU) +#define PVR_ROGUE_PDSINST_CMP_SETCP_EN (0x01000000U) +#define PVR_ROGUE_PDSINST_CMP_IM_SHIFT (23U) +#define PVR_ROGUE_PDSINST_CMP_IM_CLRMSK (0xFF7FFFFFU) +#define PVR_ROGUE_PDSINST_CMP_IM_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_CMP_IM_ENABLE (0x00800000U) +#define PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT (18U) +#define PVR_ROGUE_PDSINST_CMP_SRC0_CLRMSK (0xFF83FFFFU) +#define PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT (2U) +#define PVR_ROGUE_PDSINST_CMP_SRC1_CLRMSK (0xFFFFFE03U) + +/* 16-bit signed immediate. */ +#define PVR_ROGUE_PDSINST_IMM16_MASK (0x0000FFFFU) + +/* Compare Instruction with Immediate (IM=1) + * + * im = 1; + * cc ? dst = src0 op imm16 + * + * Test source 0 against an immediate. The result is written to the destination + * predicate (P0). All arguments are treated as unsigned. + */ +#define PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT (28U) +#define PVR_ROGUE_PDSINST_CMPI_OPCODE_CLRMSK (0x0FFFFFFFU) +#define PVR_ROGUE_PDSINST_CMPI_OPCODE_DEFAULT (0xB0000000U) /* CMP */ +#define PVR_ROGUE_PDSINST_CMPI_CC_SHIFT (27U) +#define PVR_ROGUE_PDSINST_CMPI_CC_CLRMSK (0xF7FFFFFFU) +#define PVR_ROGUE_PDSINST_CMPI_CC_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_CMPI_CC_ENABLE (0x08000000U) +#define PVR_ROGUE_PDSINST_CMPI_COP_SHIFT (25U) +#define PVR_ROGUE_PDSINST_CMPI_COP_CLRMSK (0xF9FFFFFFU) +#define PVR_ROGUE_PDSINST_CMPI_COP_EQ (0x00000000U) +#define PVR_ROGUE_PDSINST_CMPI_COP_GT (0x02000000U) +#define PVR_ROGUE_PDSINST_CMPI_COP_LT (0x04000000U) +#define PVR_ROGUE_PDSINST_CMPI_COP_NE (0x06000000U) +#define PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT (24U) +#define PVR_ROGUE_PDSINST_CMPI_SETCP_CLRMSK (0xFEFFFFFFU) +#define PVR_ROGUE_PDSINST_CMPI_SETCP_EN (0x01000000U) +#define PVR_ROGUE_PDSINST_CMPI_IM_SHIFT (23U) +#define PVR_ROGUE_PDSINST_CMPI_IM_CLRMSK (0xFF7FFFFFU) +#define PVR_ROGUE_PDSINST_CMPI_IM_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_CMPI_IM_ENABLE (0x00800000U) +#define PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT (18U) +#define PVR_ROGUE_PDSINST_CMPI_SRC0_CLRMSK (0xFF83FFFFU) +#define PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT (2U) +#define PVR_ROGUE_PDSINST_CMPI_IM16_CLRMSK (0xFFFC0003U) + +/* Condition codes */ +#define PVR_ROGUE_PDSINST_PREDICATE_MASK (0x0000000FU) + +/* Use programmable predicate 0 */ +#define PVR_ROGUE_PDSINST_PREDICATE_P0 (0x00000000U) +/* Input Predicate 0 - When DM Pixel Start/End Program End of Tile, When DM + * Pixel State Program indicates load Uniforms, When DM Vertex Last Vertex In + * Task, When DM Compute indicates shared or kernel task (compute thread barrier + * mode) or Last In Task (normal mode), When DM Tessellator TBD. 
+ */ +#define PVR_ROGUE_PDSINST_PREDICATE_IF0 (0x00000001U) +/* Input Predicate 1 - When DM Pixel Start/End Program End Render, When DM Pixel + * State Program indicates load Texture, When DM vertex First In Task, When DM + * Compute indicates synchronization task (compute thread barrier mode) or First + * In Task (normal mode), When DM Tessellator TBD. + */ +#define PVR_ROGUE_PDSINST_PREDICATE_IF1 (0x00000002U) +/* Stream 0 Out has overflowed. Note this is per stream not per buffer. */ +#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0 (0x00000003U) +/* Stream 1 Out has overflowed. Note this is per stream not per buffer. */ +#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1 (0x00000004U) +/* Stream 2 Out has overflowed. Note this is per stream not per buffer. */ +#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2 (0x00000005U) +/* Stream 3 Out has overflowed. Note this is per stream not per buffer. */ +#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3 (0x00000006U) +/* A Stream Out has overflowed. Note this is per stream not per buffer. */ +#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL (0x00000007U) +/* For SETC Don't set a new predicate, KEEP the existing one. For BRA + * instruction where this is the source predicate, KEEP the instruction, don't + * predicate it out. + */ +#define PVR_ROGUE_PDSINST_PREDICATE_KEEP (0x00000008U) +/* DMA Out of Bounds predicate - set by DDMAT instruction when DMA is out of + * bounds. + */ +#define PVR_ROGUE_PDSINST_PREDICATE_OOB (0x00000009U) + +/* Negate condition. */ + +/* Do not negate condition. */ +#define PVR_ROGUE_PDSINST_NEG_DISABLE (0x00000000U) +/* Negate condition. */ +#define PVR_ROGUE_PDSINST_NEG_ENABLE (0x00000001U) + +/* Branch Address. */ +#define PVR_ROGUE_PDSINST_BRAADDR_MASK (0x0007FFFFU) + +/* Branch and Set Selected Predicate Instruction + * + * im = 1; + * cc xor neg ? pc = dst; + * + * Conditionally branch to an address (ADDR), depending upon the predicate. The + * meaning of the predicate can be negated using NEG. This instruction also + * allows the current predicate referenced by other instructions to be set by + * the SETC field. The current predicate is available by all instructions. This + * is a signed offset from the current PC. BRA ADDR=0 would be an infinite loop + * of the instruction. 
+ */
+
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_DEFAULT (0xC0000000U) /* BRA */
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_CLRMSK (0xF0FFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF0 (0x01000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF1 (0x02000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_0 (0x03000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_1 (0x04000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_2 (0x05000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_3 (0x06000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_GLOBAL (0x07000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_KEEP (0x08000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_OOB (0x09000000U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_NEG_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_CLRMSK (0xFF87FFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SETC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF0 (0x00080000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF1 (0x00100000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_0 (0x00180000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_1 (0x00200000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_2 (0x00280000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_3 (0x00300000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_GLOBAL (0x00380000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_KEEP (0x00400000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_OOB (0x00480000U)
+#define PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_BRA_ADDR_CLRMSK (0xFFF80000U)
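A minimal illustrative sketch (helper name ours, not from this patch) of packing a BRA word from the fields above; `srcc` and `setc` take the pre-shifted BRA_SRCC_*/BRA_SETC_* values:

```c
#include <stdint.h>

/* Illustrative only: branch on `srcc` (optionally negated) to a signed
 * instruction offset `addr`, selecting `setc` as the current predicate.
 * The offset is masked into the 19-bit ADDR field.
 */
static inline uint32_t pvr_pds_encode_bra(uint32_t srcc, int neg,
                                          uint32_t setc, int32_t addr)
{
   return PVR_ROGUE_PDSINST_BRA_OPCODE_DEFAULT | srcc | setc |
          (neg ? PVR_ROGUE_PDSINST_BRA_NEG_ENABLE : 0U) |
          (((uint32_t)addr << PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT) &
           ~PVR_ROGUE_PDSINST_BRA_ADDR_CLRMSK);
}
```

pvr_pds_encode_bra(PVR_ROGUE_PDSINST_BRA_SRCC_KEEP, 0, PVR_ROGUE_PDSINST_BRA_SETC_P0, 1) yields 0xc8000001, the word seen in the generated programs: SRCC = KEEP makes the branch unconditional, and ADDR = 1 simply falls through to the next instruction while switching the current predicate to P0.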
+
+/* SLC_MODE_LD SLC Cache Policy for loads. */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_MASK (0x00000003U)
+/* Bypass Policy */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_BYPASS (0x00000000U)
+/* Standard Cached Read */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED (0x00000001U)
+/* Cached Read no allocate */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED_RD_NA (0x00000003U)
+
+/* CMODE_LD MCU (SLC) Cache Mode for Loads. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_MASK (0x00000003U)
+
+/* Normal cache operation. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_CACHED (0x00000000U)
+
+/* Bypass L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_BYPASS (0x00000001U)
+
+/* Force line fill of L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_FORCE_LINE_FILL (0x00000002U)
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_COUNT8_MASK (0x00000007U)
+
+/* Source Base Address for memory fetch in DWORDS - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF))
+
+/* Load Instruction DMA : Src0 */
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
+   (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED_RD_NA \
+   (UINT64_C(0xc000000000000000))
+
+/* The destination address in the temps (persistent or not) for the read data -
+ * MUST BE 128 BIT ALIGNED.
+ */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT (47U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_CLRMSK (UINT64_C(0xFFF07FFFFFFFFFFF))
+
+/* Cache Mode */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_SHIFT (44U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS (UINT64_C(0x0000100000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_FORCE_LINE_FILL \
+   (UINT64_C(0x0000200000000000))
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT (41U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_CLRMSK \
+   (UINT64_C(0xFFFFF1FFFFFFFFFF))
+
+/* Source Base Address for memory fetch - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000003))
+
+/* Special Instructions Op-code. */
+#define PVR_ROGUE_PDSINST_OPCODESP_MASK (0x0000000FU)
+
+/* Data Load from memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LD UINT32_C(0x00000000)
+
+/* Data Store to memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_ST UINT32_C(0x00000001)
+
+/* Wait read or write data operations to complete. */
+#define PVR_ROGUE_PDSINST_OPCODESP_WDF UINT32_C(0x00000002)
+
+/* Load 16 bit immediate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LIMM UINT32_C(0x00000003)
+
+/* Lock the execute so only this instance can execute for this data master. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LOCK UINT32_C(0x00000004)
+
+/* Release the lock taken by lock. */
+#define PVR_ROGUE_PDSINST_OPCODESP_RELEASE UINT32_C(0x00000005)
+
+/* Halt execution (program termination). */
+#define PVR_ROGUE_PDSINST_OPCODESP_HALT UINT32_C(0x00000006)
+
+/* Clear stream out predicate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMC UINT32_C(0x00000007)
+
+/* Parallel Stream Out. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMP UINT32_C(0x00000008)
+
+/* Integer Divide. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDIV UINT32_C(0x00000009)
+
+/* Atomic Access. */
+#define PVR_ROGUE_PDSINST_OPCODESP_AA UINT32_C(0x0000000a)
+
+/* Issue Data Fence. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDF UINT32_C(0x0000000b)
+
+/* Issue Data Fence. */
+#define PVR_ROGUE_PDSINST_OPCODESP_POL (0x0000000cU)
+
+/* No Operation. */
+#define PVR_ROGUE_PDSINST_OPCODESP_NOP (0x0000000fU)
+
+/* Data Load Instruction (Opcode SP)
+ *
+ * for (i=0; i < count;i++) {
+ *   cc ? *(src0 + i) = mem(src1 + i)
+ * }
+ *
+ * Load count 32 bit words from memory to the temporaries reading from the
+ * address in memory pointed to by SRCADD. If the final destination address
+ * (DEST + COUNT - 1) exceeds the amount of temps available the entire load is
+ * discarded.
+ */
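A minimal illustrative sketch (helper name ours, not from this patch) of packing the 64-bit LD DMA descriptor from the LD_LD_SRC0 fields above. We assume here that the SRCADD field simply holds the 128-bit-aligned byte address in bits 2..39, per the CLRMSK above; the exact address units should be confirmed against the PDS documentation.

```c
#include <stdint.h>

/* Illustrative only: LD descriptor with normal cache mode. `dest` is
 * the (128-bit aligned) destination temp field value, `count` the
 * number of 64-bit words, `addr` the 128-bit aligned source address.
 * No range checking is shown.
 */
static inline uint64_t pvr_pds_encode_ld_src0(uint64_t dest, uint64_t count,
                                              uint64_t addr)
{
   return PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED |
          (dest << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT) |
          (count << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT) |
          (addr & ~PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_CLRMSK);
}
```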
+ */ +#define PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT (28U) +#define PVR_ROGUE_PDSINST_LD_OPCODE_CLRMSK (0x0FFFFFFFU) +#define PVR_ROGUE_PDSINST_LD_OPCODE_DEFAULT (0xD0000000U) /* SP */ +#define PVR_ROGUE_PDSINST_LD_CC_SHIFT (27U) +#define PVR_ROGUE_PDSINST_LD_CC_CLRMSK (0xF7FFFFFFU) +#define PVR_ROGUE_PDSINST_LD_CC_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_LD_CC_ENABLE (0x08000000U) +#define PVR_ROGUE_PDSINST_LD_OP_SHIFT (23U) +#define PVR_ROGUE_PDSINST_LD_OP_CLRMSK (0xF87FFFFFU) +#define PVR_ROGUE_PDSINST_LD_OP_DEFAULT (0x00000000U) /* ld */ +#define PVR_ROGUE_PDSINST_LD_SRC0_SHIFT (0U) +#define PVR_ROGUE_PDSINST_LD_SRC0_CLRMSK (0xFFFFFF80U) + +/* CMODE_ST MCU (SLC) Cache Mode for stores. */ +#define PVR_ROGUE_PDSINST_CMODE_ST_MASK (0x00000003U) + +/* Write-through Policy */ +#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_THROUGH (0x00000000U) + +/* Write-back Policy. */ +#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_BACK (0x00000001U) + +/* Lazy write-back policy. */ +#define PVR_ROGUE_PDSINST_CMODE_ST_LAZY_WRITE_BACK (0x00000002U) + +/* ST: Number of 32 bit Words to store. */ +#define PVR_ROGUE_PDSINST_ST_COUNT4_MASK (0x0000000FU) + +/* Source Base Address for memory fetch in DWORDS. */ +#define PVR_ROGUE_PDSINST_ST_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF)) + +/* Store Instruction DMA : Src0 */ + +/* SLC cache policy. */ +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_SHIFT (62U) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_CLRMSK \ + (UINT64_C(0x3FFFFFFFFFFFFFFF)) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH \ + (UINT64_C(0x0000000000000000)) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK \ + (UINT64_C(0x4000000000000000)) + +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT (46U) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_CLRMSK (UINT64_C(0xFFF03FFFFFFFFFFF)) + +/* Cache Mode. */ +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_SHIFT (44U) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF)) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH \ + (UINT64_C(0x0000000000000000)) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK \ + (UINT64_C(0x0000100000000000)) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_LAZY_WRITE_BACK \ + (UINT64_C(0x0000200000000000)) + +/* ST: Number of 32 bit Words to store. */ +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT (40U) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_CLRMSK \ + (UINT64_C(0xFFFFF0FFFFFFFFFF)) + +/* Destination Base Address for memory write. */ +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT (2U) +#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_CLRMSK \ + (UINT64_C(0xFFFFFF0000000003)) + +/* Data Store Instruction (Opcode SP) + * + * for (i=0; i < count;i++) { + * cc ? mem(src1 + i) = *(src0 + i) + * } + * + * Store count 64 bit words from temporaries to memory (memory address starts at + * src1). If the instruction attempts to read data (in temps) outside of it's + * allocated region the entire store is discarded. 
+ */ +#define PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT (28U) +#define PVR_ROGUE_PDSINST_ST_OPCODE_CLRMSK (0x0FFFFFFFU) +#define PVR_ROGUE_PDSINST_ST_OPCODE_DEFAULT (0xD0000000U) /* SP */ +#define PVR_ROGUE_PDSINST_ST_CC_SHIFT (27U) +#define PVR_ROGUE_PDSINST_ST_CC_CLRMSK (0xF7FFFFFFU) +#define PVR_ROGUE_PDSINST_ST_CC_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_ST_CC_ENABLE (0x08000000U) +#define PVR_ROGUE_PDSINST_ST_OP_SHIFT (23U) +#define PVR_ROGUE_PDSINST_ST_OP_CLRMSK (0xF87FFFFFU) +#define PVR_ROGUE_PDSINST_ST_OP_DEFAULT (0x00800000U) /* ST */ +#define PVR_ROGUE_PDSINST_ST_SRC0_SHIFT (0U) +#define PVR_ROGUE_PDSINST_ST_SRC0_CLRMSK (0xFFFFFF80U) + +/* Data Fence Instruction (Opcode SP) + * + * Cc ? wdf + * + * The data fence instruction gives the ability to track the return of dependent + * read data and to determine when data written from the core has made it to the + * MCU. This is required on reads as there is no implicit synchronization + * between read accesses to the primary attribute bank and data returned by + * dependent reads. For writes it is required where the program is enforcing + * synchronization with another program (which could be on the PDS or any other + * processor in the system). Note, this only guarantees order within the + * PDS. For order elsewhere reads need to be issued, and flush commands may have + * to be issued to the MCU + * + * The fence mechanism takes the form of a counter that is incremented whenever + * a read (ld) or write (ST) instruction is encountered by the instruction fetch + * decoder. When the read or write instruction returns, or writes all its data + * the counter is decremented. There is 1 counter per thread. Prior to accessing + * return data a WDF instruction must be issued, when this is seen by the + * instruction decoder it will check the current count value and will suspend + * execution if it is currently non zero, execution being resumed as soon as the + * counter reaches zero, and a slot is available. + * + * Example + * Do a dependent read for data + * + * ldr0,#2,r3 Issue read + * ... Try and do some other stuff + * wdf Make sure read data has come back + * add32 r2,r1,r0 And use the returned result + * + */ +#define PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT (28U) +#define PVR_ROGUE_PDSINST_WDF_OPCODE_CLRMSK (0x0FFFFFFFU) +#define PVR_ROGUE_PDSINST_WDF_OPCODE_DEFAULT (0xD0000000U) /* SP */ +#define PVR_ROGUE_PDSINST_WDF_CC_SHIFT (27U) +#define PVR_ROGUE_PDSINST_WDF_CC_CLRMSK (0xF7FFFFFFU) +#define PVR_ROGUE_PDSINST_WDF_CC_DISABLE (0x00000000U) +#define PVR_ROGUE_PDSINST_WDF_CC_ENABLE (0x08000000U) +#define PVR_ROGUE_PDSINST_WDF_OP_SHIFT (23U) +#define PVR_ROGUE_PDSINST_WDF_OP_CLRMSK (0xF87FFFFFU) +#define PVR_ROGUE_PDSINST_WDF_OP_DEFAULT (0x01000000U) /* WDF */ + +/* PDS Global Register access control */ + +/* Disable global register access */ +#define PVR_ROGUE_PDSINST_GR_DISABLE (0x00000000U) + +/* Enable global register access, global register specified by IMM16.*/ +#define PVR_ROGUE_PDSINST_GR_ENABLE (0x00000001U) + +/* Load Immediate (Opcode SP) + * + * cc ? GR = DISABLE : *src1 = src0 + * cc ? GR = ENABLE : *src1 = greg[IMM16] + * + * Load an immediate value (src0) into the temporary registers. If the GR flag + * is set, the PDS global register specified by IMM16 will be loaded instead. 
+ * greg[0] = cluster number
+ * greg[1] = instance number
+ */
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LIMM_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LIMM_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LIMM_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OP_DEFAULT (0x01800000U) /* LIMM */
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_CLRMSK (0xFFFC0003U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_SHIFT (1U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_CLRMSK (0xFFFFFFFDU)
+#define PVR_ROGUE_PDSINST_LIMM_GR_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_ENABLE (0x00000002U)
+
+/* Lock Instruction (Opcode SP)
+ *
+ * cc ? lock
+ *
+ * The hardware contains an internal mutex per data master. When the lock
+ * instruction is issued, the thread will attempt to take control of the mutex
+ * (for the current data master). If it is already taken by another thread,
+ * then the thread is descheduled until it is available.
+ *
+ * The purpose of the lock (and release) instructions is to allow critical
+ * sections of code to execute serially to other code for the same data
+ * master. This is particularly useful when accessing the persistent (cross
+ * thread) temporaries. Note that there is no communication possible across
+ * data masters.
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LOCK_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OP_DEFAULT (0x02000000U) /* LOCK */
+
+/* Release Lock (Opcode SP)
+ *
+ * cc ? release
+ *
+ * The hardware contains an internal mutex per data master. If a thread has
+ * issued a lock instruction, then a release instruction must be issued to
+ * release the lock. See the corresponding lock instruction for more details.
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_DEFAULT (0x02800000U) /* RELEASE */
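+
+/* Illustrative sketch only (not from the hardware documentation): a critical
+ * section protecting the persistent temporaries could be emitted with the
+ * encoder helpers added later in this patch:
+ *
+ *    program[count++] = pvr_pds_inst_encode_lock(0);
+ *    ... instructions accessing the persistent temps ...
+ *    program[count++] = pvr_pds_inst_encode_release(0);
+ *
+ * program and count are hypothetical; no DOUT may be placed between the two.
+ */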
+
+/* Special instruction - Halt
+ * Halt Execution (Opcode SP)
+ *
+ * cc ? halt
+ *
+ * The last instruction in a program must always be a halt instruction, or a
+ * DOUT/DDMAD instruction with the END flag set. This is required in order to
+ * indicate the end of the program.
+ */
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_HALT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_HALT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_HALT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_HALT_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_HALT_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OP_DEFAULT (0x03000000U) /* HALT */
+
+/* Special instruction - Nop
+ * No Operation (Opcode SP)
+ *
+ * cc ? nop
+ *
+ * This instruction does no operation, and introduces a wait cycle into the
+ * pipeline.
+ */
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_NOP_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_NOP_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_NOP_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_NOP_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_NOP_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OP_DEFAULT (0x07800000U) /* NOP */
+
+/* The SO bits to clear: bits 0-3 = streams 0-3, bit 4 = global. */
+#define PVR_ROGUE_PDSINST_SOMASK_MASK (0x0000001FU)
+
+/* Special instruction - Stream out predicate clear
+ * (Opcode SP)
+ *
+ * cc ? stmc
+ *
+ * This instruction clears the stream out predicates to 0, according to the
+ * clear bits.
+ */
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_STMC_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STMC_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_STMC_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_STMC_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STMC_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OP_DEFAULT (0x03800000U) /* STMC */
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_CLRMSK (0xFFFFFFE0U)
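+
+/* Illustrative sketch only: clearing all four per-stream predicates plus the
+ * global predicate with the encoder helper added later in this patch:
+ *
+ *    uint32_t stmc = pvr_pds_inst_encode_stmc(0, 0x1F);
+ */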
+
+/* A 1 TB address, with byte granularity. Address must be dword aligned when
+ * repeat is 0.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_ADDRESS_MASK \
+   (UINT64_C(0x000000FFFFFFFFFF))
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_BYPASS \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED \
+   (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED_RD_NA \
+   (UINT64_C(0xc000000000000000))
+
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000000))
+
+/* Size of external memory buffer in bytes (0 is 0 bytes). */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_MSIZE_MASK (0x7FFFFFFFU)
+
+/* When repeat is enabled, the size of each DMA word in bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_MASK (0x00000003U)
+/* DMA of 1 byte. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_ONE (0x00000000U)
+/* DMA of 2 bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_TWO (0x00000001U)
+/* DMA of 3 bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_THREE (0x00000002U)
+/* DMA of 4 bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_FOUR (0x00000003U)
+
+/* DMA to unified store. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_UNIFIED_STORE (0x00000000U)
+
+/* DMA to common store. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, must be clear for common store.
+ *
+ * DMA is issued natively, in its entirety.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_NOREPEAT (0x00000000U)
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 is 0 dwords). */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_MASK (0x00000FFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_RANGE (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_LOWER (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER (255U)
+
+/* Size of external buffer in bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT (33U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK \
+   (UINT64_C(0x00000001FFFFFFFF))
+
+/* Perform OOB checking. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_SHIFT (32U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_CLRMSK \
+   (UINT64_C(0xFFFFFFFEFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN \
+   (UINT64_C(0x0000000100000000))
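+
+/* Illustrative note (not from the hardware documentation): as with the other
+ * control words in this file, a field is packed by first clearing it with its
+ * CLRMSK and then or-ing in the shifted value, e.g.
+ *
+ *    src3 = (src3 & PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) |
+ *           ((uint64_t)msize << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT);
+ *
+ * msize here is a hypothetical buffer size in bytes.
+ */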
+
+/* Last DMA in program. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_CLRMSK \
+   (UINT64_C(0xFFFFFFFF7FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN \
+   (UINT64_C(0x0000000080000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_CLRMSK \
+   (UINT64_C(0xFFFFFFFF9FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO \
+   (UINT64_C(0x0000000020000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE \
+   (UINT64_C(0x0000000040000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR \
+   (UINT64_C(0x0000000060000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_CLRMSK \
+   (UINT64_C(0xFFFFFFFFEFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_COMMON_STORE \
+   (UINT64_C(0x0000000010000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CLRMSK \
+   (UINT64_C(0xFFFFFFFFF3FFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_BYPASS \
+   (UINT64_C(0x0000000004000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_FORCE_LINE_FILL \
+   (UINT64_C(0x0000000008000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFC001FFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFFFFEFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_NOREPEAT \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT \
+   (UINT64_C(0x0000000000001000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFFFFF000))
+
+/* Stop execution flag
+ *
+ * Continue execution after this instruction.
+ */
+#define PVR_ROGUE_PDSINST_END_DISABLE (0x00000000U)
+
+/* Halt execution after this instruction. */
+#define PVR_ROGUE_PDSINST_END_ENABLE (0x00000001U)
+
+/* 64-bit Consts 0-63 Destination. */
+#define PVR_ROGUE_PDSINST_REGS64C_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER (63U)
+
+/* Multiply-add then send to DOUTD (Opcode SP). Optionally perform
+ * out-of-bounds checking (DDMAD(T)).
+ *
+ * cc ? if (test == 1) then
+ * cc ?    if (((src0 * src1) + src2)[39:0] + (src3[11:0] << 2) <=
+ * cc ?         src2[39:0] + src3[63:33]) then
+ * cc ?       OOB = 0
+ * cc ?       doutd = (src0 * src1) + src2, src3
+ * cc ?    else
+ * cc ?       OOB = 1
+ * cc ?    endif
+ * cc ? else
+ * cc ?    doutd = (src0 * src1) + src2, src3
+ * cc ? endif
+ *
+ * This instruction performs a 32 bit multiply, followed by a 64 bit add. This
+ * result is combined with a 4th source and used to create the data for a DOUTD
+ * emit. A DOUTD is a command to a DMA engine, which reads data from memory and
+ * writes it into the USC Unified or Common Store.
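+ *
+ * Illustrative sketch only (not from the hardware documentation) of the
+ * arithmetic in C:
+ *
+ *    uint64_t addr = (uint64_t)src0 * src1 + src2; // 32x32 mul, 64-bit add
+ *
+ * addr[39:0], together with the DMA descriptor held in src3, then forms the
+ * DOUTD emit.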
+ *
+ * Additionally the DDMAD performs an out-of-bounds check on the DMA when the
+ * test flag is set. If a buffer overflow is predicted, the DMA is skipped and
+ * the OOB (DMA out of bounds) predicate is set.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_DEFAULT (0xE0000000U) /* DDMAD */
+#define PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_ENABLE (0x04000000U)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_CLRMSK (0xFC03FFFFU)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_CLRMSK (0xFFFC1FFFU)
+
+/* 64-bit source to add - 64-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_CLRMSK (0xFFFFE03FU)
+
+/* 64-bit constant register destination. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_CLRMSK (0xFFFFFFC0U)
+
+/* When DOUTU_SAMPLE_RATE is INSTANCE or SELECTIVE - 32 bit temps per instance
+ * at 4 word granularity. When DOUTU_SAMPLE_RATE is FULL - 32 bit temps per
+ * sample at 4 word granularity.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSIZE (4U)
+
+/* Sample rate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_MASK (0x00000003U)
+
+/* Instance rate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_INSTANCE (0x00000000U)
+
+/* Selective sample rate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_SELECTIVE (0x00000001U)
+
+/* Full sample rate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_FULL (0x00000002U)
+
+/* Code base address (4 byte alignment). 
*/ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_MASK (0x3FFFFFFFU) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSHIFT (2U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSIZE (4U) + +/* Use Interface doutu : Src0 */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_SHIFT (41U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_CLRMSK \ + (UINT64_C(0xFFFFFDFFFFFFFFFF)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN \ + (UINT64_C(0x0000020000000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT (35U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_CLRMSK \ + (UINT64_C(0xFFFFFE07FFFFFFFF)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSHIFT (2U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE (4U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT (33U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_CLRMSK \ + (UINT64_C(0xFFFFFFF9FFFFFFFF)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_INSTANCE \ + (UINT64_C(0x0000000000000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SELECTIVE \ + (UINT64_C(0x0000000200000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_FULL \ + (UINT64_C(0x0000000400000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT (2U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK \ + (UINT64_C(0xFFFFFFFF00000003)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT (2U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSIZE (4U) + +/* Use Interface doutu : Src1. */ + +/* Secondary instance data offset in 32 bit words (offset of the instance). */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DOFFSET_MASK (0x00001FFFU) + +/* Source Base Address for memory fetch. Address must be dword aligned when + * repeat is 0. + */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SBASE_MASK \ + (UINT64_C(0x000000FFFFFFFFFF)) + +/* DMA Interface DOutD : Src0 */ + +/* SLC cache policy */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_SHIFT (62U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CLRMSK \ + (UINT64_C(0x3FFFFFFFFFFFFFFF)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_BYPASS \ + (UINT64_C(0x0000000000000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED \ + (UINT64_C(0x4000000000000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED_RD_NA \ + (UINT64_C(0xc000000000000000)) + +/* Secondary instance data offset in 32 bit words (offset of the instance). */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_SHIFT (40U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_CLRMSK \ + (UINT64_C(0xFFE000FFFFFFFFFF)) + +/* Source Base Address for memory fetch. */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_SHIFT (0U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_CLRMSK \ + (UINT64_C(0xFFFFFF0000000000)) + +/* When repeat is enabled the size of the DMA in bytes. 
*/
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_MASK (0x00000003U)
+
+/* DMA of 1 byte. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_ONE (0x00000000U)
+
+/* DMA of 2 bytes. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_TWO (0x00000001U)
+
+/* DMA of 3 bytes. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_THREE (0x00000002U)
+
+/* DMA of 4 bytes. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_FOUR (0x00000003U)
+
+/* Unified Store. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, ignore for common store. */
+
+/* DMA is issued natively, in its entirety. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_NOREPEAT (0x00000000U)
+
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_BSIZE_MASK (0x00000FFFU)
+
+/* DMA Interface DOutD : Src1 */
+
+/* Last Write or DMA in program (this needs to be set only once, on the last
+ * DMA or last direct write, whichever is last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_CLRMSK (0x9FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_ONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_TWO (0x20000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_THREE (0x40000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_FOUR (0x60000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_UNIFIED_STORE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE Cache Mode. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_FORCE_LINE_FILL \
+   (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_CLRMSK (0xFC001FFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_CLRMSK (0xFFFFEFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_NOREPEAT (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_REPEAT (0x00001000U)
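+
+/* Illustrative sketch only (not from the hardware documentation): a repeating
+ * DMA of 4-byte words into the unified store might pack src1 as
+ *
+ *    src1 = PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_FOUR |
+ *           PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_UNIFIED_STORE |
+ *           PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_REPEAT |
+ *           (ao << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT) |
+ *           (bsize << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT);
+ *
+ * where ao (instance offset) and bsize (dword count) are hypothetical values.
+ */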
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK (0xFFFFF000U)
+
+/* Lower 64-bit (63:0) data to be written. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SBASE0_MASK \
+   (UINT64_C(0xFFFFFFFFFFFFFFFF))
+
+/* Direct Write Interface doutw : Src0. */
+
+/* Lower 64-bit (63:0) data to be written. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_CLRMSK \
+   (UINT64_C(0x0000000000000000))
+
+/* Unified Store. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 128 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_AO_MASK (0x00001FFFU)
+
+/* Direct Write Interface doutw : Src1. */
+
+/* Last Write or DMA in program (this needs to be set only once, on the last
+ * DMA or last direct write, whichever is last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE Cache Mode. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_FORCE_LINE_FILL \
+   (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance). For 64 bit writes the address needs to be 64 bit aligned.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK (0xFC001FFFU)
+
+/* 2-bit dword write mask. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_CLRMSK (0xFFFFFFFCU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64 (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_NONE (0x00000003U)
+
+/* VDM Writeback Interface Doutv : Src0 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SBASE_MASK (0xFFFFFFFFU)
+
+/* VDM Writeback Interface Doutv : Src1 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_CLRMSK (0x00000000U)
+
+/* Shade Model Control */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_MASK (0x00000003U)
+
+/* Vertex 0 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX0 \
+   (0x00000000U)
+
+/* Vertex 1 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX1 \
+   (0x00000001U)
+
+/* Vertex 2 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX2 \
+   (0x00000002U)
+
+/* Gouraud shaded. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_GOURAUD (0x00000003U)
+
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_MASK (0x00000003U)
+
+/* 1 Dimension (U) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_1D (0x00000000U)
+
+/* 2 Dimensions (UV) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_2D (0x00000001U)
+
+/* 3 Dimensions (UVS) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_3D (0x00000002U)
+
+/* 4 Dimensions (UVST) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_4D (0x00000003U)
+
+/* Whether this issue is perspective correct. */
+
+/* No W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_DISABLE (0x00000000U)
+
+/* Use W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_ENABLE (0x00000001U)
+
+/* The offset within the vertex if all data is treated as F32 (even if
+ * submitted as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F32_OFFSET_MASK (0x000000FFU)
+
+/* The offset within the vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F16_OFFSET_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI. This command is only legal in a
+ * coefficient loading program.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_MASK (0x1FFFFFFFU)
+
+/* Apply depth bias to this layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_EN (0x08000000U)
+
+/* Ignore the F16 and F32 offsets and the WMODE, and send the primitive ID
+ * instead.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_EN (0x04000000U)
+
+/* Shade Model for Layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_CLRMSK (0xFCFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX0 \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX1 \
+   (0x01000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX2 \
+   (0x02000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_GOURAUD (0x03000000U)
+
+/* Point sprite Forced. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_EN (0x00800000U)
+
+/* Wrap S Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_SHIFT (22U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_CLRMSK (0xFFBFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_EN (0x00400000U)
+
+/* Wrap V Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_SHIFT (21U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_CLRMSK (0xFFDFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_EN (0x00200000U)
+
+/* Wrap U Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_SHIFT (20U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_CLRMSK (0xFFEFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_EN (0x00100000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_CLRMSK (0xFFF3FFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_1D (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_2D (0x00040000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_3D (0x00080000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_4D (0x000C0000U)
+
+/* Issue is for F16 precision values. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_SHIFT (17U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_CLRMSK (0xFFFDFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_EN (0x00020000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_CLRMSK (0xFFFEFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_DISABLE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_ENABLE (0x00010000U)
+/* The offset within the vertex if all data is treated as F32 (even if
+ * submitted as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_CLRMSK (0xFFFF00FFU)
+
+/* The offset within the vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_CLRMSK (0xFFFFFF00U)
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are packed: Issue 0 followed by Issue 1.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_DEST_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI : Src0 */
+
+/* This is the last issue for the triangle. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_SHIFT (63U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_CLRMSK \
+   (UINT64_C(0x7FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN \
+   (UINT64_C(0x8000000000000000))
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are packed: Issue 0 followed by Issue 1.
+ */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT (54U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_CLRMSK \ + (UINT64_C(0xC03FFFFFFFFFFFFF)) + +/* Issue 0 */ +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT (0U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_CLRMSK \ + (UINT64_C(0xFFFFFFFFE0000000)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_SHIFT (27U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_CLRMSK \ + (UINT64_C(0xfffffffff7ffffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_SHIFT (26U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_CLRMSK \ + (UINT64_C(0xfffffffffbffffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_SHIFT (24U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_CLRMSK \ + (UINT64_C(0xfffffffffcffffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_SHIFT (23U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_CLRMSK \ + (UINT64_C(0xffffffffff7fffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_SHIFT (22U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_CLRMSK \ + (UINT64_C(0xffffffffffbfffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_SHIFT (21U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_CLRMSK \ + (UINT64_C(0xffffffffffdfffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_SHIFT (20U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_CLRMSK \ + (UINT64_C(0xffffffffffefffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_SHIFT (18U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_CLRMSK \ + (UINT64_C(0xfffffffffff3ffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_SHIFT (17U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_CLRMSK \ + (UINT64_C(0xfffffffffffdffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_SHIFT (16U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_CLRMSK \ + (UINT64_C(0xfffffffffffeffff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_SHIFT (8U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_CLRMSK \ + (UINT64_C(0xffffffffffff00ff)) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_SHIFT (0U) +#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_CLRMSK \ + (UINT64_C(0xffffffffffffff00)) + +/* TSP Parameter Fetch Interface DOutI : Src1 */ + +/* 32-bit Temp or DOUT. */ +#define PVR_ROGUE_PDSINST_DSTDOUT_MASK (0x00000007U) + +/* DMA data from memory to the USC. */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTD (0x00000000U) + +/* Write a value directly to the USC. */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTW (0x00000001U) + +/* Start a USC program. */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTU (0x00000002U) + +/* Issue a fence back to the VDM (with value). */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTV (0x00000003U) + +/* Issue a command to the TSP Parameter Fetch and FPU to calculate and load + * coefficients to USC. + */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTI (0x00000004U) + +/* Issue a fence back to the CDM. Used if compute is enabled. */ +#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTC (0x00000005U) + +/* Issue DOUT to external devices (Opcode SP) + * + * cc ? dst = src0, src1 + * + * PDS programs have to send data somewhere. 
This is the primary function of the
+ * PDS. All programs must therefore execute at least one of the DOUT, DDMAD or
+ * STM commands. There are the following program types:
+ *
+ * Vertex Shader, Geometry Shader, Hull Shader Programs
+ * These programs load data from memory into the USC. They will use the DOUTD
+ * or DDMAD commands. Ideally the DDMAD command is used, as the most typical
+ * operation is Src Address = Index * Stride + Base followed by a DMA from this
+ * address. They also schedule the execution of the USSE and will issue a DOUTU
+ * command. This would normally be the last instruction in the program.
+ *
+ * Obviously the shader programs must not overflow their allocated memory.
+ * However, the USC will do cache look-aheads and so could attempt to fetch
+ * shader code from beyond the end of the program. This could cause a page
+ * fault if the last program instructions are very close to the end of the last
+ * valid memory page.
+ *
+ * To avoid this, always ensure that the start address of the last instruction
+ * of a shader program does not occur in the last 26 bytes of a page.
+ *
+ * State/Uniform Loading Programs
+ * These programs load data from memory into the USC. They will typically use
+ * the DOUTD command.
+ *
+ * Coefficient Loading Programs
+ * These programs run once per triangle. They load the A, B, C coefficients for
+ * the iteration of the varyings into the USC. These programs issue DOUTI
+ * commands. These programs must not do any other sort of DOUT command
+ * (DOUTW/DOUTD/DOUTU).
+ *
+ * Pixel Shader Programs
+ * These programs run once per group of pixels and schedule the execution of a
+ * pixel shader on the USC for that group of pixels. Such a program issues a
+ * DOUTU (and that is all).
+ */
+
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_DEFAULT (0xF0000000U) /* DOUT */
+#define PVR_ROGUE_PDSINST_DOUT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_ENABLE (0x04000000U)
+
+/* 32-bit source. */
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_CLRMSK (0xFF00FFFFU)
+
+/* 64-bit source. */
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_CLRMSK (0xFFFF80FFU)
+
+/* DOUT Destination. */
+#define PVR_ROGUE_PDSINST_DOUT_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_CLRMSK (0xFFFFFFF8U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTD (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTW (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTU (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTV (0x00000003U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTI (0x00000004U)
+#if defined(ROGUE_FEATURE_COMPUTE)
+# define PVR_ROGUE_PDSINST_DOUT_DST_DOUTC (0x00000005U)
+#endif /* ROGUE_FEATURE_COMPUTE */
+
+/* Shift */
+
+#endif /* PVR_ROGUE_PDS_DEFS_H */
diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h b/src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h
new file mode 100644
index 00000000000..719f3ffa8a7
--- /dev/null
+++ b/src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DISASM_H
+#define PVR_ROGUE_PDS_DISASM_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+
+/* Type of operand for an instruction. */
+#define PVR_PDS_OPERAND_TYPES \
+   X(TEMP32, temp, 32) \
+   X(PTEMP32, ptemp, 32) \
+   X(CONST32, const, 32) \
+   X(TEMP64, temp, 64) \
+   X(PTEMP64, ptemp, 64) \
+   X(CONST64, const, 64) \
+   X(UNRESOLVED, UNRESOLVED, 0) \
+   X(LITERAL_NUM, literal, 0)
+
+#define X(enum, str, size) enum,
+enum pvr_operand_type { PVR_PDS_OPERAND_TYPES };
+#undef X
+
+#if defined(DUMP_PDS)
+# define PVR_PDS_PRINT_INST(X) pvr_pds_print_instruction(X)
+# define PVR_PDS_PRINT_DATA(X, Y, Z) \
+   mesa_logd("\t%s : DATA = 0x%lX ADDRESS = 0x%X\n", X, (uint64_t)(Y), Z)
+#else
+# define PVR_PDS_PRINT_INST(X)
+# define PVR_PDS_PRINT_DATA(X, Y, Z)
+#endif
+
+#define PVR_INSTRUCTION_STMP
+#define PVR_INSTRUCTION_IDIV
+#define PVR_INSTRUCTION_AA
+#define PVR_INSTRUCTION_POL
+#define PVR_INSTRUCTION_IDF
+
+#define PVR_INSTRUCTIONS \
+   X(STM) \
+   PVR_INSTRUCTION_STMP \
+   PVR_INSTRUCTION_IDIV \
+   PVR_INSTRUCTION_AA \
+   PVR_INSTRUCTION_IDF \
+   PVR_INSTRUCTION_POL \
+   X(STMC) \
+   X(LD) \
+   X(ST) \
+   X(ADD32) \
+   X(ADD64) \
+   X(MAD) \
+   X(DDMAD) \
+   X(DOUT) \
+   X(CMP) \
+   X(BRA) \
+   X(LIMM) \
+   X(SFTLP32) \
+   X(SFTLP64) \
+   X(WDF) \
+   X(LOCK) \
+   X(RELEASE) \
+   X(HALT) \
+   X(NOP)
+
+#define X(a) INS_##a,
+enum pvr_instruction_type { PVR_INSTRUCTIONS };
+#undef X
+
+struct pvr_predicate {
+   uint32_t predicate;
+   bool negate;
+};
+
+struct pvr_instruction;
+
+/* Operands are either sources or the dst of an instruction. */
+struct pvr_operand {
+   enum pvr_operand_type type;
+
+   struct pvr_instruction *instruction;
+   uint64_t literal; /* Literal value if type == LITERAL_NUM. */
+   int address; /* Address in word-sizes. */
+   unsigned absolute_address; /* Address in segment. */
+   unsigned index; /* Index within instruction, 0 = dst, 1 = src0 ... */
+   bool negate; /* True if the literal is negative. 
*/ +}; + +#define PVR_PDS_LOP \ + X(LOP_NONE, none) \ + X(LOP_NOT, ~) \ + X(LOP_AND, &) \ + X(LOP_OR, |) \ + X(LOP_XOR, xor) \ + X(LOP_XNOR, xnor) \ + X(LOP_NAND, nand) \ + X(LOP_NOR, nor) + +#define X(lop, str) lop, +enum pvr_pds_lop { PVR_PDS_LOP }; +#undef X + +#define PVR_PDS_DOUT_DSTS \ + X(DOUT_D, doutd) \ + X(DOUT_W, doutw) \ + X(DOUT_U, doutu) \ + X(DOUT_V, doutv) \ + X(DOUT_I, douti) \ + X(DOUT_C, doutc) \ + X(DOUT_R, doutr) \ + X(DOUT_INVALID0, invalid) + +#define X(dout_dst, str) dout_dst, +enum pvr_dout_type { PVR_PDS_DOUT_DSTS }; +#undef X + +#define PVR_PDS_MAX_INST_STR_LEN 256 + +enum pvr_cop { COP_EQ, COP_GT, COP_LT, COP_NE }; + +struct pvr_instruction { + enum pvr_instruction_type type; + struct pvr_instruction *next; +}; + +struct pvr_add { + struct pvr_instruction instruction; + struct pvr_operand *dst; + struct pvr_operand *src1; + struct pvr_operand *src0; + bool cc; + bool sna; + bool alum; +}; + +struct pvr_simple { + struct pvr_instruction instruction; + bool cc; +}; + +struct pvr_ldst { + struct pvr_instruction instruction; + bool cc; + struct pvr_operand *src0; + bool st; +}; + +struct pvr_mad { + struct pvr_instruction instruction; + struct pvr_operand *dst; + struct pvr_operand *src0; + struct pvr_operand *src1; + struct pvr_operand *src2; + bool cc; + bool sna; + bool alum; +}; + +struct pvr_stm { + struct pvr_instruction instruction; + struct pvr_operand *src0; + struct pvr_operand *src1; + struct pvr_operand *src2; + struct pvr_operand *src3; + unsigned stream_out; + bool tst; + bool cc; + bool ccs_global; + bool ccs_so; +}; + +struct pvr_stmc { + struct pvr_instruction instruction; + struct pvr_operand *src0; + bool cc; +}; + +struct pvr_bra { + struct pvr_instruction instruction; + struct pvr_predicate *srcc; + struct pvr_predicate *setc; /* negate ignored */ + char *target; + signed address; /* signed relative address */ +}; + +struct pvr_dout { + struct pvr_instruction instruction; + struct pvr_operand *src0; + struct pvr_operand *src1; + enum pvr_dout_type dst; + bool cc; + bool END; +}; + +struct pvr_ddmad { + struct pvr_instruction instruction; + struct pvr_operand *src0; + struct pvr_operand *src1; + struct pvr_operand *src2; + struct pvr_operand *src3; + bool cc; + bool END; +}; + +struct pvr_sftlp { + struct pvr_instruction instruction; + enum pvr_pds_lop lop; + struct pvr_operand *dst; + struct pvr_operand *src0; + struct pvr_operand *src1; + struct pvr_operand *src2; + bool cc; + bool IM; +}; + +struct pvr_limm { + struct pvr_instruction instruction; + bool cc; + bool GR; + struct pvr_operand *dst; + struct pvr_operand *src0; +}; + +struct pvr_cmp { + struct pvr_instruction instruction; + enum pvr_cop cop; + bool IM; + bool cc; + struct pvr_operand *src0; + struct pvr_operand *src1; +}; + +#define PVR_PDS_ERR_PARAM_RANGE 0 /* Error when register is out of range. */ +#define PVR_PDS_ERR_SP_UNKNOWN \ + 1 /* Error when opcode for sp instruction is unknown. */ + +struct pvr_dissassembler_error { + uint32_t type; /* One of PDS_ERR_* */ + enum pvr_instruction_type instruction; /* The type of instruction where + the error occurred. */ + char *text; /* A string representation of the error. */ + uint32_t parameter; /* The parameter of the instruction, 0 = dst, + 1 = src0.. */ + uint32_t raw; /* The raw value that caused the error. */ + + void *context; /* The passed in context. */ +}; + +/* Callback when an error happens. 
*/ +typedef void (*PVR_ERR_CALLBACK)(struct pvr_dissassembler_error); + +void pvr_pds_free_instruction(struct pvr_instruction *inst); +struct pvr_instruction * +pvr_pds_disassemble_instruction2(void *context, + PVR_ERR_CALLBACK error_call_back, + uint32_t instruction); +void pvr_pds_disassemble_instruction(char *buffer, + size_t instr_len, + struct pvr_instruction *instruction); + +#if defined(DUMP_PDS) +void pvr_pds_print_instruction(uint32_t instr); +#endif + +#endif /* PVR_ROGUE_PDS_DISASM_H */ diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_encode.h b/src/imagination/vulkan/pds/pvr_rogue_pds_encode.h new file mode 100644 index 00000000000..a7ba6172c71 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_rogue_pds_encode.h @@ -0,0 +1,538 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef PVR_ROGUE_PDS_ENCODE_H
+#define PVR_ROGUE_PDS_ENCODE_H
+
+#include <stdint.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs64tp(uint32_t value)
+{
+   if (value <= PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER)
+      return PVR_ROGUE_PDSINST_REGS64TP_TEMP64;
+
+   if ((value >= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS64TP_PTEMP64;
+   }
+   return 2;
+}
+
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32(uint32_t value)
+{
+   if (value <= PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32_CONST32;
+
+   if ((value >= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS32_TEMP32;
+   }
+   if ((value >= PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS32_PTEMP32;
+   }
+   return 3;
+}
+
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp64(uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t im,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SFTLP64
+              << PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32t(uint32_t value)
+{
+   if (value <= PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32T_TEMP32;
+
+   return 1;
+}
+
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32tp(uint32_t value)
+{
+   if (value <= PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32TP_TEMP32;
+
+   if ((value >= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS32TP_PTEMP32;
+   }
+   return 2;
+}
+
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp32(uint32_t im,
+                                                          uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEB_SFTLP32
+              << PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT);
+
+   
PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stm(uint32_t CCS_CCS_GLOBAL, + uint32_t CCS_CCS_SO, + uint32_t CCS_CCS_CC, + uint32_t SO_TST, + uint32_t SO, + uint32_t SO_SRC0, + uint32_t SO_SRC1, + uint32_t SO_SRC2, + uint32_t SO_SRC3) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEB_STM + << PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT; + encoded |= ((SO_SRC3 & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT); + encoded |= ((SO_SRC2 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT); + encoded |= ((SO_SRC1 & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT); + encoded |= ((SO_SRC0 & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT); + encoded |= + ((SO & PVR_ROGUE_PDSINST_SO_MASK) << PVR_ROGUE_PDSINST_STM_SO_SHIFT); + encoded |= ((SO_TST & 1U) << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT); + encoded |= ((CCS_CCS_CC & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT); + encoded |= ((CCS_CCS_SO & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT); + encoded |= + ((CCS_CCS_GLOBAL & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t +pvr_pds_inst_decode_field_range_regs64(uint32_t value) +{ + if (value <= PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER) + return PVR_ROGUE_PDSINST_REGS64_CONST64; + + if ((value >= PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER) && + (value <= PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER)) { + return PVR_ROGUE_PDSINST_REGS64_TEMP64; + } + if ((value >= PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER) && + (value <= PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER)) { + return PVR_ROGUE_PDSINST_REGS64_PTEMP64; + } + return 3; +} + +static ALWAYS_INLINE uint32_t pvr_rogue_inst_encode_mad(uint32_t sna, + uint32_t alum, + uint32_t cc, + uint32_t src0, + uint32_t src1, + uint32_t src2, + uint32_t dst) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEA_MAD + << PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT; + encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64T_MASK) + << PVR_ROGUE_PDSINST_MAD_DST_SHIFT); + encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_MAD_CC_SHIFT); + encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT); + encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_MAD_SNA_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add64(uint32_t cc, + uint32_t alum, + uint32_t sna, + uint32_t src0, + uint32_t src1, + uint32_t dst) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD64 + << PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT; + encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_ADD64_DST_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT); + encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT); + encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD64_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add32(uint32_t 
cc, + uint32_t alum, + uint32_t sna, + uint32_t src0, + uint32_t src1, + uint32_t dst) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD32 + << PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT; + encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32TP_MASK) + << PVR_ROGUE_PDSINST_ADD32_DST_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT); + encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT); + encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD32_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmp(uint32_t cc, + uint32_t cop, + uint32_t src0, + uint32_t src1) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP + << PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT; + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT); + encoded |= UINT32_C(0x0) << PVR_ROGUE_PDSINST_CMP_IM_SHIFT; + encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT; + encoded |= + ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMP_COP_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMP_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmpi(uint32_t cc, + uint32_t cop, + uint32_t src0, + uint32_t im16) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP + << PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT; + encoded |= ((im16 & PVR_ROGUE_PDSINST_IMM16_MASK) + << PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK) + << PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT); + encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_IM_SHIFT; + encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT; + encoded |= + ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMPI_COP_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMPI_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_bra(uint32_t srcc, + uint32_t neg, + uint32_t setc, + uint32_t addr) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_BRA + << PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT; + encoded |= ((addr & PVR_ROGUE_PDSINST_BRAADDR_MASK) + << PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT); + encoded |= ((setc & PVR_ROGUE_PDSINST_PREDICATE_MASK) + << PVR_ROGUE_PDSINST_BRA_SETC_SHIFT); + encoded |= ((neg & 1U) << PVR_ROGUE_PDSINST_BRA_NEG_SHIFT); + encoded |= ((srcc & PVR_ROGUE_PDSINST_PREDICATE_MASK) + << PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ld(uint32_t cc, uint32_t src0) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT; + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_LD_SRC0_SHIFT); + encoded |= PVR_ROGUE_PDSINST_OPCODESP_LD << PVR_ROGUE_PDSINST_LD_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LD_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_st(uint32_t cc, uint32_t src0) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP << 
PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT; + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_ST_SRC0_SHIFT); + encoded |= PVR_ROGUE_PDSINST_OPCODESP_ST << PVR_ROGUE_PDSINST_ST_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ST_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_wdf(uint32_t cc) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT; + encoded |= PVR_ROGUE_PDSINST_OPCODESP_WDF << PVR_ROGUE_PDSINST_WDF_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_WDF_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_limm(uint32_t cc, + uint32_t src1, + uint32_t src0, + uint32_t gr) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT; + encoded |= ((gr & 1U) << PVR_ROGUE_PDSINST_LIMM_GR_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_IMM16_MASK) + << PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK) + << PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT); + encoded |= PVR_ROGUE_PDSINST_OPCODESP_LIMM + << PVR_ROGUE_PDSINST_LIMM_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LIMM_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_lock(uint32_t cc) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT; + encoded |= PVR_ROGUE_PDSINST_OPCODESP_LOCK + << PVR_ROGUE_PDSINST_LOCK_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LOCK_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_release(uint32_t cc) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT; + encoded |= PVR_ROGUE_PDSINST_OPCODESP_RELEASE + << PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_halt(uint32_t cc) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT; + encoded |= PVR_ROGUE_PDSINST_OPCODESP_HALT + << PVR_ROGUE_PDSINST_HALT_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_HALT_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stmc(uint32_t cc, + uint32_t so_mask) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP + << PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT; + encoded |= ((so_mask & PVR_ROGUE_PDSINST_SOMASK_MASK) + << PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT); + encoded |= PVR_ROGUE_PDSINST_OPCODESP_STMC + << PVR_ROGUE_PDSINST_STMC_OP_SHIFT; + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_STMC_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t +pvr_rogue_pds_inst_decode_field_range_regs64c(uint32_t value) +{ + if (value <= PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER) + return PVR_ROGUE_PDSINST_REGS64C_CONST64; + + return 1; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ddmad(uint32_t cc, + uint32_t end, + uint32_t src0, + uint32_t src1, + uint32_t src2, + uint32_t src3) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_DDMAD + << PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT; + encoded |= 
((src3 & PVR_ROGUE_PDSINST_REGS64C_MASK) + << PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT); + encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK) + << PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT); + encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DDMAD_END_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_dout(uint32_t cc, + uint32_t end, + uint32_t src1, + uint32_t src0, + uint32_t dst) +{ + uint32_t encoded = 0; + + encoded |= PVR_ROGUE_PDSINST_OPCODEC_DOUT + << PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT; + encoded |= ((dst & PVR_ROGUE_PDSINST_DSTDOUT_MASK) + << PVR_ROGUE_PDSINST_DOUT_DST_SHIFT); + encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK) + << PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT); + encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK) + << PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT); + encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DOUT_END_SHIFT); + encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DOUT_CC_SHIFT); + + PVR_PDS_PRINT_INST(encoded); + + return encoded; +} + +#endif /* PVR_ROGUE_PDS_ENCODE_H */ diff --git a/src/imagination/vulkan/pds/pvr_xgl_pds.c b/src/imagination/vulkan/pds/pvr_xgl_pds.c new file mode 100644 index 00000000000..e153712c2c6 --- /dev/null +++ b/src/imagination/vulkan/pds/pvr_xgl_pds.c @@ -0,0 +1,1725 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include "pvr_device_info.h" +#include "pvr_pds.h" +#include "pvr_rogue_pds_defs.h" +#include "pvr_rogue_pds_disasm.h" +#include "pvr_rogue_pds_encode.h" +#include "util/log.h" +#include "util/macros.h" + +#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER) +#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER) +#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER) + +#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER) +#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER) + +#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER) +#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER) +#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER) + +#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER) +#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER) + +/* 32-bit PTemp index for draw indirect base instance. */ +#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U + +/* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */ +#define PVR_PDS_DDMAD_NUM_CONSTS 8 + +#if defined(TRACE_PDS) +/* Some macros for pretty printing. */ + +# define pvr_debug_pds_const(reg, size, annotation) \ + mesa_logd("const[%d] @ (%dbits) %s", reg, size, annotation) +# define pvr_debug_pds_temp(reg, size, annotation) \ + mesa_logd("temp[%d] @ (%dbits) %s", reg, size, annotation) +# define pvr_debug_pds_note(...) mesa_logd(" // " __VA_ARGS__) +# define pvr_debug_pds_flag(flags, flag) \ + { \ + if ((flags & flag) == flag) \ + mesa_logd(" > " #flag); \ + } +# define pvr_debug(annotation) mesa_logd(annotation) + +#else +# define pvr_debug_pds_const(reg, size, annotation) +# define pvr_debug_pds_temp(reg, size, annotation) +# define pvr_debug_pds_note(...) +# define pvr_debug_pds_flag(flags, flag) +# define pvr_debug(annotation) +#endif + +struct pvr_pds_const_map_entry_write_state { + const struct pvr_pds_info *PDS_info; + struct pvr_const_map_entry *entry; + size_t size_of_last_entry_in_bytes; + uint32_t entry_count; + size_t entries_size_in_bytes; +}; + +static void pvr_init_pds_const_map_entry_write_state( + struct pvr_pds_info *PDS_info, + struct pvr_pds_const_map_entry_write_state *entry_write_state) +{ + entry_write_state->PDS_info = PDS_info; + entry_write_state->entry = PDS_info->entries; + entry_write_state->size_of_last_entry_in_bytes = 0; + entry_write_state->entry_count = 0; + entry_write_state->entries_size_in_bytes = 0; +} + +/* Returns a pointer to the next struct pvr_const_map_entry. */ +static void *pvr_prepare_next_pds_const_map_entry( + struct pvr_pds_const_map_entry_write_state *entry_write_state, + size_t size_of_next_entry_in_bytes) +{ + /* Move on to the next entry. */ + uint8_t *next_entry = ((uint8_t *)entry_write_state->entry + + entry_write_state->size_of_last_entry_in_bytes); + entry_write_state->entry = (struct pvr_const_map_entry *)next_entry; + + entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes; + entry_write_state->entry_count++; + entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes; + + /* Check if we can write into the next entry. 
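+ * (Entries are variable-sized and packed back to back, so each new entry + * starts size_of_last_entry_in_bytes past the previous one; the assert below + * guards against overflowing the caller-allocated info->entries buffer.)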
*/ + assert(entry_write_state->entries_size_in_bytes <= + entry_write_state->PDS_info->entries_size_in_bytes); + + return entry_write_state->entry; +} + +static void pvr_write_pds_const_map_entry_vertex_attribute_address( + struct pvr_pds_const_map_entry_write_state *entry_write_state, + const struct pvr_pds_vertex_dma *DMA, + uint32_t const_val, + bool use_robust_vertex_fetch) +{ + pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d", + DMA->size_in_dwords, + DMA->stride, + DMA->offset, + DMA->binding_index); + + if (use_robust_vertex_fetch) { + struct pvr_const_map_entry_robust_vertex_attribute_address + *robust_attribute_entry; + + robust_attribute_entry = + pvr_prepare_next_pds_const_map_entry(entry_write_state, + sizeof(*robust_attribute_entry)); + robust_attribute_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS; + robust_attribute_entry->const_offset = const_val; + robust_attribute_entry->binding_index = DMA->binding_index; + robust_attribute_entry->component_size_in_bytes = + DMA->component_size_in_bytes; + robust_attribute_entry->offset = DMA->offset; + robust_attribute_entry->stride = DMA->stride; + robust_attribute_entry->size_in_dwords = DMA->size_in_dwords; + robust_attribute_entry->robustness_buffer_offset = + DMA->robustness_buffer_offset; + } else { + struct pvr_const_map_entry_vertex_attribute_address *attribute_entry; + + attribute_entry = + pvr_prepare_next_pds_const_map_entry(entry_write_state, + sizeof(*attribute_entry)); + attribute_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS; + attribute_entry->const_offset = const_val; + attribute_entry->binding_index = DMA->binding_index; + attribute_entry->offset = DMA->offset; + attribute_entry->stride = DMA->stride; + attribute_entry->size_in_dwords = DMA->size_in_dwords; + } +} + +static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc, + uint32_t end, + uint32_t src0) +{ + return pvr_pds_inst_encode_dout(cc, + end, + 0, + src0, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTU); +} + +static uint32_t +pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state, + bool last_DMA, + bool halt, + unsigned int const32, + unsigned int const64, + unsigned int dma_size_in_dwords, + unsigned int destination, + unsigned int store) +{ + uint32_t literal_value; + + /* Encode literal value. */ + literal_value = dma_size_in_dwords + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT; + literal_value |= destination + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT; + literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED | + store; + + if (last_DMA) + literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN; + + /* Create const map entry. 
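+ * (The literal assembled above is the DOUTD SRC1 control word: BSIZE carries + * dma_size_in_dwords, AO the destination offset, CMODE_CACHED the cache mode, + * and LAST_EN flags the final DMA of the program; e.g. an 8-dword burst to + * offset 16 packs BSIZE = 8 with AO = 16.)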
*/ + struct pvr_const_map_entry_literal32 *literal_entry; + + literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = const32; + literal_entry->literal_value = literal_value; + + /* Encode DOUTD */ + return pvr_pds_inst_encode_dout(0, + halt, + R32_C(const32), + R64_C(const64), + PVR_ROGUE_PDSINST_DSTDOUT_DOUTD); +} + +#define pvr_encode_burst_cs(psDataEntry, \ + last_DMA, \ + halt, \ + const32, \ + const64, \ + dma_size_in_dwords, \ + destination) \ + pvr_encode_burst( \ + psDataEntry, \ + last_DMA, \ + halt, \ + const32, \ + const64, \ + dma_size_in_dwords, \ + destination, \ + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE) + +static uint32_t pvr_encode_direct_write( + struct pvr_pds_const_map_entry_write_state *entry_write_state, + bool last_DMA, + bool halt, + unsigned int const32, + unsigned int const64, + uint32_t data_mask, + unsigned int destination, + uint32_t destination_store, + const struct pvr_device_info *dev_info) +{ + struct pvr_const_map_entry_literal32 *literal_entry; + + uint32_t instruction = + pvr_pds_inst_encode_dout(0, + halt, + const32, + const64, + PVR_ROGUE_PDSINST_DSTDOUT_DOUTW); + + literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = const32; + literal_entry->literal_value = destination_store; + + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED; + } + + literal_entry->literal_value |= + destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT; + + if (data_mask == 0x1) { + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER; + } else if (data_mask == 0x2) { + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER; + } else { + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64; + } + + if (last_DMA) { + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; + } + + return instruction; +} + +/* Constant and Temporary register allocation + * - reserve space for a 32-bit register or a 64-bit register + * - returned indices are offsets to 32-bit register locations + * - 64-bit registers need to be aligned to even indices. + */ +#define RESERVE_32BIT 1U +#define RESERVE_64BIT 2U + +#if defined(DEBUG) +# define pvr_find_constant(usage, words, name) \ + pvr_find_constant2(usage, words, name) +# define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name) +#else +# define pvr_find_constant(usage, words, name) \ + pvr_find_constant2(usage, words, NULL) +# define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL) +#endif + +static uint32_t +pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name) +{ + uint32_t const_index = ~0U; + uint32_t step = words; + uint8_t mask = (1 << words) - 1; + + assert(words == 1 || words == 2); + + /* Find a register at 'step' alignment that satisfies the mask. 
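+ * (Worked example, assuming the reservations made later in this file: a + * 32-bit request has words == 1, so mask == 0x1 and step == 1; with + * const_usage[0] == 0xfb, i.e. DOUTU dwords 0:1 plus a DDMAD's 0xf8, bits 0 + * and 1 are taken, bit 2 is free, and the allocator returns const index + * 0 * 8 + 2.)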
*/ + for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) { + for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) { + if ((const_usage[i] & (mask << b)) != 0) + continue; + const_usage[i] |= (mask << b); + const_index = i * 8 + b; + pvr_debug_pds_const(const_index, words * 32, const_name); + return const_index; + } + } + + unreachable("Unexpected: Space cannot be found for constant"); + return ~0U; +} + +#define PVR_MAX_PDS_TEMPS 32 +struct pvr_temp_usage { + uint32_t temp_usage; + uint8_t temp_used; + uint8_t temps_needed; +}; + +#define PVR_INVALID_TEMP UINT8_C(~0) + +static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps, + uint8_t temps_needed, + const char *temp_name) +{ + uint8_t step = temps_needed; + uint8_t mask = (1 << temps_needed) - 1; + + assert(temps_needed == 1 || temps_needed == 2); + assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS); + + for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) { + if ((temps->temp_usage & (mask << i)) != 0) + continue; + + const size_t clzBits = 8 * sizeof(unsigned int); + + temps->temp_usage |= (mask << i); + temps->temp_used += temps_needed; + temps->temps_needed = + clzBits - __builtin_clz((unsigned int)temps->temp_usage); + + pvr_debug_pds_temp(i, temps_needed * 32, temp_name); + + return i; + } + + unreachable("Unexpected: Space cannot be found for temps"); + return PVR_INVALID_TEMP; +} + +/** + * Wrapper macro to add a toggle for "data mode", allowing us to calculate the + * size of a PDS program without actually attempting to store it. + * + * \param dest The array/memory pointer where the PDS program should be stored. + * If the given code is NULL, automatically switch to count mode + * instead of attempting to fill in unallocated memory. + * \param counter The local counter that holds the total instruction count. + * \param statement What function call/value should be stored at dest[counter] + * when dest is non-NULL. + */ + +#define PVR_PDS_MODE_TOGGLE(dest, counter, statement) \ + if (!dest) { \ + counter++; \ + } else { \ + dest[counter++] = statement; \ + PVR_PDS_PRINT_INST(statement); \ + } + +/** + * Generates the PDS vertex primary program for the DMAs listed in the input + * structure. Produces the constant map for the Vulkan driver based upon the + * requirements of the instructions added to the program. + * + * PDS Data Layout + * --------------- + * + * The PDS data is optimized for the DDMAD layout, with the data for those + * instructions laid out first. The data required for other instructions is laid + * out in the entries unused by the DDMADs. + * + * DDMAD layout + * \verbatim + * bank | index | usage + * 0 | 0:1 | temps (current index)[-] + * 2 | 2:3 | stride[32] + * 1 | 4:5 | base address[64] + * 3 | 6:7 | ctrl[64] + * \endverbatim + * + * Each DMA whose stride > 0 requires one entry, laid out as above. We stride + * over the banks to ensure that each ddmad reads each of its operands from a + * different bank (i.e. to remove bank clashes). + * + * Note: This is "wasting" const[0:1] and const[2]; however, these free + * registers will be used by other, non-ddmad instructions. + * + * The const register usage is maintained in the const_usage array; the + * DDMAD instructions, for example, will utilize the top 5 registers in each + * block of 8, hence a 'usage mask' of 0xF8 (0b11111000). 
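+ * + * For example, with two vertex attribute DMAs the usage starts out as + * const_usage = { 0xfb, 0xf8 }: dwords 0:1 of the first block hold the DOUTU + * task control, so dword 2 there and dwords 0:2 of the second block stay free + * for DOUTW control words and literals.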
+ * + * Constant Map + * ------------ + * + * The constant map is built up as we add PDS instructions and passed back + * for the driver to fill in the PDS data section with the correct parameters + * for each draw call. + * + * \param input_program PDS Program description. + * \param code Buffer to be filled in with the PDS program. If NULL is provided, + * automatically switch to count-mode, preventing writes to + * unallocated memory. + * \param info PDS info structure filled in for the driver, contains the + * constant map. + * \param use_robust_vertex_fetch Whether vertex fetches apply range checking. + * \param dev_info PVR device information struct. + */ +void pvr_pds_generate_vertex_primary_program( + struct pvr_pds_vertex_primary_program_input *input_program, + uint32_t *code, + struct pvr_pds_info *info, + bool use_robust_vertex_fetch, + const struct pvr_device_info *dev_info) +{ + struct pvr_pds_const_map_entry_write_state entry_write_state; + struct pvr_const_map_entry_doutu_address *doutu_address_entry; + + uint32_t instruction = 0; /* index into code */ + uint32_t index; /* index used for current attribute, either vertex or + * instance. + */ + + uint32_t total_dma_count = 0; + uint32_t running_dma_count = 0; + + uint32_t write_instance_control = ~0; + uint32_t write_vertex_control = ~0; + uint32_t write_base_instance_control = ~0; + uint32_t write_base_vertex_control = ~0; + uint32_t pvr_write_draw_index_control = ~0; + + uint32_t ddmad_count = 0; + uint32_t doutw_count = 0; + + uint32_t base_instance = 0; + uint32_t base_vertex = 0; + uint32_t draw_index = 0; + + uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 }; + + struct pvr_temp_usage temp_usage = { 0 }; + + uint32_t zero_temp = PVR_INVALID_TEMP; + + uint32_t max_index_temp = PVR_INVALID_TEMP; + uint32_t current_index_temp = PVR_INVALID_TEMP; + + uint32_t index_id_temp = PVR_INVALID_TEMP; + uint32_t base_instance_ID_temp = PVR_INVALID_TEMP; + uint32_t instance_ID_temp = PVR_INVALID_TEMP; + + /* Debug tracing of program flags. */ + pvr_debug("pvr_pds_generate_vertex_primary_program"); + pvr_debug("================================================="); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED); + pvr_debug_pds_flag(input_program->flags, + PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED); + pvr_debug(" "); + + pvr_init_pds_const_map_entry_write_state(info, &entry_write_state); + + /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in + * blocks of 4 we can reserve dwords for the instance/vertex DOUTW. 
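+ * (data_size_in_dwords therefore starts at 4 and only grows below: first to + * PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count when DDMADs are present, then at the + * end of the function to cover the highest const marked in const_usage.)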
+ */ + info->data_size_in_dwords = 4; + + /* Reserve 2 temps - these are automatically filled in by the VDM + * + * For instanced draw calls we manually increment the instance id by the + * base-instance offset which is either provided as a constant, or in a + * ptemp (for draw indirect) + * + * temp - contents + * --------------- + * 0 - index id (pre-filled) + * 1 - base instance + instance id + */ + index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id"); + instance_ID_temp = + pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id"); + + /* Reserve the lowest 2 dwords for DOUTU. + * [------XX] + */ + const_usage[0] = 0x03; + + /* Reserve consts for all the DDMAD's. */ + for (uint32_t dma = 0; dma < input_program->dma_count; dma++) { + /* Mark the consts required by this ddmad "in-use". + * [XXXXX---] + */ + const_usage[ddmad_count++] |= 0xf8; + } + + /* Start off by assuming we can fit everything in the 8 dwords/ddmad + * footprint, if any DOUTD/DOUTW falls outside we will increase this + * counter. + */ + if (ddmad_count) + info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count; + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) { + doutw_count++; + write_vertex_control = + pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl"); + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) { + doutw_count++; + write_instance_control = pvr_find_constant(const_usage, + RESERVE_32BIT, + "Instance id DOUTW Ctrl"); + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) { + doutw_count++; + write_base_instance_control = + pvr_find_constant(const_usage, + RESERVE_32BIT, + "Base Instance DOUTW Ctrl"); + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) { + doutw_count++; + write_base_vertex_control = pvr_find_constant(const_usage, + RESERVE_32BIT, + "Base Vertex DOUTW Ctrl"); + + /* Load base vertex from constant for non-indirect variants. */ + if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) == + 0) { + struct pvr_const_map_entry_base_vertex *psBaseVertexEntry = + (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry; + + base_vertex = + pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex"); + + psBaseVertexEntry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*psBaseVertexEntry)); + psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX; + psBaseVertexEntry->const_offset = base_vertex; + } + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) { + doutw_count++; + pvr_write_draw_index_control = + pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl"); + + /* Set draw index to 0 for non-indirect variants. */ + if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) == + 0) { + struct pvr_const_map_entry_literal32 *literal_entry; + + draw_index = + pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index"); + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = draw_index; + literal_entry->literal_value = 0; + } + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Load absolute instance id into uiInstanceIdTemp. 
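+ * (i.e. instance_ID_temp += the base instance held in ptemp + * PVR_INDIRECT_BASE_INSTANCE_PTEMP; "uiInstanceIdTemp" is a legacy name for + * instance_ID_temp.)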
*/ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP), + /* src1 */ R32_T(instance_ID_temp), + /* dst */ R32TP_T(instance_ID_temp))); + } else if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) { + struct pvr_const_map_entry_base_instance *base_instance_entry = + (struct pvr_const_map_entry_base_instance *)entry_write_state.entry; + + base_instance = + pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance"); + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_C(base_instance), + /* src1 */ R32_T(instance_ID_temp), + /* dst */ R32TP_T(instance_ID_temp))); + + base_instance_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*base_instance_entry)); + base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE; + base_instance_entry->const_offset = base_instance; + } else if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) { + struct pvr_const_map_entry_base_instance *base_instance_entry = + (struct pvr_const_map_entry_base_instance *)entry_write_state.entry; + + base_instance = pvr_find_constant(const_usage, + RESERVE_32BIT, + "base_instance (Driver Const)"); + + /* Base instance provided by the driver. */ + base_instance_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*base_instance_entry)); + base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE; + base_instance_entry->const_offset = base_instance; + } + + total_dma_count = ddmad_count; + + total_dma_count += doutw_count; + + if (use_robust_vertex_fetch) { + pvr_debug_pds_note("RobustBufferVertexFetch Initialization"); + + if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp"); + + /* Load 0 into zero_temp. 
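+ * (zero_temp later feeds the predicated out-of-bounds DDMAD below as its + * index operand, so the conditionally re-issued DMA evaluates + * stride * 0 + base address.)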
*/ + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_limm(0, /* cc */ + zero_temp, /* SRC1 */ + 0, /* SRC0 */ + 0 /* GR */ + )); + } else { + zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp"); + + max_index_temp = + pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex"); + current_index_temp = + pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex"); + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp64( + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */ + 1, /* IM */ + R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP) + */ + R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP) + */ + 0, /* SRC2 (REGS32) */ + R64TP_T(zero_temp >> 1) /* DST (REG64TP) */ + )); + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp64( + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */ + 1, /* IM */ + R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP) + */ + 0, /* SRC1 (REGS64TP) */ + 0, /* SRC2 (REGS32) */ + R64TP_T(current_index_temp >> 1) /* DST */ + /* (REG64TP) */ + )); + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp64( + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */ + 1, /* IM */ + R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP) + */ + 0, /* SRC1 (REGS64TP) */ + 0, /* SRC2 (REGS32) */ + R64TP_T(max_index_temp >> 1) /* DST */ + /* (REG64TP) */ + )); + } + } + + if (input_program->dma_count && use_robust_vertex_fetch) { + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */ + 0, /* Neg */ + PVR_HAS_FEATURE(dev_info, pds_ddmadt) + ? PVR_ROGUE_PDSINST_PREDICATE_OOB + : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */ + 1 /* Addr */ + )); + } + + for (uint32_t dma = 0; dma < input_program->dma_count; dma++) { + uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS; + uint32_t control_word; + struct pvr_const_map_entry_literal32 *literal_entry; + + struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma]; + bool last_DMA = (++running_dma_count == total_dma_count); + + pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA); + + /* The id we use to index into this dma. */ + if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) { + pvr_debug_pds_note("Instance Rate (divisor = %d)", + vertex_dma->divisor); + + /* 4 - madd 0 - needs to be 64-bit aligned + * 5 - madd 1 + */ + if (vertex_dma->divisor > 1) { + const uint32_t adjusted_instance_ID_temp = + pvr_get_temps(&temp_usage, + RESERVE_64BIT, + "adjusted_instance_ID_temp"); + const uint32_t MADD_temp = + pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp"); + + /* 1. Remove base instance value from temp 1 to get instance id + * 2. Divide the instance id by the divisor - Iout = (Iin * + * Multiplier) >> (shift+31) + * 3. Add the base instance back on. + * + * Need two zero temps for the add part of the later MAD. + */ + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_add64( + /* cc */ 0, + /* alum */ 0, + /* sna */ 1, + /* src0 */ R64_T(MADD_temp >> 1), + /* src1 */ R64_T(MADD_temp >> 1), + /* dst */ R64TP_T(MADD_temp >> 1))); + + if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Subtract base instance from temp 1, put into + * adjusted_instance_ID_temp. 
+ */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 1, + /* src0 */ R32_T(instance_ID_temp), + /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP), + /* dst */ R32TP_T(adjusted_instance_ID_temp))); + } else if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) { + /* Subtract base instance from temp 1, put into + * adjusted_instance_ID_temp. + */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 1, + /* src0 */ R32_T(instance_ID_temp), + /* src1 */ R32_C(base_instance), + /* dst */ R32TP_T(adjusted_instance_ID_temp))); + } else { + /* Copy instance from temp 1 to adjusted_instance_ID_temp. + */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_T(instance_ID_temp), + /* src1 */ R32_T(MADD_temp), /* MADD_temp is set + * to 0 at this point. + */ + /* dst */ R32TP_T(adjusted_instance_ID_temp))); + } + + /* shift = the bit of the next highest power of two. */ + uint32_t shift_unsigned = + (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1; + int32_t shift = (int32_t)shift_unsigned; + uint32_t shift_2s_comp; + + pvr_debug_pds_note( + "Perform instance rate divide (as integer multiply and rshift)"); + + const uint32_t multiplier_constant = + pvr_find_constant(const_usage, + RESERVE_32BIT, + "MultiplierConstant (for InstanceDivisor)"); + + /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor; + * note: the division above is integer division. + */ + uint64_t multiplier64 = + (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) + + ((uint64_t)vertex_dma->divisor - (uint64_t)1)) / + (uint64_t)vertex_dma->divisor); + uint32_t multiplier = (uint32_t)multiplier64; + + pvr_debug_pds_note(" - Value of MultiplierConstant = %u", + multiplier); + pvr_debug_pds_note(" - Value of Shift = %d", shift); + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = multiplier_constant; + literal_entry->literal_value = multiplier; + + /* (Iin * Multiplier) */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */ + 0, /* Unsigned ALU mode */ + 0, /* Unconditional */ + R32_C(multiplier_constant), + R32_T(adjusted_instance_ID_temp), + R64_T(MADD_temp / 2), + R64TP_T(MADD_temp / 2))); + + /* >> (shift + 31) */ + shift += 31; + shift *= -1; + + if (shift < -31) { + /* >> (31) */ + shift_2s_comp = 0xFFFE1; + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp64( + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* IM */ 1, /* enable immediate */ + /* SRC0 */ R64_T(MADD_temp / 2), + /* SRC1 */ 0, /* This won't be used + * in a shift + * operation. */ + /* SRC2 (Shift) */ shift_2s_comp, + /* DST */ R64TP_T(MADD_temp / 2))); + shift += 31; + } + + /* >> (shift + 31) */ + shift_2s_comp = *((uint32_t *)&shift); + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp64( + /* cc */ 0, + /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, + /* IM */ 1, /* enable immediate */ + /* SRC0 */ R64_T(MADD_temp / 2), + /* SRC1 */ 0, /* This won't be used + * in a shift + * operation. */ + /* SRC2 (Shift) */ shift_2s_comp, + /* DST */ R64TP_T(MADD_temp / 2))); + + if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Add base instance. 
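+ * (Worked example of the divide above: divisor == 3 gives shift == 2 and + * multiplier == (2^33 + 2) / 3 == 0xAAAAAAAB, so instance id 5 becomes + * (5 * 0xAAAAAAAB) >> 33 == 1, matching 5 / 3.)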
*/ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_T(MADD_temp), + /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP), + /* dst */ R32TP_T(MADD_temp))); + } else if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) { + /* Add base instance. */ + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_T(MADD_temp), + /* src1 */ R32_C(base_instance), + /* dst */ R32TP_T(MADD_temp))); + } + + pvr_debug_pds_note( + "DMA Vertex Index will be sourced from 'MADD_temp'"); + index = MADD_temp; + } else if (vertex_dma->divisor == 0) { + if (base_instance_ID_temp == PVR_INVALID_TEMP) { + base_instance_ID_temp = pvr_get_temps(&temp_usage, + RESERVE_32BIT, + "base_instance_ID_temp"); + } + + /* Load 0 into base_instance_ID_temp. */ + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_limm( + /* cc */ 0, + /* src1 */ base_instance_ID_temp, + /* src0 */ 0, + /* gr */ 0)); + + if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Add base instance. */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP), + /* src1 */ R32_T(base_instance_ID_temp), + /* dst */ R32TP_T(base_instance_ID_temp))); + + } else if (input_program->flags & + PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) { + /* Add base instance. */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32( + /* cc */ 0, + /* alum */ 0, + /* sna */ 0, + /* src0 */ R32_C(base_instance), + /* src1 */ R32_T(base_instance_ID_temp), + /* dst */ R32TP_T(base_instance_ID_temp))); + } + + pvr_debug_pds_note( + "DMA Vertex Index will be sourced from 'base_instance_ID_temp'"); + index = base_instance_ID_temp; + } else { + pvr_debug_pds_note( + "DMA Vertex Index will be sourced from 'instance_ID_temp'"); + index = instance_ID_temp; + } + } else { + pvr_debug_pds_note( + "DMA Vertex Index will be sourced from 'index_id_temp'"); + index = index_id_temp; + } + + /* DDMAD Const Usage [__XX_---] */ + pvr_write_pds_const_map_entry_vertex_attribute_address( + &entry_write_state, + vertex_dma, + const_base + 4, + use_robust_vertex_fetch); + + /* DDMAD Const Usage [__XXX---] */ + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = const_base + 3; + literal_entry->literal_value = vertex_dma->stride; + + control_word = vertex_dma->size_in_dwords + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; + control_word |= vertex_dma->destination + << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT; + control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED); + + /* DDMADT instructions will do a dummy doutd when OOB if + * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set, but as the driver + * would need to do another doutd after an OOB DDMADT to provide the 'in + * bounds' data, the DDMADT can't be set as LAST. + * + * This requires us to include a final dummy DDMAD.LAST instruction. 
+ * + * Pseudocode taken from SeriesXE2017.PDS Instruction Controller + * Specification.doc + * + * DDMAD src0,src1,src2,src3 + * + * calculated_source_address := src0*src1+src2 + * base_address := src2 + * dma_parameters := src3[31:0] + * buffer_size := src3[63:33] + * test := src3[32] + * + * if (test == 1) { + * // DDMAD(T) + * if (calculated_source_address[39:0] + (burst_size<<2) <= + * base_address[39:0] + buffer_size) { + * OOB := 0 + * DOUTD calculated_source_address,dma_parameters + * } else { + * OOB := 1 + * if (last_instance == 1) { + * dma_parameters[BURST_SIZE] := 0 + * DOUTD calculated_source_address,dma_parameters + * } + * } + * } else { + * // DDMAD + * DOUTD calculated_source_address,dma_parameters + * } + */ + + if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) || + !use_robust_vertex_fetch)) { + pvr_debug_pds_note("LAST DDMAD"); + control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; + } + + /* DDMAD Const Usage [_XXXX---] */ + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = (const_base + 6); + literal_entry->literal_value = control_word; + + if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { + /* DDMAD Const Usage [XXXXX---] + * With DDMADT an extra 32bits of SRC3 contains the information for + * performing out-of-bounds tests on the DMA. + */ + + if (use_robust_vertex_fetch) { + struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size + *oob_buffer_size; + oob_buffer_size = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*oob_buffer_size)); + + oob_buffer_size->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE; + oob_buffer_size->const_offset = const_base + 7; + oob_buffer_size->binding_index = vertex_dma->binding_index; + } else { + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = const_base + 7; + literal_entry->literal_value = 0; + } + + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_ddmad(0, /* cc */ + 0, /* END */ + R32_C(const_base + 3), /* SRC0 (REGS32) */ + index, /* SRC1 (REGS32T) */ + R64_C((const_base + 4) >> 1), /* SRC2 + * (REGS64) + */ + R64_C((const_base + 6) >> 1) /* SRC3 + * (REGS64C) + */ + )); + + if (use_robust_vertex_fetch) { + /* If not out of bounds, skip next DDMAD instructions. */ + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_ddmad( + 1, /* cc */ + 0, /* END */ + R32_C(const_base + 3), /* SRC0 (REGS32) */ + R32_T(zero_temp), /* SRC1 (REGS32T) */ + R64_C((const_base + 4) >> 1), /* SRC2 + * (REGS64) + */ + R64_C((const_base + 6) >> 1) /* SRC3 + * (REGS64C) + */ + )); + + /* Now the driver must have a dummy DDMAD marked as last. 
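+ * (The dummy DDMAD below encodes SRC3 with BSIZE == 0 plus LAST_EN, so it + * transfers no data while still giving the hardware the required LAST-marked + * DMA.)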
*/ + if (last_DMA) { + uint32_t dummy_dma_const = pvr_find_constant(const_usage, + RESERVE_64BIT, + "uDummyDMAConst"); + uint32_t zero_const = + pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst"); + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = zero_const; + literal_entry->literal_value = 0; + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = zero_const + 1; + literal_entry->literal_value = 0; + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = dummy_dma_const; + literal_entry->literal_value = 0; + + literal_entry->literal_value |= + 0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; + literal_entry->literal_value |= + (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED); + literal_entry->literal_value |= + PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; + + literal_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*literal_entry)); + literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32; + literal_entry->const_offset = dummy_dma_const + 1; + literal_entry->literal_value = 0; + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_ddmad( + 0, /* cc */ + 0, /* END */ + R32_C(zero_const), /* SRC0 (REGS32) + */ + R32_T(zero_temp), /* SRC1 (REGS32T) + */ + R64_C((dummy_dma_const) >> 1), /* SRC2 + (REGS64) + */ + R64_C((dummy_dma_const) >> 1) /* SRC3 + (REGS64C) + */ + )); + } + } + } else { + if (use_robust_vertex_fetch) { + struct pvr_const_map_entry_vertex_attribute_max_index + *max_index_entry; + + pvr_debug("RobustVertexFetch DDMAD"); + + const uint32_t max_index_const = + pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const"); + + max_index_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*max_index_entry)); + max_index_entry->const_offset = max_index_const; + max_index_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX; + max_index_entry->binding_index = vertex_dma->binding_index; + max_index_entry->offset = vertex_dma->offset; + max_index_entry->stride = vertex_dma->stride; + max_index_entry->size_in_dwords = vertex_dma->size_in_dwords; + max_index_entry->component_size_in_bytes = + vertex_dma->component_size_in_bytes; + + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_add32(0, /* cc */ + 0, /* ALUM */ + PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */ + R32_C(max_index_const), /* SRC0 + * (REGS32) + */ + R32_T(zero_temp), /* SRC1 (REGS32) */ + R32TP_T(max_index_temp) /* DST + * (REG32TP) + */ + )); + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_stflp32( + 1, /* IM */ + 0, /* cc */ + PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */ + index, /* SRC0 (REGS32T) */ + 0, /* SRC1 (REGS32) */ + 0, /* SRC2 (REG32TP) */ + R32TP_T(current_index_temp) /* DST + * (REG32TP) + */ + )); + + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_cmp( + 0, /* cc enable */ + PVR_ROGUE_PDSINST_COP_GT, /* Operation */ + R64TP_T(current_index_temp >> 1), /* SRC + * (REGS64TP) + */ + R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */ + )); + + PVR_PDS_MODE_TOGGLE(code, + 
instruction, + pvr_pds_inst_encode_stflp32( + 1, /* IM */ + 1, /* cc */ + PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */ + zero_temp, /* SRC0 (REGS32T) */ + 0, /* SRC1 (REGS32) */ + 0, /* SRC2 (REG32TP) */ + R32TP_T(current_index_temp) /* DST + * (REG32TP) + */ + )); + + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_ddmad( + 0, /* cc */ + 0, /* END */ + R32_C(const_base + 3), /* SRC0 (REGS32) */ + current_index_temp, /* SRC1 (REGS32T) */ + R64_C((const_base + 4) >> 1), /* SRC2 + * (REGS64) + */ + (const_base + 6) >> 1 /* SRC3 (REGS64C) */ + )); + } else { + PVR_PDS_MODE_TOGGLE(code, + instruction, + pvr_pds_inst_encode_ddmad( + /* cc */ 0, + /* end */ 0, + /* src0 */ R32_C(const_base + 3), + /* src1 */ (index), + /* src2 */ R64_C((const_base + 4) >> 1), + /* src3 */ (const_base + 6) >> 1)); + } + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) { + bool last_DMA = (++running_dma_count == total_dma_count); + + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_vertex_control), + R64_T(0), + 0x1, + input_program->vertex_id_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) { + bool last_DMA = (++running_dma_count == total_dma_count); + + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_instance_control), + R64_T(0), + 0x2, + input_program->instance_id_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) { + bool last_DMA = (++running_dma_count == total_dma_count); + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Base instance comes from ptemp 1. */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_base_instance_control), + R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1), + 0x2, + input_program->base_instance_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } else { + uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1; + + /* Base instance comes from driver constant. */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_base_instance_control), + R64_C(base_instance >> 1), + data_mask, + input_program->base_instance_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) { + bool last_DMA = (++running_dma_count == total_dma_count); + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_base_vertex_control), + R64_P(0), + 0x1, + input_program->base_vertex_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } else { + uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1; + + /* Base vertex comes from driver constant. 
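+ * (The DOUTW reads the 64-bit const pair at index base_vertex >> 1; the + * data_mask computed above picks the 32-bit half that actually holds the + * value, mapping in pvr_encode_direct_write to SRC1_BSIZE_UPPER for an odd + * base_vertex index and SRC1_BSIZE_LOWER for an even one.)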
*/ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(write_base_vertex_control), + R64_C(base_vertex >> 1), + data_mask, + input_program->base_vertex_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } + } + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) { + bool last_DMA = (++running_dma_count == total_dma_count); + + if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) { + /* Draw index comes from ptemp 3. */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(pvr_write_draw_index_control), + R64_P(1), + 0x2, + input_program->draw_index_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } else { + uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1; + + /* Draw index comes from constant (literal 0). */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_encode_direct_write( + &entry_write_state, + last_DMA, + false, + R64_C(pvr_write_draw_index_control), + R64_C(draw_index >> 1), + data_mask, + input_program->draw_index_register, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, + dev_info)); + } + } + + doutu_address_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*doutu_address_entry)); + doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS; + doutu_address_entry->const_offset = 0; + doutu_address_entry->doutu_control = input_program->usc_task_control.src0; + + if (use_robust_vertex_fetch) { + /* Restore IF0 */ + PVR_PDS_MODE_TOGGLE( + code, + instruction, + pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */ + 0, /* Neg */ + PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */ + 1 /* Addr */ + )); + } + + PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0)); + PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0)); + + assert(running_dma_count == total_dma_count); + + for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) { + if (const_usage[i] == 0) + break; + + info->data_size_in_dwords = + 8 * i + (32 - __builtin_clz((uint32_t)const_usage[i])); + } + + info->temps_required = temp_usage.temps_needed; + info->entry_count = entry_write_state.entry_count; + info->entries_written_size_in_bytes = + entry_write_state.entries_size_in_bytes; + info->code_size_in_dwords = instruction; + + pvr_debug("=================================================\n"); +} + +void pvr_pds_generate_descriptor_upload_program( + struct pvr_descriptor_program_input *input_program, + uint32_t *code_section, + struct pvr_pds_info *info) +{ + unsigned int num_consts64; + unsigned int num_consts32; + unsigned int next_const64; + unsigned int next_const32; + unsigned int instruction = 0; + uint32_t compile_time_buffer_index = 0; + + unsigned int total_dma_count = 0; + unsigned int running_dma_count = 0; + + struct pvr_pds_const_map_entry_write_state entry_write_state; + + /* Calculate the total register usage so we can stick 32-bit consts + * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit + * constant. 
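+ * (Resulting layout: the 64-bit constants are packed first, occupying dwords + * [0 .. num_consts64 * 2 - 1]; next_const32 then starts handing out the + * 32-bit DOUTD control words from dword num_consts64 * 2 onwards.)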
+ */ + num_consts32 = input_program->descriptor_set_count; + num_consts64 = input_program->descriptor_set_count; + total_dma_count = input_program->descriptor_set_count; + + pvr_init_pds_const_map_entry_write_state(info, &entry_write_state); + + for (unsigned int index = 0; index < input_program->buffer_count; index++) { + struct pvr_pds_buffer *buffer = &input_program->buffers[index]; + + /* This switch statement looks pointless but we want to optimize DMAs + * that can be done as a DOUTW. + */ + switch (buffer->type) { + default: { + /* 1 DOUTD per compile time buffer: */ + num_consts32++; + num_consts64++; + total_dma_count++; + break; + } + } + } + + /* DOUTU for the secondary update program requires a 64-bit constant. */ + if (input_program->secondary_program_present) + num_consts64++; + + info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32); + + /* Start counting constants. */ + next_const64 = 0; + next_const32 = num_consts64 * 2; + + /* For each descriptor set perform a DOUTD. */ + for (unsigned int descriptor_index = 0; + descriptor_index < input_program->descriptor_set_count; + descriptor_index++) { + struct pvr_const_map_entry_descriptor_set *descriptor_set_entry; + struct pvr_pds_descriptor_set *descriptor_set = + &input_program->descriptor_sets[descriptor_index]; + + bool last_DMA = (++running_dma_count == total_dma_count); + bool halt = last_DMA && !input_program->secondary_program_present; + + descriptor_set_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*descriptor_set_entry)); + descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET; + descriptor_set_entry->const_offset = next_const64 * 2; + descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set; + descriptor_set_entry->primary = descriptor_set->primary; + descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords; + + PVR_PDS_MODE_TOGGLE(code_section, + instruction, + pvr_encode_burst_cs(&entry_write_state, + last_DMA, + halt, + next_const32, + next_const64, + descriptor_set->size_in_dwords, + descriptor_set->destination)); + + next_const64++; + next_const32++; + } + + for (unsigned int index = 0; index < input_program->buffer_count; index++) { + struct pvr_pds_buffer *buffer = &input_program->buffers[index]; + + bool last_DMA = (++running_dma_count == total_dma_count); + bool halt = last_DMA && !input_program->secondary_program_present; + + switch (buffer->type) { + case PVR_BUFFER_TYPE_PUSH_CONSTS: { + struct pvr_const_map_entry_special_buffer *special_buffer_entry; + + special_buffer_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*special_buffer_entry)); + special_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER; + special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS; + special_buffer_entry->buffer_index = buffer->source_offset; + break; + } + case PVR_BUFFER_TYPE_DYNAMIC: { + struct pvr_const_map_entry_special_buffer *special_buffer_entry; + + special_buffer_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*special_buffer_entry)); + special_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER; + special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC; + special_buffer_entry->buffer_index = buffer->source_offset; + break; + } + case PVR_BUFFER_TYPES_COMPILE_TIME: { + struct pvr_const_map_entry_special_buffer *special_buffer_entry; + + special_buffer_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + 
sizeof(*special_buffer_entry)); + special_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER; + special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME; + special_buffer_entry->buffer_index = compile_time_buffer_index++; + break; + } + case PVR_BUFFER_TYPES_BUFFER_LENGTHS: { + struct pvr_const_map_entry_special_buffer *special_buffer_entry; + + special_buffer_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*special_buffer_entry)); + special_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER; + special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS; + break; + } + case PVR_BUFFER_TYPE_BLEND_CONSTS: { + struct pvr_const_map_entry_special_buffer *special_buffer_entry; + + special_buffer_entry = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*special_buffer_entry)); + special_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER; + special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS; + special_buffer_entry->buffer_index = + input_program->blend_constants_used_mask; + break; + } + case PVR_BUFFER_TYPE_UBO: { + struct pvr_const_map_entry_constant_buffer *constant_buffer_entry; + + constant_buffer_entry = pvr_prepare_next_pds_const_map_entry( + &entry_write_state, + sizeof(*constant_buffer_entry)); + constant_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER; + constant_buffer_entry->buffer_id = buffer->buffer_id; + constant_buffer_entry->desc_set = buffer->desc_set; + constant_buffer_entry->binding = buffer->binding; + constant_buffer_entry->offset = buffer->source_offset; + constant_buffer_entry->size_in_dwords = buffer->size_in_dwords; + break; + } + case PVR_BUFFER_TYPES_UBO_ZEROING: { + struct pvr_const_map_entry_constant_buffer_zeroing + *constant_buffer_entry; + + constant_buffer_entry = pvr_prepare_next_pds_const_map_entry( + &entry_write_state, + sizeof(*constant_buffer_entry)); + constant_buffer_entry->type = + PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING; + constant_buffer_entry->buffer_id = buffer->buffer_id; + constant_buffer_entry->offset = buffer->source_offset; + constant_buffer_entry->size_in_dwords = buffer->size_in_dwords; + break; + } + } + + entry_write_state.entry->const_offset = next_const64 * 2; + + PVR_PDS_MODE_TOGGLE(code_section, + instruction, + pvr_encode_burst_cs(&entry_write_state, + last_DMA, + halt, + next_const32, + next_const64, + buffer->size_in_dwords, + buffer->destination)); + + next_const64++; + next_const32++; + } + + if (total_dma_count != running_dma_count) + fprintf(stderr, "Mismatch in DMA count\n"); + + if (input_program->secondary_program_present) { + struct pvr_const_map_entry_doutu_address *doutu_address; + + PVR_PDS_MODE_TOGGLE(code_section, + instruction, + pvr_pds_encode_doutu(false, true, next_const64)); + + doutu_address = + pvr_prepare_next_pds_const_map_entry(&entry_write_state, + sizeof(*doutu_address)); + doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS; + doutu_address->const_offset = next_const64 * 2; + doutu_address->doutu_control = input_program->secondary_task_control.src0; + + next_const64++; + } + + if (instruction == 0 && input_program->must_not_be_empty) { + PVR_PDS_MODE_TOGGLE(code_section, + instruction, + pvr_pds_inst_encode_halt( + /* cc */ false)); + } + + info->entry_count = entry_write_state.entry_count; + info->entries_written_size_in_bytes = + entry_write_state.entries_size_in_bytes; + info->code_size_in_dwords = instruction; +} diff --git 
a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c
new file mode 100644
index 00000000000..142f98b4388
--- /dev/null
+++ b/src/imagination/vulkan/pvr_blit.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "pvr_csb.h"
+#include "pvr_private.h"
+#include "util/list.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_log.h"
+
+/* TODO: Investigate where this limit comes from. */
+#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
+
+void pvr_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkBlitImageInfo2KHR *pBlitImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdCopyImageToBuffer2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkCopyImageInfo2KHR *pCopyImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+                         VkBuffer dstBuffer,
+                         VkDeviceSize dstOffset,
+                         VkDeviceSize dataSize,
+                         const void *pData)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
+                       VkBuffer dstBuffer,
+                       VkDeviceSize dstOffset,
+                       VkDeviceSize fillSize,
+                       uint32_t data)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdCopyBufferToImage2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
+                            VkImage _image,
+                            VkImageLayout imageLayout,
+                            const VkClearColorValue *pColor,
+                            uint32_t rangeCount,
+                            const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+                                   VkImage image_h,
+                                   VkImageLayout imageLayout,
+                                   const VkClearDepthStencilValue *pDepthStencil,
+                                   uint32_t rangeCount,
+                                   const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
+                           const VkCopyBufferInfo2KHR *pCopyBufferInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
+   const size_t regions_size =
+      pCopyBufferInfo->regionCount * sizeof(*pCopyBufferInfo->pRegions);
+   struct pvr_transfer_cmd *transfer_cmd;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   transfer_cmd = vk_alloc(&cmd_buffer->vk.pool->alloc,
+                           sizeof(*transfer_cmd) + regions_size,
+                           8U,
+                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!transfer_cmd) {
+      cmd_buffer->state.status =
+         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      return;
+   }
+
+   transfer_cmd->src = src;
+   transfer_cmd->dst = dst;
+   transfer_cmd->region_count = pCopyBufferInfo->regionCount;
+   memcpy(transfer_cmd->regions, pCopyBufferInfo->pRegions, regions_size);
+
+   pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
+}
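+/* For reference, a minimal sketch of what pvr_cmd_buffer_add_transfer_cmd()
+ * is expected to do: start (or continue) a transfer sub-command and queue
+ * the new command on its list. The real helper lives elsewhere in the
+ * driver; the body below is an illustrative assumption, not the actual
+ * implementation:
+ *
+ *    static VkResult
+ *    pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
+ *                                    struct pvr_transfer_cmd *transfer_cmd)
+ *    {
+ *       VkResult result;
+ *
+ *       // Make sure the current sub-command accepts transfer commands.
+ *       result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer,
+ *                                             PVR_SUB_CMD_TYPE_TRANSFER);
+ *       if (result != VK_SUCCESS)
+ *          return result;
+ *
+ *       // Queue the command; it is freed along with the sub-command.
+ *       list_addtail(&transfer_cmd->link,
+ *                    &cmd_buffer->state.current_sub_cmd
+ *                        ->transfer.transfer_cmds);
+ *
+ *       return VK_SUCCESS;
+ *    }
+ */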
+
+void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
+                             uint32_t attachmentCount,
+                             const VkClearAttachment *pAttachments,
+                             uint32_t rectCount,
+                             const VkClearRect *pRects)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+                             const VkResolveImageInfo2KHR *pResolveImageInfo)
+{
+   assert(!"Unimplemented");
+}
diff --git a/src/imagination/vulkan/pvr_bo.c b/src/imagination/vulkan/pvr_bo.c
new file mode 100644
index 00000000000..a7450805889
--- /dev/null
+++ b/src/imagination/vulkan/pvr_bo.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+static uint32_t pvr_bo_alloc_to_winsys_flags(uint64_t flags)
+{
+   uint32_t ws_flags = 0;
+
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+      ws_flags |= PVR_WINSYS_BO_FLAG_CPU_ACCESS;
+
+   if (flags & PVR_BO_ALLOC_FLAG_GPU_UNCACHED)
+      ws_flags |= PVR_WINSYS_BO_FLAG_GPU_UNCACHED;
+
+   if (flags & PVR_BO_ALLOC_FLAG_PM_FW_PROTECT)
+      ws_flags |= PVR_WINSYS_BO_FLAG_PM_FW_PROTECT;
+
+   if (flags & PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC)
+      ws_flags |= PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC;
+
+   return ws_flags;
+}
+
+/**
+ * \brief Helper interface to allocate a GPU buffer and map it to both host and
+ * device virtual memory. Host mapping is conditional and is controlled by
+ * flags.
+ *
+ * \param[in] device      Logical device pointer.
+ * \param[in] heap        Heap to allocate device virtual address from.
+ * \param[in] size        Size of buffer to allocate.
+ * \param[in] alignment   Required alignment of the allocation. Must be a power
+ *                        of two.
+ * \param[in] flags Controls allocation, CPU and GPU mapping behavior + * using PVR_BO_ALLOC_FLAG_*. + * \param[out] pvr_bo_out On success output buffer is returned in this pointer. + * \return VK_SUCCESS on success, or error code otherwise. + * + * \sa #pvr_bo_free() + */ +VkResult pvr_bo_alloc(struct pvr_device *device, + struct pvr_winsys_heap *heap, + uint64_t size, + uint64_t alignment, + uint64_t flags, + struct pvr_bo **const pvr_bo_out) +{ + const uint32_t ws_flags = pvr_bo_alloc_to_winsys_flags(flags); + struct pvr_bo *pvr_bo; + pvr_dev_addr_t addr; + VkResult result; + + pvr_bo = vk_alloc(&device->vk.alloc, + sizeof(*pvr_bo), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pvr_bo) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = device->ws->ops->buffer_create(device->ws, + size, + alignment, + PVR_WINSYS_BO_TYPE_GPU, + ws_flags, + &pvr_bo->bo); + if (result != VK_SUCCESS) + goto err_vk_free; + + if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED) { + void *map = device->ws->ops->buffer_map(pvr_bo->bo); + if (!map) { + result = VK_ERROR_MEMORY_MAP_FAILED; + goto err_buffer_destroy; + } + } + + pvr_bo->vma = device->ws->ops->heap_alloc(heap, size, alignment); + if (!pvr_bo->vma) { + result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + goto err_buffer_unmap; + } + + addr = device->ws->ops->vma_map(pvr_bo->vma, pvr_bo->bo, 0, size); + if (!addr.addr) { + result = VK_ERROR_MEMORY_MAP_FAILED; + goto err_heap_free; + } + + *pvr_bo_out = pvr_bo; + + return VK_SUCCESS; + +err_heap_free: + device->ws->ops->heap_free(pvr_bo->vma); + +err_buffer_unmap: + if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED) + device->ws->ops->buffer_unmap(pvr_bo->bo); + +err_buffer_destroy: + device->ws->ops->buffer_destroy(pvr_bo->bo); + +err_vk_free: + vk_free(&device->vk.alloc, pvr_bo); + + return result; +} + +/** + * \brief Interface to map the buffer into host virtual address space. + * + * Buffer should have been created with the #PVR_BO_ALLOC_FLAG_CPU_ACCESS + * flag. It should also not already be mapped or it should have been unmapped + * using #pvr_bo_cpu_unmap() before mapping again. + * + * \param[in] device Logical device pointer. + * \param[in] pvr_bo Buffer to map. + * \return Valid host virtual address on success, or NULL otherwise. + * + * \sa #pvr_bo_alloc(), #PVR_BO_ALLOC_FLAG_CPU_MAPPED + */ +void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *pvr_bo) +{ + assert(!pvr_bo->bo->map); + + return device->ws->ops->buffer_map(pvr_bo->bo); +} + +/** + * \brief Interface to unmap the buffer from host virtual address space. + * + * Buffer should have a valid mapping, created either using #pvr_bo_cpu_map() or + * by passing #PVR_BO_ALLOC_FLAG_CPU_MAPPED flag to #pvr_bo_alloc() at + * allocation time. + * + * Buffer can be remapped using #pvr_bo_cpu_map(). + * + * \param[in] device Logical device pointer. + * \param[in] pvr_bo Buffer to unmap. + */ +void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *pvr_bo) +{ + assert(pvr_bo->bo->map); + device->ws->ops->buffer_unmap(pvr_bo->bo); +} + +/** + * \brief Interface to free the buffer object. + * + * \param[in] device Logical device pointer. + * \param[in] pvr_bo Buffer to free. 
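+ *
+ * A typical lifetime, as an illustrative sketch only (error handling
+ * trimmed; the size and alignment values are placeholders):
+ * \code
+ * struct pvr_bo *bo;
+ *
+ * if (pvr_bo_alloc(device, device->heaps.general_heap, size, 16U,
+ *                  PVR_BO_ALLOC_FLAG_CPU_MAPPED, &bo) == VK_SUCCESS) {
+ *    memcpy(bo->bo->map, data, size);
+ *    pvr_bo_free(device, bo);
+ * }
+ * \endcode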
+ *
+ * \sa #pvr_bo_alloc()
+ */
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   if (!pvr_bo)
+      return;
+
+   device->ws->ops->vma_unmap(pvr_bo->vma);
+   device->ws->ops->heap_free(pvr_bo->vma);
+
+   if (pvr_bo->bo->map)
+      device->ws->ops->buffer_unmap(pvr_bo->bo);
+
+   device->ws->ops->buffer_destroy(pvr_bo->bo);
+
+   vk_free(&device->vk.alloc, pvr_bo);
+}
diff --git a/src/imagination/vulkan/pvr_bo.h b/src/imagination/vulkan/pvr_bo.h
new file mode 100644
index 00000000000..2a00c24e369
--- /dev/null
+++ b/src/imagination/vulkan/pvr_bo.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_BO_H
+#define PVR_BO_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "util/list.h"
+#include "util/macros.h"
+
+struct pvr_device;
+struct pvr_winsys_bo;
+struct pvr_winsys_vma;
+struct pvr_winsys_heap;
+
+struct pvr_bo {
+   /* Since multiple components (csb, caching logic, etc.) can make use of
+    * linking buffers in a list, we add 'link' in pvr_bo to avoid an extra
+    * level of structure inheritance. It's the responsibility of the buffer
+    * user to manage the list and remove the buffer from the list before
+    * freeing it.
+    */
+   struct list_head link;
+
+   struct pvr_winsys_bo *bo;
+   struct pvr_winsys_vma *vma;
+};
+
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * CPU accessible. This is required in order to map a buffer with
+ * #pvr_bo_cpu_map().
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should
+ * be mapped to the CPU. Implies #PVR_BO_ALLOC_FLAG_CPU_ACCESS.
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_MAPPED \
+   (BITFIELD_BIT(1U) | PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * mapped to the GPU as uncached.
+ */
+#define PVR_BO_ALLOC_FLAG_GPU_UNCACHED BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer GPU
+ * mapping should be restricted to only allow access to the Parameter Manager
+ * unit and firmware processor.
+ */
+#define PVR_BO_ALLOC_FLAG_PM_FW_PROTECT BITFIELD_BIT(3U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * zeroed at allocation time.
+ */
+#define PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(4U)
+
+VkResult pvr_bo_alloc(struct pvr_device *device,
+                      struct pvr_winsys_heap *heap,
+                      uint64_t size,
+                      uint64_t alignment,
+                      uint64_t flags,
+                      struct pvr_bo **const bo_out);
+void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *bo);
+
+#endif /* PVR_BO_H */
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
new file mode 100644
index 00000000000..d1677451c22
--- /dev/null
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -0,0 +1,4602 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "c11_compat.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_end_of_tile.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_common.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "util/u_pack_color.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+/* Structure used to pass data into the
+ * pvr_compute_generate_control_stream() function.
+ */ +struct pvr_compute_kernel_info { + pvr_dev_addr_t indirect_buffer_addr; + bool global_offsets_present; + uint32_t usc_common_size; + uint32_t usc_unified_size; + uint32_t pds_temp_size; + uint32_t pds_data_size; + bool usc_target_any; + bool is_fence; + uint32_t pds_data_offset; + uint32_t pds_code_offset; + enum PVRX(CDMCTRL_SD_TYPE) sd_type; + bool usc_common_shared; + uint32_t local_size[3]; + uint32_t global_size[3]; + uint32_t max_instances; +}; + +static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd *sub_cmd) +{ + switch (sub_cmd->type) { + case PVR_SUB_CMD_TYPE_GRAPHICS: + pvr_csb_finish(&sub_cmd->gfx.control_stream); + pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo); + pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo); + break; + + case PVR_SUB_CMD_TYPE_COMPUTE: + pvr_csb_finish(&sub_cmd->compute.control_stream); + break; + + case PVR_SUB_CMD_TYPE_TRANSFER: + list_for_each_entry_safe (struct pvr_transfer_cmd, + transfer_cmd, + &sub_cmd->transfer.transfer_cmds, + link) { + list_del(&transfer_cmd->link); + vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); + } + break; + + default: + pvr_finishme("Unsupported sub-command type %d", sub_cmd->type); + break; + } + + list_del(&sub_cmd->link); + vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd); +} + +static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer) +{ + list_for_each_entry_safe (struct pvr_sub_cmd, + sub_cmd, + &cmd_buffer->sub_cmds, + link) { + pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd); + } +} + +static void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer) +{ + struct pvr_cmd_buffer *cmd_buffer = + container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk); + + vk_free(&cmd_buffer->vk.pool->alloc, + cmd_buffer->state.render_pass_info.attachments); + vk_free(&cmd_buffer->vk.pool->alloc, + cmd_buffer->state.render_pass_info.clear_values); + + pvr_cmd_buffer_free_sub_cmds(cmd_buffer); + + list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) { + list_del(&bo->link); + pvr_bo_free(cmd_buffer->device, bo); + } + + util_dynarray_fini(&cmd_buffer->scissor_array); + util_dynarray_fini(&cmd_buffer->depth_bias_array); + + vk_command_buffer_finish(&cmd_buffer->vk); + vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); +} + +static VkResult pvr_cmd_buffer_create(struct pvr_device *device, + struct vk_command_pool *pool, + VkCommandBufferLevel level, + VkCommandBuffer *pCommandBuffer) +{ + struct pvr_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = vk_zalloc(&pool->alloc, + sizeof(*cmd_buffer), + 8U, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = vk_command_buffer_init(&cmd_buffer->vk, pool, level); + if (result != VK_SUCCESS) { + vk_free(&pool->alloc, cmd_buffer); + return result; + } + + cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy; + cmd_buffer->device = device; + + util_dynarray_init(&cmd_buffer->depth_bias_array, NULL); + util_dynarray_init(&cmd_buffer->scissor_array, NULL); + + cmd_buffer->state.status = VK_SUCCESS; + cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL; + + list_inithead(&cmd_buffer->sub_cmds); + list_inithead(&cmd_buffer->bo_list); + + *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; +} + +VkResult +pvr_AllocateCommandBuffers(VkDevice _device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(vk_command_pool, pool, 
pAllocateInfo->commandPool); + PVR_FROM_HANDLE(pvr_device, device, _device); + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + result = pvr_cmd_buffer_create(device, + pool, + pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + while (i--) { + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]); + pvr_cmd_buffer_destroy(cmd_buffer); + } + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) + pCommandBuffers[i] = VK_NULL_HANDLE; + } + + return result; +} + +static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer, + enum pvr_sub_cmd_type type) +{ + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + uint32_t barriers; + + switch (type) { + case PVR_SUB_CMD_TYPE_GRAPHICS: + barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT; + break; + + case PVR_SUB_CMD_TYPE_COMPUTE: + barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT; + break; + + case PVR_SUB_CMD_TYPE_TRANSFER: + barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + barriers = 0; + pvr_finishme("Unsupported sub-command type %d", type); + break; + } + + for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++) + state->barriers_needed[i] |= barriers; +} + +static VkResult pvr_cmd_buffer_upload_tables(struct pvr_device *device, + struct pvr_cmd_buffer *cmd_buffer) +{ + struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd; + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + VkResult result; + + assert(!sub_cmd->gfx.depth_bias_bo && !sub_cmd->gfx.scissor_bo); + + if (cmd_buffer->depth_bias_array.size > 0) { + result = + pvr_gpu_upload(device, + device->heaps.general_heap, + util_dynarray_begin(&cmd_buffer->depth_bias_array), + cmd_buffer->depth_bias_array.size, + cache_line_size, + &sub_cmd->gfx.depth_bias_bo); + if (result != VK_SUCCESS) + return result; + } + + if (cmd_buffer->scissor_array.size > 0) { + result = pvr_gpu_upload(device, + device->heaps.general_heap, + util_dynarray_begin(&cmd_buffer->scissor_array), + cmd_buffer->scissor_array.size, + cache_line_size, + &sub_cmd->gfx.scissor_bo); + if (result != VK_SUCCESS) + goto err_free_depth_bias_bo; + } + + util_dynarray_clear(&cmd_buffer->depth_bias_array); + util_dynarray_clear(&cmd_buffer->scissor_array); + + return VK_SUCCESS; + +err_free_depth_bias_bo: + pvr_bo_free(device, sub_cmd->gfx.depth_bias_bo); + sub_cmd->gfx.depth_bias_bo = NULL; + + return result; +} + +static VkResult pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer) +{ + struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd; + struct pvr_framebuffer *framebuffer = + cmd_buffer->state.render_pass_info.framebuffer; + + pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE0, state0) { + state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr; + state0.word_count = framebuffer->ppp_state_size; + } + + pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE1, state1) { + state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr; + } + + return VK_SUCCESS; +} + +static VkResult +pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer, + const void *const data, + const size_t size, + struct pvr_bo **const pvr_bo_out) +{ + struct pvr_device *const device = cmd_buffer->device; + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + struct pvr_bo *pvr_bo; + VkResult result; 
+ + result = pvr_gpu_upload(device, + device->heaps.general_heap, + data, + size, + cache_line_size, + &pvr_bo); + if (result != VK_SUCCESS) + return result; + + list_add(&pvr_bo->link, &cmd_buffer->bo_list); + + *pvr_bo_out = pvr_bo; + + return VK_SUCCESS; +} + +static VkResult +pvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer, + const void *const code, + const size_t code_size, + uint64_t code_alignment, + struct pvr_bo **const pvr_bo_out) +{ + struct pvr_device *const device = cmd_buffer->device; + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + struct pvr_bo *pvr_bo; + VkResult result; + + code_alignment = MAX2(code_alignment, cache_line_size); + + result = + pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo); + if (result != VK_SUCCESS) + return result; + + list_add(&pvr_bo->link, &cmd_buffer->bo_list); + + *pvr_bo_out = pvr_bo; + + return VK_SUCCESS; +} + +static VkResult +pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer, + const uint32_t *data, + uint32_t data_size_dwords, + uint32_t data_alignment, + const uint32_t *code, + uint32_t code_size_dwords, + uint32_t code_alignment, + uint64_t min_alignment, + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_device *const device = cmd_buffer->device; + VkResult result; + + result = pvr_gpu_upload_pds(device, + data, + data_size_dwords, + data_alignment, + code, + code_size_dwords, + code_alignment, + min_alignment, + pds_upload_out); + if (result != VK_SUCCESS) + return result; + + list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list); + + return VK_SUCCESS; +} + +static inline VkResult +pvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer, + const uint32_t *data, + uint32_t data_size_dwords, + uint32_t data_alignment, + struct pvr_pds_upload *const pds_upload_out) +{ + return pvr_cmd_buffer_upload_pds(cmd_buffer, + data, + data_size_dwords, + data_alignment, + NULL, + 0, + 0, + data_alignment, + pds_upload_out); +} + +static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( + struct pvr_cmd_buffer *const cmd_buffer, + const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_pds_event_program pixel_event_program = { + /* No data to DMA, just a DOUTU needed. */ + .num_emit_word_pairs = 0, + }; + const uint32_t staging_buffer_size = + cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t); + const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc; + struct pvr_device *const device = cmd_buffer->device; + /* FIXME: This should come from the compiler for the USC pixel program. */ + const uint32_t usc_temp_count = 0; + struct pvr_bo *usc_eot_program; + uint8_t *usc_eot_program_ptr; + uint32_t *staging_buffer; + VkResult result; + + result = pvr_cmd_buffer_upload_usc(cmd_buffer, + pvr_end_of_tile_program, + sizeof(pvr_end_of_tile_program), + 4, + &usc_eot_program); + if (result != VK_SUCCESS) + return result; + + assert((pbe_cs_words[1] & 0x3F) == 0x20); + + /* FIXME: Stop patching the framebuffer address (this will require the + * end-of-tile program to be generated at run-time). 
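+    *
+    * For reference: the four byte stores below copy pbe_cs_words[0] into
+    * bytes 6..9 of the uploaded end-of-tile USC program, least significant
+    * byte first.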
+ */ + pvr_bo_cpu_map(device, usc_eot_program); + usc_eot_program_ptr = usc_eot_program->bo->map; + usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF; + usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF; + usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF; + usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF; + pvr_bo_cpu_unmap(device, usc_eot_program); + + pvr_pds_setup_doutu(&pixel_event_program.task_control, + usc_eot_program->vma->dev_addr.addr, + usc_temp_count, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); + + /* TODO: We could skip allocating this and generate directly into the device + * buffer thus removing one allocation and memcpy() per job. Would this + * speed up things in a noticeable way? + */ + staging_buffer = vk_alloc(allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_usc_pixel_program; + } + + /* Generate the data segment. The code segment was uploaded earlier when + * setting up the PDS static heap data. + */ + pvr_pds_generate_pixel_event_data_segment(&pixel_event_program, + staging_buffer, + &device->pdevice->dev_info); + + result = pvr_cmd_buffer_upload_pds_data( + cmd_buffer, + staging_buffer, + cmd_buffer->device->pixel_event_data_size_in_dwords, + 4, + pds_upload_out); + if (result != VK_SUCCESS) + goto err_free_pixel_event_staging_buffer; + + vk_free(allocator, staging_buffer); + + return VK_SUCCESS; + +err_free_pixel_event_staging_buffer: + vk_free(allocator, staging_buffer); + +err_free_usc_pixel_program: + list_del(&usc_eot_program->link); + pvr_bo_free(device, usc_eot_program); + + return result; +} + +static uint32_t pvr_get_hw_clear_color(VkFormat vk_format, + const VkClearValue *clear_value) +{ + union util_color uc = { .ui = 0 }; + + switch (vk_format) { + case VK_FORMAT_B8G8R8A8_UNORM: + util_pack_color(clear_value->color.float32, + PIPE_FORMAT_R8G8B8A8_UNORM, + &uc); + break; + + default: + assert(!"Unsupported format"); + uc.ui[0] = 0; + break; + } + + return uc.ui[0]; +} + +static VkResult +pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, + uint32_t idx, + pvr_dev_addr_t *const addr_out) +{ + const struct pvr_render_pass_info *render_pass_info = + &cmd_buffer->state.render_pass_info; + const struct pvr_render_pass *pass = render_pass_info->pass; + const struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[idx]; + ASSERTED const struct pvr_load_op *load_op = hw_render->client_data; + const struct pvr_renderpass_colorinit *color_init = + &hw_render->color_init[0]; + const struct pvr_render_pass_attachment *attachment = + &pass->attachments[color_init->driver_id]; + const VkClearValue *clear_value = + &render_pass_info->clear_values[color_init->driver_id]; + uint32_t hw_clear_value; + struct pvr_bo *clear_bo; + VkResult result; + + pvr_finishme("Add missing load op data support"); + + assert(load_op->is_hw_object); + assert(hw_render->color_init_count == 1); + + /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */ + assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR); + + /* FIXME: do this at the point we store the clear values? 
*/ + hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value); + + result = pvr_cmd_buffer_upload_general(cmd_buffer, + &hw_clear_value, + sizeof(hw_clear_value), + &clear_bo); + if (result != VK_SUCCESS) + return result; + + *addr_out = clear_bo->vma->dev_addr; + + return VK_SUCCESS; +} + +static VkResult pvr_load_op_pds_data_create_and_upload( + struct pvr_cmd_buffer *cmd_buffer, + uint32_t idx, + pvr_dev_addr_t constants_addr, + struct pvr_pds_upload *const pds_upload_out) +{ + const struct pvr_render_pass_info *render_pass_info = + &cmd_buffer->state.render_pass_info; + const struct pvr_load_op *load_op = + render_pass_info->pass->hw_setup->renders[idx].client_data; + struct pvr_device *device = cmd_buffer->device; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + struct pvr_pds_pixel_shader_sa_program program = { 0 }; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + program.num_texture_dma_kicks = 1; + + pvr_csb_pack (&program.texture_dma_address[0], + PDSINST_DOUT_FIELDS_DOUTD_SRC0, + value) { + value.sbase = constants_addr; + } + + pvr_csb_pack (&program.texture_dma_control[0], + PDSINST_DOUT_FIELDS_DOUTD_SRC1, + value) { + value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE); + value.a0 = load_op->shareds_dest_offset; + value.bsize = load_op->shareds_count; + } + + pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info); + + staging_buffer_size = program.data_size * sizeof(*staging_buffer); + + staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_generate_pixel_shader_sa_texture_state_data(&program, + staging_buffer, + dev_info); + + result = pvr_cmd_buffer_upload_pds_data(cmd_buffer, + staging_buffer, + program.data_size, + 1, + pds_upload_out); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer); + return result; + } + + vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer); + + return VK_SUCCESS; +} + +/* FIXME: Should this function be specific to the HW background object, in + * which case its name should be changed, or should it have the load op + * structure passed in? 
+ */ +static VkResult +pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, + uint32_t idx, + struct pvr_pds_upload *const pds_upload_out) +{ + pvr_dev_addr_t constants_addr; + VkResult result; + + result = + pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr); + if (result != VK_SUCCESS) + return result; + + return pvr_load_op_pds_data_create_and_upload(cmd_buffer, + idx, + constants_addr, + pds_upload_out); +} + +static void pvr_pds_bgnd_pack_state( + const struct pvr_load_op *load_op, + const struct pvr_pds_upload *load_op_program, + uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS]) +{ + pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) { + value.shader_addr.addr = load_op->pds_frag_prog.data_offset; + value.texunicode_addr.addr = load_op->pds_tex_state_prog.code_offset; + } + + pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) { + value.texturedata_addr.addr = load_op_program->data_offset; + } + + pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) { + value.usc_sharedsize = + DIV_ROUND_UP(load_op->const_shareds_count, + PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE)); + value.pds_texturestatesize = DIV_ROUND_UP( + load_op_program->data_size, + PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE)); + value.pds_tempsize = + DIV_ROUND_UP(load_op->temps_count, + PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE)); + } +} + +/** + * \brief Calculates the stride in pixels based on the pitch in bytes and pixel + * format. + * + * \param[in] pitch Width pitch in bytes. + * \param[in] vk_format Vulkan image format. + * \return Stride in pixels. + */ +static inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format) +{ + const unsigned int cpp = vk_format_get_blocksize(vk_format); + + assert(pitch % cpp == 0); + + return pitch / cpp; +} + +static void pvr_setup_pbe_state( + struct pvr_device *const device, + struct pvr_framebuffer *framebuffer, + uint32_t mrt_index, + const struct usc_mrt_resource *mrt_resource, + const struct pvr_image_view *const iview, + const VkRect2D *render_area, + const bool down_scale, + const uint32_t samples, + uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], + uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const struct pvr_image *image = iview->image; + uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch; + + struct pvr_pbe_surf_params surface_params; + struct pvr_pbe_render_params render_params; + bool with_packed_usc_channel; + const uint8_t *swizzle; + uint32_t position; + + /* down_scale should be true when performing a resolve, in which case there + * should be more than one sample. + */ + assert((down_scale && samples > 1U) || (!down_scale && samples == 1U)); + + /* Setup surface parameters. 
*/ + + if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) { + switch (iview->vk.format) { + case VK_FORMAT_B8G8R8A8_UNORM: + with_packed_usc_channel = true; + break; + case VK_FORMAT_D32_SFLOAT: + with_packed_usc_channel = false; + break; + default: + unreachable("Unsupported Vulkan image format"); + } + } else { + with_packed_usc_channel = false; + } + + swizzle = pvr_get_format_swizzle(iview->vk.format); + memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle)); + + pvr_pbe_get_src_format_and_gamma(iview->vk.format, + PVR_PBE_GAMMA_NONE, + with_packed_usc_channel, + &surface_params.source_format, + &surface_params.gamma); + + surface_params.is_normalized = vk_format_is_normalized(iview->vk.format); + surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format); + surface_params.nr_components = vk_format_get_nr_components(iview->vk.format); + + /* FIXME: Should we have an inline function to return the address of a mip + * level? + */ + surface_params.addr.addr = + image->vma->dev_addr.addr + + image->mip_levels[iview->vk.base_mip_level].offset; + + surface_params.mem_layout = image->memlayout; + surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format); + surface_params.depth = iview->vk.extent.depth; + surface_params.width = iview->vk.extent.width; + surface_params.height = iview->vk.extent.height; + surface_params.z_only_render = false; + surface_params.down_scale = down_scale; + surface_params.msaa_mode = samples; + + /* Setup render parameters. */ + + if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) { + position = mrt_resource->u.mem.offset_in_dwords; + } else { + assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER); + assert(mrt_resource->u.reg.offset == 0); + + position = mrt_resource->u.reg.out_reg; + } + + assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers)); + + switch (position) { + case 0: + case 4: + render_params.source_start = PVR_PBE_STARTPOS_BIT0; + break; + case 1: + case 5: + render_params.source_start = PVR_PBE_STARTPOS_BIT32; + break; + case 2: + case 6: + render_params.source_start = PVR_PBE_STARTPOS_BIT64; + break; + case 3: + case 7: + render_params.source_start = PVR_PBE_STARTPOS_BIT96; + break; + default: + assert(!"Invalid output register"); + break; + } + + render_params.min_x_clip = MAX2(0, render_area->offset.x); + render_params.min_y_clip = MAX2(0, render_area->offset.y); + render_params.max_x_clip = + MIN2(framebuffer->width, + render_area->offset.x + render_area->extent.width) - + 1; + render_params.max_y_clip = + MIN2(framebuffer->height, + render_area->offset.y + render_area->extent.height) - + 1; + + render_params.slice = 0; + render_params.mrt_index = mrt_index; + + pvr_pbe_pack_state(device, + &surface_params, + &render_params, + pbe_cs_words, + pbe_reg_words); +} + +static struct pvr_render_target * +pvr_get_render_target(const struct pvr_render_pass *pass, + const struct pvr_framebuffer *framebuffer, + uint32_t idx) +{ + const struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[idx]; + uint32_t rt_idx = 0; + + switch (hw_render->sample_count) { + case 1: + case 2: + case 4: + case 8: + rt_idx = util_logbase2(hw_render->sample_count); + break; + + default: + unreachable("Unsupported sample count"); + break; + } + + return &framebuffer->render_targets[rt_idx]; +} + +static uint32_t +pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass, + uint32_t idx, + const struct pvr_device_info *dev_info) +{ + const struct 
pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[idx]; + /* Default value based on the maximum value found in all existing cores. The + * maximum is used as this is being treated as a lower bound, making it a + * "safer" choice than the minimum value found in all existing cores. + */ + const uint32_t min_output_regs = + PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U); + const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs); + + return util_next_power_of_two(width); +} + +static VkResult pvr_sub_cmd_gfx_job_init(struct pvr_device *device, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd *sub_cmd) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + struct pvr_render_pass_info *render_pass_info = + &cmd_buffer->state.render_pass_info; + const struct pvr_renderpass_hwsetup_render *hw_render = + &render_pass_info->pass->hw_setup->renders[sub_cmd->gfx.hw_render_idx]; + struct pvr_render_job *job = &sub_cmd->gfx.job; + struct pvr_pds_upload pds_pixel_event_program; + + uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS] + [ROGUE_NUM_PBESTATE_STATE_WORDS]; + struct pvr_render_target *render_target; + VkResult result; + + assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words)); + + for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) { + const struct pvr_renderpass_hwsetup_eot_surface *surface = + &hw_render->eot_surfaces[i]; + const struct pvr_image_view *iview = + render_pass_info->attachments[surface->attachment_index]; + const struct usc_mrt_resource *mrt_resource = + &hw_render->eot_setup.mrt_resources[surface->mrt_index]; + uint32_t samples = 1; + + if (surface->need_resolve) + pvr_finishme("Set up job resolve information."); + + pvr_setup_pbe_state(device, + render_pass_info->framebuffer, + surface->mrt_index, + mrt_resource, + iview, + &render_pass_info->render_area, + surface->need_resolve, + samples, + pbe_cs_words[i], + job->pbe_reg_words[i]); + } + + /* FIXME: The fragment program only supports a single surface at present. */ + assert(hw_render->eot_surface_count == 1); + result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( + cmd_buffer, + pbe_cs_words[0], + &pds_pixel_event_program); + if (result != VK_SUCCESS) + return result; + + job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset; + + /* FIXME: Don't do this if there is a barrier load. */ + if (render_pass_info->enable_bg_tag) { + const struct pvr_load_op *load_op = hw_render->client_data; + struct pvr_pds_upload load_op_program; + + /* FIXME: Should we free the PDS pixel event data or let it be freed + * when the pool gets emptied? + */ + result = pvr_load_op_data_create_and_upload(cmd_buffer, + sub_cmd->gfx.hw_render_idx, + &load_op_program); + if (result != VK_SUCCESS) + return result; + + pvr_pds_bgnd_pack_state(load_op, + &load_op_program, + job->pds_bgnd_reg_values); + } + + job->enable_bg_tag = render_pass_info->enable_bg_tag; + job->process_empty_tiles = render_pass_info->process_empty_tiles; + + render_target = pvr_get_render_target(render_pass_info->pass, + render_pass_info->framebuffer, + sub_cmd->gfx.hw_render_idx); + job->rt_dataset = render_target->rt_dataset; + + job->ctrl_stream_addr = + pvr_csb_get_start_address(&sub_cmd->gfx.control_stream); + + /* FIXME: Need to set up the border color table at device creation + * time. Set to invalid for the time being. 
+ */ + job->border_colour_table_addr = PVR_DEV_ADDR_INVALID; + + if (sub_cmd->gfx.depth_bias_bo) + job->depth_bias_table_addr = sub_cmd->gfx.depth_bias_bo->vma->dev_addr; + else + job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID; + + if (sub_cmd->gfx.scissor_bo) + job->scissor_table_addr = sub_cmd->gfx.scissor_bo->vma->dev_addr; + else + job->scissor_table_addr = PVR_DEV_ADDR_INVALID; + + job->pixel_output_width = + pvr_pass_get_pixel_output_width(render_pass_info->pass, + sub_cmd->gfx.hw_render_idx, + dev_info); + + if (hw_render->ds_surface_id != -1) { + struct pvr_image_view *iview = + render_pass_info->attachments[hw_render->ds_surface_id]; + const struct pvr_image *image = iview->image; + + if (vk_format_has_depth(image->vk.format)) { + uint32_t level_pitch = + image->mip_levels[iview->vk.base_mip_level].pitch; + + /* FIXME: Is this sufficient for depth buffers? */ + job->depth_addr = image->dev_addr; + + job->depth_stride = + pvr_stride_from_pitch(level_pitch, iview->vk.format); + job->depth_height = iview->vk.extent.height; + job->depth_physical_width = + u_minify(image->physical_extent.width, iview->vk.base_mip_level); + job->depth_physical_height = + u_minify(image->physical_extent.height, iview->vk.base_mip_level); + job->depth_layer_size = image->layer_size; + + if (hw_render->ds_surface_id < render_pass_info->clear_value_count) { + VkClearValue *clear_values = + &render_pass_info->clear_values[hw_render->ds_surface_id]; + + job->depth_clear_value = clear_values->depthStencil.depth; + } else { + job->depth_clear_value = 1.0f; + } + + job->depth_vk_format = iview->vk.format; + + job->depth_memlayout = image->memlayout; + } else { + job->depth_addr = PVR_DEV_ADDR_INVALID; + job->depth_stride = 0; + job->depth_height = 0; + job->depth_physical_width = 0; + job->depth_physical_height = 0; + job->depth_layer_size = 0; + job->depth_clear_value = 1.0f; + job->depth_vk_format = VK_FORMAT_UNDEFINED; + job->depth_memlayout = PVR_MEMLAYOUT_LINEAR; + } + + if (vk_format_has_stencil(image->vk.format)) { + /* FIXME: Is this sufficient for stencil buffers? */ + job->stencil_addr = image->dev_addr; + } else { + job->stencil_addr = PVR_DEV_ADDR_INVALID; + } + + job->samples = image->vk.samples; + } else { + pvr_finishme("Set up correct number of samples for render job"); + + job->depth_addr = PVR_DEV_ADDR_INVALID; + job->depth_stride = 0; + job->depth_height = 0; + job->depth_physical_width = 0; + job->depth_physical_height = 0; + job->depth_layer_size = 0; + job->depth_clear_value = 1.0f; + job->depth_vk_format = VK_FORMAT_UNDEFINED; + job->depth_memlayout = PVR_MEMLAYOUT_LINEAR; + + job->stencil_addr = PVR_DEV_ADDR_INVALID; + + job->samples = 1; + } + + if (sub_cmd->gfx.max_tiles_in_flight == + PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) { + /* Use the default limit based on the partition store. */ + job->max_tiles_in_flight = 0U; + } else { + job->max_tiles_in_flight = sub_cmd->gfx.max_tiles_in_flight; + } + + job->frag_uses_atomic_ops = sub_cmd->gfx.frag_uses_atomic_ops; + job->disable_compute_overlap = false; + job->max_shared_registers = cmd_buffer->state.max_shared_regs; + job->run_frag = true; + job->geometry_terminate = true; + + return VK_SUCCESS; +} + +/* Number of shareds used in the Issue Data Fence(IDF)/Wait Data Fence(WDF) + * kernel. 
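+ *
+ * pvr_sub_cmd_compute_job_init() below uses this value as the lower bound
+ * for a compute sub-command's num_shared_regs.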
+ */ +#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U + +static void pvr_sub_cmd_compute_job_init(struct pvr_device *device, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd *sub_cmd) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + + if (sub_cmd->compute.uses_barrier) { + sub_cmd->compute.submit_info.flags |= + PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; + } + + pvr_csb_pack (&sub_cmd->compute.submit_info.regs.cdm_ctrl_stream_base, + CR_CDM_CTRL_STREAM_BASE, + value) { + value.addr = pvr_csb_get_start_address(&sub_cmd->compute.control_stream); + } + + /* FIXME: Need to set up the border color table at device creation + * time. Set to invalid for the time being. + */ + pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu_border_colour_table, + CR_TPU_BORDER_COLOUR_TABLE_CDM, + value) { + value.border_colour_table_address = PVR_DEV_ADDR_INVALID; + } + + sub_cmd->compute.num_shared_regs = MAX2(PVR_IDF_WDF_IN_REGISTER_CONST_COUNT, + cmd_buffer->state.max_shared_regs); + + cmd_buffer->state.max_shared_regs = 0U; + + if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) + sub_cmd->compute.submit_info.regs.cdm_item = 0; + + pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu, CR_TPU, value) { + value.tag_cem_4k_face_packing = true; + } + + if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && + PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && + rogue_get_num_phantoms(dev_info) > 1 && + sub_cmd->compute.uses_atomic_ops) { + /* Each phantom has its own MCU, so atomicity can only be guaranteed + * when all work items are processed on the same phantom. This means we + * need to disable all USCs other than those of the first phantom, which + * has 4 clusters. + */ + pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster, + CR_COMPUTE_CLUSTER, + value) { + value.mask = 0xFU; + } + } else { + pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster, + CR_COMPUTE_CLUSTER, + value) { + value.mask = 0U; + } + } + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) && + sub_cmd->compute.uses_atomic_ops) { + sub_cmd->compute.submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE; + } +} + +#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \ + (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)) + +static uint32_t pvr_compute_slot_size(const struct pvr_device_info *dev_info, + uint32_t coeff_regs_count, + bool use_barrier, + const uint32_t local_size[static 3U]) +{ + uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK; + uint32_t max_avail_coeff_regs = + rogue_get_cdm_max_local_mem_size_regs(dev_info); + uint32_t localstore_chunks_count = + DIV_ROUND_UP(coeff_regs_count << 2, + PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)); + uint32_t total_workitems = local_size[0U] * local_size[1U] * local_size[2U]; + + /* Ensure that we cannot have more workgroups in a slot than the available + * number of coefficients allow us to have. + */ + if (coeff_regs_count > 0U) { + /* If TA or 3D can overlap with CDM, or if the TA is running a geometry + * shader then we need to consider this in calculating max allowed + * work-groups. + */ + if (PVR_HAS_QUIRK(dev_info, 52354) && + (PVR_HAS_FEATURE(dev_info, compute_overlap) || + PVR_HAS_FEATURE(dev_info, gs_rta_support))) { + /* Solve for n (number of work-groups per task). 
All values are in
+       * size of common store alloc blocks:
+       *
+       *    n + (2n + 7) * (local_memory_size_max - 1) =
+       *       (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
+       * ==>
+       *    n + 2n * (local_memory_size_max - 1) =
+       *       (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
+       *       - (7 * (local_memory_size_max - 1))
+       * ==>
+       *    n * (1 + 2 * (local_memory_size_max - 1)) =
+       *       (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
+       *       - (7 * (local_memory_size_max - 1))
+       * ==>
+       *    n = ((coefficient_memory_pool_size) -
+       *         (7 * pixel_allocation_size_max) -
+       *         (7 * (local_memory_size_max - 1))) /
+       *        (1 + 2 * (local_memory_size_max - 1))
+       */
+      uint32_t max_common_store_blocks =
+         DIV_ROUND_UP(max_avail_coeff_regs * 4U,
+                      PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
+
+      /* (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
+       */
+      max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
+                                 PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS;
+
+      /* - (7 * (local_memory_size_max - 1)) */
+      max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
+                                  (localstore_chunks_count - 1U));
+
+      /* Divide by (1 + 2 * (local_memory_size_max - 1)) */
+      max_workgroups_per_task = max_common_store_blocks /
+                                (1U + 2U * (localstore_chunks_count - 1U));
+
+      max_workgroups_per_task =
+         MIN2(max_workgroups_per_task,
+              ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK);
+
+      } else {
+         max_workgroups_per_task =
+            MIN2((max_avail_coeff_regs / coeff_regs_count),
+                 max_workgroups_per_task);
+      }
+   }
+
+   /* max_workgroups_per_task should be at least one. */
+   assert(max_workgroups_per_task >= 1U);
+
+   if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) {
+      /* In this case, the work group size will have been padded up to the
+       * next ROGUE_MAX_INSTANCES_PER_TASK, so we just set max instances to be
+       * ROGUE_MAX_INSTANCES_PER_TASK.
+       */
+      return ROGUE_MAX_INSTANCES_PER_TASK;
+   }
+
+   /* In this case, the number of instances in the slot must be clamped to
+    * accommodate whole work-groups only.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) {
+      max_workgroups_per_task =
+         MIN2(max_workgroups_per_task,
+              ROGUE_MAX_INSTANCES_PER_TASK / total_workitems);
+      return total_workitems * max_workgroups_per_task;
+   }
+
+   return MIN2(total_workitems * max_workgroups_per_task,
+               ROGUE_MAX_INSTANCES_PER_TASK);
+}
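+/* Worked example for pvr_compute_slot_size(), using assumed figures (these
+ * are illustrative only and not taken from any particular core): with
+ * ROGUE_MAX_INSTANCES_PER_TASK = 32,
+ * ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK = 8,
+ * max_avail_coeff_regs = 512 and coeff_regs_count = 64, and without the
+ * BRN 52354 path, the else branch yields
+ * max_workgroups_per_task = MIN2(512 / 64, 8) = 8. For
+ * local_size = { 4, 4, 1 }, total_workitems = 16 < 32, so with no barrier
+ * and no BRN 49032 the returned slot size is MIN2(16 * 8, 32) = 32.
+ */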
+
+static void
+pvr_compute_generate_control_stream(struct pvr_csb *csb,
+                                    const struct pvr_compute_kernel_info *info)
+{
+   /* Compute kernel 0. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
+      kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
+      kernel0.global_offsets_present = info->global_offsets_present;
+      kernel0.usc_common_size = info->usc_common_size;
+      kernel0.usc_unified_size = info->usc_unified_size;
+      kernel0.pds_temp_size = info->pds_temp_size;
+      kernel0.pds_data_size = info->pds_data_size;
+
+      if (info->usc_target_any)
+         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY);
+      else
+         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ALL);
+
+      kernel0.fence = info->is_fence;
+   }
+
+   /* Compute kernel 1. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) {
+      kernel1.data_addr.addr = info->pds_data_offset;
+      kernel1.sd_type = info->sd_type;
+
+      if (!info->is_fence)
+         kernel1.usc_common_shared = info->usc_common_shared;
+   }
+
+   /* Compute kernel 2. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) {
+      kernel2.code_addr.addr = info->pds_code_offset;
+   }
+
+   if (info->indirect_buffer_addr.addr) {
+      /* Compute kernel 6. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) {
+         kernel6.indirect_addrmsb = info->indirect_buffer_addr;
+      }
+
+      /* Compute kernel 7. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) {
+         kernel7.indirect_addrlsb = info->indirect_buffer_addr;
+      }
+   } else {
+      /* Compute kernel 3. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) {
+         assert(info->global_size[0U] > 0U);
+         kernel3.workgroup_x = info->global_size[0U] - 1U;
+      }
+
+      /* Compute kernel 4. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) {
+         assert(info->global_size[1U] > 0U);
+         kernel4.workgroup_y = info->global_size[1U] - 1U;
+      }
+
+      /* Compute kernel 5. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) {
+         assert(info->global_size[2U] > 0U);
+         kernel5.workgroup_z = info->global_size[2U] - 1U;
+      }
+   }
+
+   /* Compute kernel 8. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) {
+      if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK)
+         kernel8.max_instances = 0U;
+      else
+         kernel8.max_instances = info->max_instances;
+
+      assert(info->local_size[0U] > 0U);
+      kernel8.workgroup_size_x = info->local_size[0U] - 1U;
+      assert(info->local_size[1U] > 0U);
+      kernel8.workgroup_size_y = info->local_size[1U] - 1U;
+      assert(info->local_size[2U] > 0U);
+      kernel8.workgroup_size_z = info->local_size[2U] - 1U;
+   }
+}
+
+static void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
+                                       bool deallocate_shareds)
+{
+   const struct pvr_pds_upload *program =
+      &cmd_buffer->device->pds_compute_fence_program;
+   const struct pvr_device_info *dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;
+
+   struct pvr_compute_kernel_info info = {
+      .indirect_buffer_addr.addr = 0ULL,
+      .global_offsets_present = false,
+      .usc_common_size = 0U,
+      .usc_unified_size = 0U,
+      .pds_temp_size = 0U,
+      .pds_data_size =
+         DIV_ROUND_UP(program->data_size << 2,
+                      PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
+      .usc_target_any = true,
+      .is_fence = true,
+      .pds_data_offset = program->data_offset,
+      .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
+      .usc_common_shared = deallocate_shareds,
+      .pds_code_offset = program->code_offset,
+      .global_size = { 1U, 1U, 1U },
+      .local_size = { 1U, 1U, 1U },
+   };
+
+   /* We don't need to pad the work-group size for this case. Calculate the
+    * slot size; it can depend on the use of barriers, local memory, BRNs or
+    * other factors.
+    */
+   info.max_instances =
+      pvr_compute_slot_size(dev_info, 0U, false, info.local_size);
+
+   pvr_compute_generate_control_stream(csb, &info);
+}
+
+static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
+   struct pvr_device *device = cmd_buffer->device;
+   VkResult result;
+
+   /* FIXME: Is this NULL check required because this function is called from
+    * pvr_resolve_unemitted_resolve_attachments()? See comment about this
+    * function being called twice in a row in pvr_CmdEndRenderPass().
+    */
+   if (!sub_cmd)
+      return VK_SUCCESS;
+
+   switch (sub_cmd->type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+         result = pvr_csb_emit_return(&sub_cmd->gfx.control_stream);
+         if (result != VK_SUCCESS) {
+            state->status = result;
+            return result;
+         }
+
+         break;
+      }
+
+      /* TODO: Check if the sub_cmd can be skipped based on
+       * sub_cmd->gfx.empty_cmd flag.
+ */ + + result = pvr_cmd_buffer_upload_tables(device, cmd_buffer); + if (result != VK_SUCCESS) { + state->status = result; + return result; + } + + result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer); + if (result != VK_SUCCESS) { + state->status = result; + return result; + } + + result = pvr_csb_emit_terminate(&sub_cmd->gfx.control_stream); + if (result != VK_SUCCESS) { + state->status = result; + return result; + } + + result = pvr_sub_cmd_gfx_job_init(device, cmd_buffer, sub_cmd); + if (result != VK_SUCCESS) { + state->status = result; + return result; + } + + break; + + case PVR_SUB_CMD_TYPE_COMPUTE: + pvr_compute_generate_fence(cmd_buffer, true); + + result = pvr_csb_emit_terminate(&sub_cmd->compute.control_stream); + if (result != VK_SUCCESS) { + state->status = result; + return result; + } + + pvr_sub_cmd_compute_job_init(device, cmd_buffer, sub_cmd); + break; + + case PVR_SUB_CMD_TYPE_TRANSFER: + break; + + default: + pvr_finishme("Unsupported sub-command type %d", sub_cmd->type); + break; + } + + state->current_sub_cmd = NULL; + + return VK_SUCCESS; +} + +static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state, + bool start_geom) +{ + if (start_geom) { + /* + * Initial geometry phase State. + * It's the driver's responsibility to ensure that the state of the + * hardware is correctly initialized at the start of every geometry + * phase. This is required to prevent stale state from a previous + * geometry phase erroneously affecting the next geometry phase. The + * following fields in PPP State Header, and their corresponding state + * words, must be supplied in the first PPP State Update of a geometry + * phase that contains any geometry (draw calls). Any field not listed + * below is safe to ignore. + * + * TA_PRES_STREAM_OUT_SIZE + * TA_PRES_PPPCTRL + * TA_PRES_VARYING_WORD2 + * TA_PRES_VARYING_WORD1 + * TA_PRES_VARYING_WORD0 + * TA_PRES_OUTSELECTS + * TA_PRES_WCLAMP + * TA_VIEWPORT_COUNT + * TA_PRES_VIEWPORT + * TA_PRES_REGION_CLIP + * TA_PRES_PDSSTATEPTR0 + * TA_PRES_ISPCTLFB + * TA_PRES_ISPCTLFA + * TA_PRES_ISPCTL + * + * If a geometry phase does not contain any geometry, this restriction + * can be ignored. If the first draw call in a geometry phase will only + * update the depth or stencil buffers i.e. ISP_TAGWRITEDISABLE is set + * in the ISP State Control Word, the PDS State Pointers + * (TA_PRES_PDSSTATEPTR*) in the first PPP State Update do not need to + * be supplied, since they will never reach the PDS in the fragment + * phase. 
+      state->emit_state_bits = 0;
+
+      state->emit_state.stream_out = true;
+      state->emit_state.ppp_control = true;
+      state->emit_state.varying_word2 = true;
+      state->emit_state.varying_word1 = true;
+      state->emit_state.varying_word0 = true;
+      state->emit_state.output_selects = true;
+      state->emit_state.wclamp = true;
+      state->emit_state.viewport = true;
+      state->emit_state.region_clip = true;
+      state->emit_state.pds_fragment_stateptr0 = true;
+      state->emit_state.isp_fb = true;
+      state->emit_state.isp = true;
+   } else {
+      state->emit_state.ppp_control = true;
+      state->emit_state.varying_word1 = true;
+      state->emit_state.varying_word0 = true;
+      state->emit_state.output_selects = true;
+      state->emit_state.viewport = true;
+      state->emit_state.region_clip = true;
+      state->emit_state.pds_fragment_stateptr0 = true;
+      state->emit_state.isp_fb = true;
+      state->emit_state.isp = true;
+   }
+
+   memset(&state->ppp_state, 0U, sizeof(state->ppp_state));
+
+   state->dirty.vertex_bindings = true;
+   state->dirty.gfx_pipeline_binding = true;
+   state->dirty.viewport = true;
+}
+
+static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                             enum pvr_sub_cmd_type type)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_device *device = cmd_buffer->device;
+   struct pvr_sub_cmd *sub_cmd;
+   VkResult result;
+
+   /* Check the current status of the buffer. */
+   if (state->status != VK_SUCCESS)
+      return state->status;
+
+   pvr_cmd_buffer_update_barriers(cmd_buffer, type);
+
+   if (state->current_sub_cmd) {
+      if (state->current_sub_cmd->type == type) {
+         /* Continue adding to the current sub command. */
+         return VK_SUCCESS;
+      }
+
+      /* End the current sub command. */
+      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                       sizeof(*sub_cmd),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!sub_cmd) {
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   sub_cmd->type = type;
+
+   switch (type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.modifies_depth = false;
+      sub_cmd->gfx.modifies_stencil = false;
+      sub_cmd->gfx.max_tiles_in_flight =
+         PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info,
+                               isp_max_tiles_in_flight,
+                               1);
+      sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
+      sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
+      sub_cmd->gfx.empty_cmd = true;
+
+      pvr_reset_graphics_dirty_state(state, true);
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_GRAPHICS,
+                   &sub_cmd->gfx.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_COMPUTE,
+                   &sub_cmd->compute.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      list_inithead(&sub_cmd->transfer.transfer_cmds);
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", type);
+      break;
+   }
+
+   list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds);
+   state->current_sub_cmd = sub_cmd;
+
+   return VK_SUCCESS;
+}
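+
+/* Note on pvr_cmd_buffer_start_sub_cmd() above: consecutive commands of the
+ * same kind append to the current sub-command, e.g. two back-to-back
+ * dispatches record into a single compute sub-command, while a change of
+ * kind ends the current sub-command (see pvr_cmd_buffer_end_sub_cmd()) and
+ * opens a new one.
+ */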
+
+VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
+                                  struct pvr_winsys_heap *heap,
+                                  uint64_t size,
+                                  uint32_t flags,
+                                  struct pvr_bo **const pvr_bo_out)
+{
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&cmd_buffer->device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   result = pvr_bo_alloc(cmd_buffer->device,
+                         heap,
+                         size,
+                         cache_line_size,
+                         flags,
+                         &pvr_bo);
+   if (result != VK_SUCCESS) {
+      cmd_buffer->state.status = result;
+      return result;
+   }
+
+   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+                                VkCommandBufferResetFlags flags)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+static void pvr_cmd_bind_compute_pipeline(
+   const struct pvr_compute_pipeline *const compute_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   cmd_buffer->state.compute_pipeline = compute_pipeline;
+   cmd_buffer->state.dirty.compute_pipeline_binding = true;
+}
+
+static void pvr_cmd_bind_graphics_pipeline(
+   const struct pvr_graphics_pipeline *const gfx_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_dynamic_state *const dest_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_dynamic_state *const src_state =
+      &gfx_pipeline->dynamic_state;
+   struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state;
+   const uint32_t state_mask = src_state->mask;
+
+   cmd_buffer_state->gfx_pipeline = gfx_pipeline;
+   cmd_buffer_state->dirty.gfx_pipeline_binding = true;
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) {
+      assert(!"Unimplemented");
+   }
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) {
+      assert(!"Unimplemented");
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) {
+      dest_state->line_width = src_state->line_width;
+
+      cmd_buffer_state->dirty.line_width = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
+      memcpy(&dest_state->depth_bias,
+             &src_state->depth_bias,
+             sizeof(src_state->depth_bias));
+
+      cmd_buffer_state->dirty.depth_bias = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
+      STATIC_ASSERT(
+         __same_type(dest_state->blend_constants, src_state->blend_constants));
+
+      typed_memcpy(dest_state->blend_constants,
+                   src_state->blend_constants,
+                   ARRAY_SIZE(dest_state->blend_constants));
+
+      cmd_buffer_state->dirty.blend_constants = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
+      dest_state->compare_mask.front = src_state->compare_mask.front;
+      dest_state->compare_mask.back = src_state->compare_mask.back;
+
+      cmd_buffer_state->dirty.compare_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
+      dest_state->write_mask.front = src_state->write_mask.front;
+      dest_state->write_mask.back = src_state->write_mask.back;
+
+      cmd_buffer_state->dirty.write_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
+      dest_state->reference.front = src_state->reference.front;
+      dest_state->reference.back = src_state->reference.back;
+
+      cmd_buffer_state->dirty.reference = true;
+   }
+}
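+
+/* Note: only state that was not declared dynamic at pipeline creation is
+ * copied out of the pipeline above; state covered by src_state->mask keeps
+ * whatever was last set through the vkCmdSet*() entry points.
+ */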
point."); + break; + } +} + +#if defined(DEBUG) +static void check_viewport_quirk_70165(const struct pvr_device *device, + const VkViewport *pViewport) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y; + float min_screen_space_value, max_screen_space_value; + float sign_to_unsigned_offset, fixed_point_max; + float guardband_width, guardband_height; + + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { + /* Max representable value in 13.4 fixed point format. + * Round-down to avoid precision issues. + * Calculated as (2 ** 13) - 2*(2 ** -4) + */ + fixed_point_max = 8192.0f - 2.0f / 16.0f; + + if (PVR_HAS_FEATURE(dev_info, screen_size8K)) { + if (pViewport->width <= 4096 && pViewport->height <= 4096) { + guardband_width = pViewport->width / 4.0f; + guardband_height = pViewport->height / 4.0f; + + /* 2k of the range is negative */ + sign_to_unsigned_offset = 2048.0f; + } else { + guardband_width = 0.0f; + guardband_height = 0.0f; + + /* For > 4k renders, the entire range is positive */ + sign_to_unsigned_offset = 0.0f; + } + } else { + guardband_width = pViewport->width / 4.0f; + guardband_height = pViewport->height / 4.0f; + + /* 2k of the range is negative */ + sign_to_unsigned_offset = 2048.0f; + } + } else { + /* Max representable value in 16.8 fixed point format + * Calculated as (2 ** 16) - (2 ** -8) + */ + fixed_point_max = 65535.99609375f; + guardband_width = pViewport->width / 4.0f; + guardband_height = pViewport->height / 4.0f; + + /* 4k/20k of the range is negative */ + sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET; + } + + min_screen_space_value = -sign_to_unsigned_offset; + max_screen_space_value = fixed_point_max - sign_to_unsigned_offset; + + min_vertex_x = pViewport->x - guardband_width; + max_vertex_x = pViewport->x + pViewport->width + guardband_width; + min_vertex_y = pViewport->y - guardband_height; + max_vertex_y = pViewport->y + pViewport->height + guardband_height; + if (min_vertex_x < min_screen_space_value || + max_vertex_x > max_screen_space_value || + min_vertex_y < min_screen_space_value || + max_vertex_y > max_screen_space_value) { + mesa_logw("Viewport is affected by BRN70165, geometry outside " + "the viewport could be corrupted"); + } +} +#endif + +void pvr_CmdSetViewport(VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + const uint32_t total_count = firstViewport + viewportCount; + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + + assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0); + assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS); + + PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); + +#if defined(DEBUG) + if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) { + for (uint32_t viewport = 0; viewport < viewportCount; viewport++) { + check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]); + } + } +#endif + + if (state->dynamic.common.viewport.count < total_count) + state->dynamic.common.viewport.count = total_count; + + memcpy(&state->dynamic.common.viewport.viewports[firstViewport], + pViewports, + viewportCount * sizeof(*pViewports)); + + state->dirty.viewport = true; +} + +void pvr_CmdSetScissor(VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D *pScissors) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, 
+
+void pvr_CmdSetScissor(VkCommandBuffer commandBuffer,
+                       uint32_t firstScissor,
+                       uint32_t scissorCount,
+                       const VkRect2D *pScissors)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const uint32_t total_count = firstScissor + scissorCount;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0);
+   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   if (state->dynamic.common.scissor.count < total_count)
+      state->dynamic.common.scissor.count = total_count;
+
+   memcpy(&state->dynamic.common.scissor.scissors[firstScissor],
+          pScissors,
+          scissorCount * sizeof(*pScissors));
+
+   state->dirty.scissor = true;
+}
+
+void pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.line_width = lineWidth;
+   state->dirty.line_width = true;
+}
+
+void pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer,
+                         float depthBiasConstantFactor,
+                         float depthBiasClamp,
+                         float depthBiasSlopeFactor)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor;
+   state->dynamic.common.depth_bias.clamp = depthBiasClamp;
+   state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor;
+   state->dirty.depth_bias = true;
+}
+
+void pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
+                              const float blendConstants[4])
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4);
+   memcpy(state->dynamic.common.blend_constants,
+          blendConstants,
+          sizeof(state->dynamic.common.blend_constants));
+
+   state->dirty.blend_constants = true;
+}
+
+void pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
+                           float minDepthBounds,
+                           float maxDepthBounds)
+{
+   mesa_logd("No support for depth bounds testing.");
+}
+
+void pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
+                                  VkStencilFaceFlags faceMask,
+                                  uint32_t compareMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.compare_mask.front = compareMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.compare_mask.back = compareMask;
+
+   state->dirty.compare_mask = true;
+}
+
+void pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t writeMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.write_mask.front = writeMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.write_mask.back = writeMask;
+
+   state->dirty.write_mask = true;
+}
+
+void pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t reference)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.reference.front = reference;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.reference.back = reference;
+
+   state->dirty.reference = true;
+}
+
+void pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
+                               VkPipelineBindPoint pipelineBindPoint,
+                               VkPipelineLayout _layout,
+                               uint32_t firstSet,
+                               uint32_t descriptorSetCount,
+                               const VkDescriptorSet *pDescriptorSets,
+                               uint32_t dynamicOffsetCount,
+                               const uint32_t *pDynamicOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_descriptor_state *descriptor_state;
+
+   assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      break;
+
+   default:
+      unreachable("Unsupported bind point.");
+      break;
+   }
+
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      descriptor_state = &cmd_buffer->state.gfx_desc_state;
+      cmd_buffer->state.dirty.gfx_desc_dirty = true;
+   } else {
+      descriptor_state = &cmd_buffer->state.compute_desc_state;
+      cmd_buffer->state.dirty.compute_desc_dirty = true;
+   }
+
+   for (uint32_t i = 0; i < descriptorSetCount; i++) {
+      PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]);
+      uint32_t index = firstSet + i;
+
+      if (descriptor_state->descriptor_sets[index] != set) {
+         descriptor_state->descriptor_sets[index] = set;
+         descriptor_state->valid_mask |= (1u << index);
+      }
+   }
+}
+
+void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+                              uint32_t firstBinding,
+                              uint32_t bindingCount,
+                              const VkBuffer *pBuffers,
+                              const VkDeviceSize *pOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_vertex_binding *const vb = cmd_buffer->state.vertex_bindings;
+
+   /* We have to defer setting up the vertex buffers since we need the buffer
+    * stride from the pipeline.
+    */
+
+   assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS &&
+          bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   for (uint32_t i = 0; i < bindingCount; i++) {
+      vb[firstBinding + i].buffer = pvr_buffer_from_handle(pBuffers[i]);
+      vb[firstBinding + i].offset = pOffsets[i];
+   }
+
+   cmd_buffer->state.dirty.vertex_bindings = true;
+}
+
+void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
+                            VkBuffer buffer,
+                            VkDeviceSize offset,
+                            VkIndexType indexType)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(offset < index_buffer->size);
+   assert(indexType == VK_INDEX_TYPE_UINT32 ||
+          indexType == VK_INDEX_TYPE_UINT16);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   state->index_buffer_binding.buffer = index_buffer;
+   state->index_buffer_binding.offset = offset;
+   state->index_buffer_binding.type = indexType;
+   state->dirty.index_buffer_binding = true;
+}
+
+void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
+                          VkPipelineLayout layout,
+                          VkShaderStageFlags stageFlags,
+                          uint32_t offset,
+                          uint32_t size,
+                          const void *pValues)
+{
+#if defined(DEBUG)
+   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
+#endif
+
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);
+
+   memcpy(&state->push_constants.data[offset], pValues, size);
+
+   state->push_constants.dirty_stages |= stageFlags;
+}
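+
+/* Note: vkCmdPushConstants() above only stashes the data client-side;
+ * the dirty_stages mask is what later triggers the device upload for the
+ * affected stages when their programs are next emitted.
+ */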
+
+static VkResult
+pvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer,
+                                 const struct pvr_render_pass *pass,
+                                 const struct pvr_framebuffer *framebuffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_render_pass_info *info = &state->render_pass_info;
+
+   assert(pass->attachment_count == framebuffer->attachment_count);
+
+   /* Free any previously allocated attachments. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments);
+
+   if (pass->attachment_count == 0) {
+      info->attachments = NULL;
+      return VK_SUCCESS;
+   }
+
+   info->attachments =
+      vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                pass->attachment_count * sizeof(*info->attachments),
+                8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!info->attachments) {
+      /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer. */
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   if (framebuffer) {
+      for (uint32_t i = 0; i < pass->attachment_count; i++)
+         info->attachments[i] = framebuffer->attachments[i];
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult pvr_init_render_targets(struct pvr_device *device,
+                                        struct pvr_render_pass *pass,
+                                        struct pvr_framebuffer *framebuffer)
+{
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_render_target *render_target =
+         pvr_get_render_target(pass, framebuffer, i);
+
+      pthread_mutex_lock(&render_target->mutex);
+
+      if (!render_target->valid) {
+         const struct pvr_renderpass_hwsetup_render *hw_render =
+            &pass->hw_setup->renders[i];
+         VkResult result;
+
+         result = pvr_render_target_dataset_create(device,
+                                                   framebuffer->width,
+                                                   framebuffer->height,
+                                                   hw_render->sample_count,
+                                                   framebuffer->layers,
+                                                   &render_target->rt_dataset);
+         if (result != VK_SUCCESS) {
+            pthread_mutex_unlock(&render_target->mutex);
+            return result;
+         }
+
+         render_target->valid = true;
+      }
+
+      pthread_mutex_unlock(&render_target->mutex);
+   }
+
+   return VK_SUCCESS;
+}
+
+static const struct pvr_renderpass_hwsetup_subpass *
+pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
+{
+   const struct pvr_renderpass_hw_map *map =
+      &pass->hw_setup->subpass_map[subpass];
+
+   return &pass->hw_setup->renders[map->render].subpasses[map->subpass];
+}
+
+static void pvr_perform_start_of_render_attachment_clear(
+   struct pvr_cmd_buffer *cmd_buffer,
+   const struct pvr_framebuffer *framebuffer,
+   uint32_t index,
+   bool is_depth_stencil,
+   uint32_t *index_list_clear_mask)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+   const struct pvr_renderpass_hwsetup *hw_setup;
+   struct pvr_image_view *iview;
+   uint32_t view_idx;
+   uint32_t height;
+   uint32_t width;
+
+   hw_setup = pass->hw_setup;
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+
+   if (is_depth_stencil) {
+      bool stencil_clear;
+      bool depth_clear;
+      bool is_stencil;
+      bool is_depth;
+
+      assert(hw_render->ds_surface_id != -1);
+      assert(index == 0);
+
+      view_idx = hw_render->ds_surface_id;
+
+      is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format);
+      is_stencil =
+         vk_format_has_stencil(pass->attachments[view_idx].vk_format);
+      depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR;
+      stencil_clear =
+         hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR;
+
+      /* Attempt to clear the ds attachment. Do not erroneously discard an
+       * attachment that has no depth clear but does have a stencil clear.
+       */
+      /* i.e. skip unless (is_depth && depth_clear) ||
+       * (is_stencil && stencil_clear).
+       */
+      if (!((is_depth && depth_clear) || (is_stencil && stencil_clear)))
+         return;
+   } else if (hw_render->color_init[index].op !=
+              RENDERPASS_SURFACE_INITOP_CLEAR) {
+      return;
+   } else {
+      view_idx = hw_render->color_init[index].driver_id;
+   }
+
+   iview = info->attachments[view_idx];
+   width = iview->vk.extent.width;
+   height = iview->vk.extent.height;
+
+   /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init()
+    * were doing the same check (even if it's just an assert) to determine if
+    * a clear is needed.
+    */
+   /* If this is single-layer fullscreen, we already do the clears in
+    * pvr_sub_cmd_gfx_job_init().
+    */
+   if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 &&
+       info->render_area.extent.width == width &&
+       info->render_area.extent.height == height && framebuffer->layers == 1) {
+      return;
+   }
+
+   pvr_finishme("Unimplemented path!");
+}
+
+static void
+pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_framebuffer *framebuffer = info->framebuffer;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+
+   /* Mask of attachment clears using index lists instead of the background
+    * object to clear.
+    */
+   uint32_t index_list_clear_mask = 0;
+
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+   if (!hw_render) {
+      info->process_empty_tiles = false;
+      info->enable_bg_tag = false;
+      return;
+   }
+
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   i,
+                                                   false,
+                                                   &index_list_clear_mask);
+   }
+
+   info->enable_bg_tag = !!hw_render->color_init_count;
+
+   /* If we're not using an index list for all clears/loads then we need to
+    * run the background object on empty tiles.
+    */
+   if (hw_render->color_init_count &&
+       index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) {
+      info->process_empty_tiles = true;
+   } else {
+      info->process_empty_tiles = false;
+   }
+
+   if (hw_render->ds_surface_id != -1) {
+      uint32_t ds_index_list = 0;
+
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   0,
+                                                   true,
+                                                   &ds_index_list);
+   }
+
+   if (index_list_clear_mask)
+      pvr_finishme("Add support for generating loadops shaders!");
+}
+
+static void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state)
+{
+   const struct pvr_render_pass *pass = state->render_pass_info.pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[state->current_sub_cmd->gfx.hw_render_idx];
+
+   if (hw_render->ds_surface_id != -1) {
+      struct pvr_image_view **iviews = state->render_pass_info.attachments;
+
+      state->depth_format = iviews[hw_render->ds_surface_id]->vk.format;
+   }
+}
+
+static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup)
+{
+   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
+      uint32_t render_targets_count =
+         hw_render->init_setup.render_targets_count;
+
+      for (uint32_t j = 0;
+           j < (hw_render->color_init_count * render_targets_count);
+           j += render_targets_count) {
+         for (uint32_t k = 0; k < hw_render->init_setup.render_targets_count;
+              k++) {
+            if (hw_render->color_init[j + k].op ==
+                RENDERPASS_SURFACE_INITOP_CLEAR) {
+               return true;
+            }
+         }
+      }
+
+      if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR ||
+          hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) {
+         return true;
+      }
+   }
+
+   return false;
+}
+
+static VkResult
+pvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer,
+                                const VkRenderPassBeginInfo *pRenderPassBegin)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+
+   /* Free any previously allocated clear values. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values);
+
+   if (pRenderPassBegin->clearValueCount) {
+      const size_t size = pRenderPassBegin->clearValueCount *
+                          sizeof(*state->render_pass_info.clear_values);
+
+      state->render_pass_info.clear_values =
+         vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                   size,
+                   8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!state->render_pass_info.clear_values) {
+         state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+         return state->status;
+      }
+
+      memcpy(state->render_pass_info.clear_values,
+             pRenderPassBegin->pClearValues,
+             size);
+   } else {
+      state->render_pass_info.clear_values = NULL;
+   }
+
+   state->render_pass_info.clear_value_count =
+      pRenderPassBegin->clearValueCount;
+
+   return VK_SUCCESS;
+}
+
+void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+                             const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+                             const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
+{
+   PVR_FROM_HANDLE(pvr_framebuffer,
+                   framebuffer,
+                   pRenderPassBeginInfo->framebuffer);
+   PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass);
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const struct pvr_renderpass_hwsetup_subpass *hw_subpass;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   VkResult result;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   assert(!state->render_pass_info.pass);
+   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+   /* FIXME: Create a separate function for everything using pass->subpasses,
+    * look at cmd_buffer_begin_subpass() for example.
+    */
+   state->render_pass_info.pass = pass;
+   state->render_pass_info.framebuffer = framebuffer;
+   state->render_pass_info.subpass_idx = 0;
+   state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea;
+   state->render_pass_info.current_hw_subpass = 0;
+   state->render_pass_info.pipeline_bind_point =
+      pass->subpasses[0].pipeline_bind_point;
+   state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn;
+   state->dirty.userpass_spawn = true;
+
+   result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer);
+   if (result != VK_SUCCESS)
+      return;
+
+   state->status =
+      pvr_init_render_targets(cmd_buffer->device, pass, framebuffer);
+   if (state->status != VK_SUCCESS)
+      return;
+
+   result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo);
+   if (result != VK_SUCCESS)
+      return;
+
+   assert(pass->subpasses[0].pipeline_bind_point ==
+          VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+   result =
+      pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* Run the subpass 0 "soft" background object after the actual background
+    * object.
+    */
+   hw_subpass = pvr_get_hw_subpass(pass, 0);
+   if (hw_subpass->client_data)
+      pvr_finishme("Unimplemented path!");
+
+   pvr_perform_start_of_render_clears(cmd_buffer);
+   pvr_stash_depth_format(&cmd_buffer->state);
+
+   if (!pvr_loadops_contain_clear(pass->hw_setup)) {
+      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR;
+      state->dynamic.scissor_accum_bounds.offset.x = 0;
+      state->dynamic.scissor_accum_bounds.offset.y = 0;
+      state->dynamic.scissor_accum_bounds.extent.width = 0;
+      state->dynamic.scissor_accum_bounds.extent.height = 0;
+   } else {
+      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED;
+   }
+}
+
+static void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer)
+{
+   if (cmd_buffer->status != PVR_CMD_BUFFER_STATUS_INITIAL) {
+      /* FIXME: For now we always free all resources as if
+       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
+       */
+      pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
+
+      list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
+         list_del(&bo->link);
+         pvr_bo_free(cmd_buffer->device, bo);
+      }
+
+      util_dynarray_clear(&cmd_buffer->scissor_array);
+      util_dynarray_clear(&cmd_buffer->depth_bias_array);
+
+      cmd_buffer->state.status = VK_SUCCESS;
+      cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
+   }
+}
+
+VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+                                const VkCommandBufferBeginInfo *pBeginInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state;
+   VkResult result;
+
+   pvr_cmd_buffer_reset(cmd_buffer);
+
+   cmd_buffer->usage_flags = pBeginInfo->flags;
+   state = &cmd_buffer->state;
+
+   /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
+    * primary level command buffers.
+    *
+    * From the Vulkan 1.0 spec:
+    *
+    *    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a
+    *    secondary command buffer is considered to be entirely inside a render
+    *    pass. If this is a primary command buffer, then this bit is ignored.
+    */
+   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+      cmd_buffer->usage_flags &=
+         ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
+   }
+
+   if (cmd_buffer->usage_flags &
+       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+      const VkCommandBufferInheritanceInfo *inheritance_info =
+         pBeginInfo->pInheritanceInfo;
+      struct pvr_render_pass *pass;
+
+      pass = pvr_render_pass_from_handle(inheritance_info->renderPass);
+      state->render_pass_info.pass = pass;
+      state->render_pass_info.framebuffer =
+         pvr_framebuffer_from_handle(inheritance_info->framebuffer);
+      state->render_pass_info.subpass_idx = inheritance_info->subpass;
+      state->render_pass_info.userpass_spawn =
+         pass->subpasses[inheritance_info->subpass].userpass_spawn;
+
+      result =
+         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   memset(state->barriers_needed,
+          0xFF,
+          sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed));
+
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING;
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                         struct pvr_transfer_cmd *transfer_cmd)
+{
+   VkResult result;
+
+   result =
+      pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
+   if (result != VK_SUCCESS)
+      return result;
+
+   list_addtail(&transfer_cmd->link,
+                &cmd_buffer->state.current_sub_cmd->transfer.transfer_cmds);
+
+   return VK_SUCCESS;
+}
+
+void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
+                     uint32_t groupCountX,
+                     uint32_t groupCountY,
+                     uint32_t groupCountZ)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
+                             VkBuffer _buffer,
+                             VkDeviceSize offset)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdDraw(VkCommandBuffer commandBuffer,
+                 uint32_t vertexCount,
+                 uint32_t instanceCount,
+                 uint32_t firstVertex,
+                 uint32_t firstInstance)
+{
+   assert(!"Unimplemented");
+}
+
+static void
+pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
+                      const struct pvr_cmd_buffer_draw_state *const draw_state)
+{
+   /* We don't have a state to tell us that base_instance is being used so it
+    * gets used as a boolean: 0 means we'll use a PDS program that skips the
+    * base instance addition. If the base_instance gets used (and the last
+    * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
+    * program.
+    *
+    * If base_instance changes then we only need to update the data section.
+    *
+    * The only draw call state that doesn't really matter is the start vertex
+    * as that is handled properly in the VDM state in all cases.
+    */
+   if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
+       (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
+       (state->draw_state.base_instance == 0 &&
+        draw_state->base_instance != 0)) {
+      state->dirty.draw_variant = true;
+   } else if (state->draw_state.base_instance != draw_state->base_instance) {
+      state->dirty.draw_base_instance = true;
+   }
+
+   state->draw_state = *draw_state;
+}
+
+static uint32_t pvr_calc_shared_regs_count(
+   const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const struct pvr_pipeline_stage_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state.stage_state;
+   uint32_t shared_regs = vertex_state->const_shared_reg_count +
+                          vertex_state->const_shared_reg_offset;
+
+   if (gfx_pipeline->fragment_shader_state.bo) {
+      const struct pvr_pipeline_stage_state *const fragment_state =
+         &gfx_pipeline->fragment_shader_state.stage_state;
+      uint32_t fragment_regs = fragment_state->const_shared_reg_count +
+                               fragment_state->const_shared_reg_offset;
+
+      shared_regs = MAX2(shared_regs, fragment_regs);
+   }
+
+   return shared_regs;
+}
+
+#define PVR_WRITE(_buffer, _value, _offset, _max)                \
+   do {                                                          \
+      __typeof__(_value) __value = _value;                       \
+      uint64_t __offset = _offset;                               \
+      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
+      static_assert(__same_type(*_buffer, __value),              \
+                    "Buffer and value type mismatch");           \
+      assert((__offset + __nr_dwords) <= (_max));                \
+      assert((__offset % __nr_dwords) == 0U);                    \
+      _buffer[__offset / __nr_dwords] = __value;                 \
+   } while (0)
+
+static VkResult
+pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
+                         const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const struct pvr_vertex_shader_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_pds_info *const pds_info = state->pds_shader.info;
+   const uint8_t *entries;
+   uint32_t *dword_buffer;
+   uint64_t *qword_buffer;
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.pds_heap,
+                                     pds_info->data_size_in_dwords,
+                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                     &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dword_buffer = (uint32_t *)pvr_bo->bo->map;
+   qword_buffer = (uint64_t *)pvr_bo->bo->map;
+
+   entries = (uint8_t *)pds_info->entries;
+
+   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
+      const struct pvr_const_map_entry *const entry_header =
+         (struct pvr_const_map_entry *)entries;
+
+      switch (entry_header->type) {
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
+         const struct pvr_const_map_entry_literal32 *const literal =
+            (struct pvr_const_map_entry_literal32 *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   literal->literal_value,
+                   literal->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*literal);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
+         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
+            (struct pvr_const_map_entry_doutu_address *)entries;
+         pvr_dev_addr_t exec_addr = vertex_state->bo->vma->dev_addr;
+         uint64_t addr = 0ULL;
+
+         exec_addr.addr += vertex_state->entry_offset;
+         pvr_set_usc_execution_address64(&addr, exec_addr.addr);
+
+         PVR_WRITE(qword_buffer,
+                   addr | doutu_addr->doutu_control,
+                   doutu_addr->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*doutu_addr);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: {
+         const struct pvr_const_map_entry_base_instance *const base_instance =
+            (struct pvr_const_map_entry_base_instance *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   state->draw_state.base_instance,
+                   base_instance->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*base_instance);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: {
+         const struct pvr_const_map_entry_vertex_attribute_address
+            *const attribute =
+            (struct pvr_const_map_entry_vertex_attribute_address *)entries;
+         const struct pvr_vertex_binding *const binding =
+            &state->vertex_bindings[attribute->binding_index];
+         uint64_t addr = binding->buffer->dev_addr.addr;
+
+         addr += binding->offset;
+         addr += attribute->offset;
+
+         PVR_WRITE(qword_buffer,
+                   addr,
+                   attribute->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*attribute);
+         break;
+      }
+
+      default:
+         unreachable("Unsupported data section map");
+         break;
+      }
+   }
+
+   state->pds_vertex_attrib_offset =
+      pvr_bo->vma->dev_addr.addr -
+      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
+
+   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
+
+   return VK_SUCCESS;
+}
+
+static VkResult pvr_setup_descriptor_mappings(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   enum pvr_stage_allocation stage,
+   const struct pvr_stage_allocation_uniform_state *uniform_state,
+   uint32_t *const uniform_data_offset_out)
+{
+   const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
+   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_descriptor_state *desc_state;
+   const uint8_t *entries;
+   uint32_t *dword_buffer;
+   uint64_t *qword_buffer;
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   if (!pds_info->data_size_in_dwords)
+      return VK_SUCCESS;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.pds_heap,
+                                     pds_info->data_size_in_dwords,
+                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                     &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dword_buffer = (uint32_t *)pvr_bo->bo->map;
+   qword_buffer = (uint64_t *)pvr_bo->bo->map;
+
+   entries = (uint8_t *)pds_info->entries;
+
+   switch (stage) {
+   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+   case PVR_STAGE_ALLOCATION_FRAGMENT:
+      desc_state = &cmd_buffer->state.gfx_desc_state;
+      break;
+
+   case PVR_STAGE_ALLOCATION_COMPUTE:
+      desc_state = &cmd_buffer->state.compute_desc_state;
+      break;
+
+   default:
+      unreachable("Unsupported stage.");
+      break;
+   }
+
+   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
+      const struct pvr_const_map_entry *const entry_header =
+         (struct pvr_const_map_entry *)entries;
+
+      switch (entry_header->type) {
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
+         const struct pvr_const_map_entry_literal32 *const literal =
+            (struct pvr_const_map_entry_literal32 *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   literal->literal_value,
+                   literal->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*literal);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: {
+         const struct pvr_const_map_entry_constant_buffer *const_buffer_entry =
+            (struct pvr_const_map_entry_constant_buffer *)entries;
+         const uint32_t desc_set = const_buffer_entry->desc_set;
+         const uint32_t binding = const_buffer_entry->binding;
+         const struct pvr_descriptor_set *descriptor_set;
+         const struct pvr_descriptor *descriptor;
+         pvr_dev_addr_t buffer_addr;
+
+         /* TODO: Handle push descriptors. */
+
+         assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
+         descriptor_set = state->gfx_desc_state.descriptor_sets[desc_set];
+
+         /* TODO: Handle dynamic buffers. */
+         descriptor = &descriptor_set->descriptors[binding];
+         assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
+
+         assert(descriptor->buffer_desc_range ==
+                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
+         assert(descriptor->buffer_create_info_size ==
+                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
+
+         buffer_addr = descriptor->buffer_dev_addr;
+         buffer_addr.addr += const_buffer_entry->offset * sizeof(uint32_t);
+
+         PVR_WRITE(qword_buffer,
+                   buffer_addr.addr,
+                   const_buffer_entry->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*const_buffer_entry);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: {
+         const struct pvr_const_map_entry_descriptor_set *desc_set_entry =
+            (struct pvr_const_map_entry_descriptor_set *)entries;
+         const uint32_t desc_set_num = desc_set_entry->descriptor_set;
+         const struct pvr_descriptor_set *descriptor_set;
+         pvr_dev_addr_t desc_set_addr;
+
+         assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS);
+
+         /* TODO: Remove this when the compiler provides us with usage info?
+          */
+         /* We skip DMAing unbound descriptor sets. */
+         if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) {
+            const struct pvr_const_map_entry_literal32 *literal;
+            uint32_t zero_literal_value;
+
+            entries += sizeof(*desc_set_entry);
+            literal = (struct pvr_const_map_entry_literal32 *)entries;
+
+            /* TODO: Is there any guarantee that a literal will follow the
+             * descriptor set entry?
+             */
+            assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32);
+
+            /* We zero out the DMA size so the DMA isn't performed. */
+            zero_literal_value =
+               literal->literal_value &
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK;
+
+            PVR_WRITE(qword_buffer,
+                      0UL,
+                      desc_set_entry->const_offset,
+                      pds_info->data_size_in_dwords);
+
+            PVR_WRITE(dword_buffer,
+                      zero_literal_value,
+                      desc_set_entry->const_offset,
+                      pds_info->data_size_in_dwords);
+
+            entries += sizeof(*literal);
+            i++;
+            continue;
+         }
+
+         descriptor_set = desc_state->descriptor_sets[desc_set_num];
+
+         pvr_finishme("Handle push descriptor entry.");
+
+         desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr;
+
+         if (desc_set_entry->primary) {
+            desc_set_addr.addr +=
+               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
+                  .primary_offset
+               << 2U;
+         } else {
+            desc_set_addr.addr +=
+               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
+                  .secondary_offset
+               << 2U;
+         }
+
+         desc_set_addr.addr += (uint64_t)desc_set_entry->offset_in_dwords
+                               << 2U;
+
+         PVR_WRITE(qword_buffer,
+                   desc_set_addr.addr,
+                   desc_set_entry->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*desc_set_entry);
+         break;
+      }
+
+      default:
+         unreachable("Unsupported map entry type.");
+      }
+   }
+
+   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
+
+   *uniform_data_offset_out =
+      pvr_bo->vma->dev_addr.addr -
+      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
+
+   return VK_SUCCESS;
+}
+
+#undef PVR_WRITE
+
+static void
+pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
+                         const uint32_t pds_vertex_uniform_data_offset)
+{
+   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_stage_allocation_uniform_state *const vertex_uniform_state =
+      &state->gfx_pipeline->vertex_shader_state.uniform_state;
+   const struct pvr_pipeline_stage_state *const vertex_stage_state =
+      &state->gfx_pipeline->vertex_shader_state.stage_state;
+   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
+
+   if (!vertex_uniform_state->pds_info.code_size_in_dwords)
+      return;
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
+      state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);
+
+      state0.usc_common_size =
+         DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2,
+                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
+
+      state0.pds_data_size =
+         DIV_ROUND_UP(vertex_uniform_state->pds_info.data_size_in_dwords << 2,
+                      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
+   }
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
+      state1.pds_data_addr.addr = pds_vertex_uniform_data_offset;
+      state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE);
+   }
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
+      state2.pds_code_addr.addr = vertex_uniform_state->pds_code.code_offset;
+   }
+}
+
+static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_vertex_shader_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+   uint32_t output_selects;
+
+   /* TODO: Handle vertex and fragment shader state flags. */
+
+   pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
+      const VkPrimitiveTopology topology =
+         gfx_pipeline->input_asm_state.topology;
+
+      state.rhw_pres = true;
+      state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U);
+      state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
+   }
+
+   if (ppp_state->output_selects != output_selects) {
+      ppp_state->output_selects = output_selects;
+      emit_state->output_selects = true;
+   }
+
+   if (ppp_state->varying_word[0] != vertex_state->varying[0]) {
+      ppp_state->varying_word[0] = vertex_state->varying[0];
+      emit_state->varying_word0 = true;
+   }
+
+   if (ppp_state->varying_word[1] != vertex_state->varying[1]) {
+      ppp_state->varying_word[1] = vertex_state->varying[1];
+      emit_state->varying_word1 = true;
+   }
+}
+
+/* clang-format off */
+static enum PVRX(TA_OBJTYPE)
+pvr_ppp_state_get_ispa_objtype_from_vk(const VkPrimitiveTopology topology)
+/* clang-format on */
+{
+   switch (topology) {
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+      return PVRX(TA_OBJTYPE_SPRITE_01UV);
+
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_LINE);
+
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_TRIANGLE);
+
+   default:
+      unreachable("Invalid topology.");
+      return 0;
+   }
+}
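+
+/* Note on the pointlinewidth conversion in the function below: the field is
+ * 4.4 fixed point with an implicit 1/16 bias, e.g. lineWidth 1.0 becomes 16
+ * in 4.4 format and is stored as 15, which the hardware reads back as
+ * 16/16 = 1.0.
+ */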
+
+static void pvr_setup_isp_faces_and_control(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   struct pvr_cmd_struct(TA_STATE_ISPA) *const ispa_out)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_dynamic_state *const dynamic_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_render_pass_info *const pass_info =
+      &cmd_buffer->state.render_pass_info;
+   const uint32_t subpass_idx = pass_info->subpass_idx;
+   const uint32_t *depth_stencil_attachment_idx =
+      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
+   const struct pvr_image_view *const attachment =
+      (!depth_stencil_attachment_idx)
+         ? NULL
+         : pass_info->attachments[*depth_stencil_attachment_idx];
+
+   const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode;
+   const bool raster_discard_enabled =
+      gfx_pipeline->raster_state.discard_enable;
+   const bool disable_all = raster_discard_enabled || !attachment;
+
+   const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
+   const enum PVRX(TA_OBJTYPE)
+      obj_type = pvr_ppp_state_get_ispa_objtype_from_vk(topology);
+
+   const bool disable_stencil_write = disable_all;
+   const bool disable_stencil_test =
+      disable_all || !vk_format_has_stencil(attachment->vk.format);
+
+   const bool disable_depth_write = disable_all;
+   const bool disable_depth_test =
+      disable_all || !vk_format_has_depth(attachment->vk.format);
+
+   uint32_t ispb_stencil_off;
+   bool is_two_sided = false;
+   uint32_t isp_control;
+
+   uint32_t line_width;
+   uint32_t common_a;
+   uint32_t front_a;
+   uint32_t front_b;
+   uint32_t back_a;
+   uint32_t back_b;
+
+   /* Convert to 4.4 fixed point format. */
+   line_width = util_unsigned_fixed(dynamic_state->line_width, 4);
+
+   /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16].
+    * If 0 it stays at 0, otherwise we subtract 1.
+    */
+   line_width = (!!line_width) * (line_width - 1);
+
+   line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX));
+
+   /* TODO: Part of the logic in this function is duplicated in another part
+    * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier?
+    */
+
+   pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) {
+      ispa.pointlinewidth = line_width;
+
+      if (disable_depth_test)
+         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
+      else
+         ispa.dcmpmode = gfx_pipeline->depth_compare_op;
+
+      /* FIXME: Can we just have this and remove the assignment above?
+       * The user provides a depthTestEnable at vkCreateGraphicsPipelines(),
+       * so should we be using that?
+       */
+      ispa.dcmpmode |= gfx_pipeline->depth_compare_op;
+
+      ispa.dwritedisable = disable_depth_test || disable_depth_write;
+      /* FIXME: Can we just have this and remove the assignment above? */
+      ispa.dwritedisable = ispa.dwritedisable ||
+                           gfx_pipeline->depth_write_disable;
+
+      ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type;
+
+      ispa.objtype = obj_type;
+
+      /* Return the unpacked ispa structure. dcmpmode, dwritedisable, passtype
+       * and objtype are needed by pvr_setup_triangle_merging_flag().
+       */
+      if (ispa_out)
+         *ispa_out = ispa;
+   }
+
+   /* FIXME: This logic should be redone and improved. Can we also get rid of
+    * the front and back variants?
+    */
+
+   pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) {
+      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front;
+   }
+   front_a |= common_a;
+
+   pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) {
+      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.back;
+   }
+   back_a |= common_a;
+
+   /* TODO: Does this actually represent the ispb control word on stencil
+    * off? If not, rename the variable.
+    */
+   pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) {
+      ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
+   }
+
+   if (disable_stencil_test) {
+      back_b = front_b = ispb_stencil_off;
+   } else {
+      pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.front;
+         ispb.scmpmask = dynamic_state->compare_mask.front;
+
+         ispb.sop3 = gfx_pipeline->stencil_front.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_front.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_front.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_front.compare_op;
+      }
+
+      pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.back;
+         ispb.scmpmask = dynamic_state->compare_mask.back;
+
+         ispb.sop3 = gfx_pipeline->stencil_back.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_back.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_back.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_back.compare_op;
+      }
+   }
+
+   if (front_a != back_a || front_b != back_b) {
+      if (cull_mode & VK_CULL_MODE_BACK_BIT) {
+         /* Single face, using front state. */
+      } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) {
+         /* Single face, using back state. */
+
+         front_a = back_a;
+         front_b = back_b;
+      } else {
+         /* Both faces. */
+
+         emit_state->isp_ba = is_two_sided = true;
+
+         if (gfx_pipeline->raster_state.front_face ==
+             VK_FRONT_FACE_COUNTER_CLOCKWISE) {
+            uint32_t tmp = front_a;
+
+            front_a = back_a;
+            back_a = tmp;
+
+            tmp = front_b;
+            front_b = back_b;
+            back_b = tmp;
+         }
+
+         /* HW defaults to stencil off. */
+         if (back_b != ispb_stencil_off)
+            emit_state->isp_fb = emit_state->isp_bb = true;
+      }
+   }
+
+   if (!disable_stencil_test && front_b != ispb_stencil_off)
+      emit_state->isp_fb = true;
+
+   pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) {
+      ispctl.upass = pass_info->userpass_spawn;
+
+      /* TODO: is bo ever NULL? Figure out what to do. */
+      ispctl.tagwritedisable = raster_discard_enabled ||
+                               !gfx_pipeline->fragment_shader_state.bo;
+
+      ispctl.two_sided = is_two_sided;
+      ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb;
+
+      ispctl.dbenable = !raster_discard_enabled &&
+                        gfx_pipeline->raster_state.depth_bias_enable &&
+                        obj_type == PVRX(TA_OBJTYPE_TRIANGLE);
+      ispctl.scenable = !raster_discard_enabled;
+
+      ppp_state->isp.control_struct = ispctl;
+   }
+
+   emit_state->isp = true;
+
+   ppp_state->isp.control = isp_control;
+   ppp_state->isp.front_a = front_a;
+   ppp_state->isp.front_b = front_b;
+   ppp_state->isp.back_a = back_a;
+   ppp_state->isp.back_b = back_b;
+}
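+
+/* Worked example for pvr_get_viewport_scissor_overlap() below: a viewport of
+ * (x, y, w, h) = (10, 10, 100, 100) with a scissor of (0, 0, 50, 50) yields
+ * the overlap rect offset (10, 10), extent 40x40.
+ */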
+
+static void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport,
+                                             const VkRect2D *const scissor,
+                                             VkRect2D *const rect_out)
+{
+   /* TODO: See if we can remove this struct. */
+   struct pvr_rect {
+      int32_t x0, y0;
+      int32_t x1, y1;
+   };
+
+   /* TODO: Worry about overflow? */
+   const struct pvr_rect scissor_rect = {
+      .x0 = scissor->offset.x,
+      .y0 = scissor->offset.y,
+      .x1 = scissor->offset.x + scissor->extent.width,
+      .y1 = scissor->offset.y + scissor->extent.height
+   };
+   struct pvr_rect viewport_rect = { 0 };
+
+   assert(viewport->width >= 0.0f);
+   assert(scissor_rect.x0 >= 0);
+   assert(scissor_rect.y0 >= 0);
+
+   if (scissor->extent.width == 0 || scissor->extent.height == 0) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   viewport_rect.x0 = (int32_t)viewport->x;
+   viewport_rect.x1 = (int32_t)viewport->x + (int32_t)viewport->width;
+
+   /* TODO: Is there a mathematical way of doing all this and then clamp at
+    * the end?
+    */
+   /* We flip the y0 and y1 when height is negative. */
+   viewport_rect.y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height);
+   viewport_rect.y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height);
+
+   if (scissor_rect.x1 <= viewport_rect.x0 ||
+       scissor_rect.y1 <= viewport_rect.y0 ||
+       scissor_rect.x0 >= viewport_rect.x1 ||
+       scissor_rect.y0 >= viewport_rect.y1) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   /* Determine the overlapping rectangle. */
+   viewport_rect.x0 = MAX2(viewport_rect.x0, scissor_rect.x0);
+   viewport_rect.y0 = MAX2(viewport_rect.y0, scissor_rect.y0);
+   viewport_rect.x1 = MIN2(viewport_rect.x1, scissor_rect.x1);
+   viewport_rect.y1 = MIN2(viewport_rect.y1, scissor_rect.y1);
+
+   /* TODO: Is this conversion safe? Is this logic right? */
+   rect_out->offset.x = (uint32_t)viewport_rect.x0;
+   rect_out->offset.y = (uint32_t)viewport_rect.y0;
+   rect_out->extent.height = (uint32_t)(viewport_rect.y1 - viewport_rect.y0);
+   rect_out->extent.width = (uint32_t)(viewport_rect.x1 - viewport_rect.x0);
+}
+
+static inline uint32_t
+pvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info)
+{
+   /* TODO: This should come from rogue_ppp.xml. */
+   return 16U + 16U * (!PVR_HAS_FEATURE(dev_info, tile_size_16x16));
+}
+
+/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */
+static void
+pvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_dynamic_state *const dynamic_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_cmd_struct(TA_STATE_ISPCTL) *const ispctl =
+      &ppp_state->isp.control_struct;
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+
+   if (ispctl->dbenable)
+      assert(!"Unimplemented");
+
+   if (ispctl->scenable) {
+      const uint32_t region_clip_align_size =
+         pvr_get_geom_region_clip_align_size(dev_info);
+      const VkViewport *const viewport = &dynamic_state->viewport.viewports[0];
+      const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0];
+      VkRect2D overlap_rect;
+      uint32_t scissor_words[2];
+      uint32_t height;
+      uint32_t width;
+      uint32_t x;
+      uint32_t y;
+
+      /* For region clip. */
+      uint32_t bottom;
+      uint32_t right;
+      uint32_t left;
+      uint32_t top;
+
+      /* We don't support multiple viewport calculations. */
+      assert(dynamic_state->viewport.count == 1);
+      /* We don't support multiple scissor calculations. */
+      assert(dynamic_state->scissor.count == 1);
+
+      pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect);
+
+      x = overlap_rect.offset.x;
+      y = overlap_rect.offset.y;
+      width = overlap_rect.extent.width;
+      height = overlap_rect.extent.height;
+
+      pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) {
+         word0.scw0_xmax = x + width;
+         word0.scw0_xmin = x;
+      }
+
+      pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) {
+         word1.scw1_ymax = y + height;
+         word1.scw1_ymin = y;
+      }
+
+      if (cmd_buffer->scissor_array.size &&
+          cmd_buffer->scissor_words[0] == scissor_words[0] &&
+          cmd_buffer->scissor_words[1] == scissor_words[1]) {
+         return;
+      }
+
+      cmd_buffer->scissor_words[0] = scissor_words[0];
+      cmd_buffer->scissor_words[1] = scissor_words[1];
+
+      /* Calculate region clip. */
+
+      left = x / region_clip_align_size;
+      top = y / region_clip_align_size;
+
+      /* Guard against right and bottom underflowing to -1 when the
+       * corresponding extent is 0.
+       */
+      /* TODO: Is there a better way of doing this? */
+      if ((x + width) != 0U)
+         right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1;
+      else
+         right = 0;
+
+      if ((y + height) != 0U)
+         bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1;
+      else
+         bottom = 0U;
+
+      /* Set up region clip to clip everything outside what was calculated. */
+
+      /* FIXME: Should we mask to prevent writing over other words? */
+      pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) {
+         word0.right = right;
+         word0.left = left;
+         word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
+      }
+
+      pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) {
+         word1.bottom = bottom;
+         word1.top = top;
+      }
+
+      ppp_state->depthbias_scissor_indices.scissor_index =
+         util_dynarray_num_elements(&cmd_buffer->scissor_array,
+                                    __typeof__(cmd_buffer->scissor_words));
+
+      memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array,
+                                      1,
+                                      sizeof(cmd_buffer->scissor_words)),
+             cmd_buffer->scissor_words,
+             sizeof(cmd_buffer->scissor_words));
+
+      emit_state->isp_dbsc = true;
+      emit_state->region_clip = true;
+   }
+}
+
+static void
+pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
+                                struct pvr_cmd_struct(TA_STATE_ISPA) * ispa)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   uint32_t merge_word;
+   uint32_t mask;
+
+   pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) {
+      /* Disable for lines or punch-through, or for DWD and depth compare
+       * always.
+       */
+      if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) ||
+          ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) ||
+          (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) {
+         size_info.pds_tri_merge_disable = true;
+      }
+   }
+
+   pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) {
+      size_info.pds_tri_merge_disable = true;
+   }
+
+   merge_word |= ppp_state->pds.size_info2 & ~mask;
+
+   if (merge_word != ppp_state->pds.size_info2) {
+      ppp_state->pds.size_info2 = merge_word;
+      emit_state->pds_fragment_stateptr0 = true;
+   }
+}
+
+/* TODO: See if this function can be improved once fully implemented. */
 */
+static uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   uint32_t fs_common_size,
+   uint32_t min_tiles_in_flight)
+{
+   uint32_t max_tiles_in_flight;
+   uint32_t num_allocs;
+
+   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
+      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
+   } else {
+      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
+      uint32_t min_cluster_per_phantom;
+
+      if (num_phantoms > 1)
+         pvr_finishme("Unimplemented path!!");
+
+      /* Fall back to the single phantom cluster count for now so that
+       * min_cluster_per_phantom is never read uninitialized; the
+       * multi-phantom paths are not implemented yet.
+       */
+      min_cluster_per_phantom =
+         PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
+
+      if (num_phantoms > 2)
+         pvr_finishme("Unimplemented path!!");
+
+      if (num_phantoms > 3)
+         pvr_finishme("Unimplemented path!!");
+
+      if (min_cluster_per_phantom >= 4)
+         num_allocs = 1;
+      else if (min_cluster_per_phantom == 2)
+         num_allocs = 2;
+      else
+         num_allocs = 4;
+   }
+
+   max_tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
+
+   if (fs_common_size == UINT_MAX) {
+      uint32_t max_common_size;
+
+      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
+
+      if (!PVR_HAS_ERN(dev_info, 38748)) {
+         /* Hardware needs space for one extra shared allocation. */
+         num_allocs += 1;
+      }
+
+      max_common_size = rogue_get_reserved_shared_size(dev_info) -
+                        rogue_get_max_coeffs(dev_info);
+
+      /* Double resource requirements to deal with fragmentation. */
+      max_common_size /= num_allocs * 2;
+      max_common_size =
+         ROUND_DOWN_TO(max_common_size,
+                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
+
+      return max_common_size;
+   } else if (fs_common_size == 0) {
+      return max_tiles_in_flight;
+   }
+
+   pvr_finishme("Unimplemented path!!");
+
+   return 0;
+}
+
+static void
+pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_stage_allocation_uniform_state *uniform_shader_state =
+      &state->gfx_pipeline->fragment_shader_state.uniform_state;
+   const struct pvr_pds_upload *pds_coeff_program =
+      &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
+   const struct pvr_pipeline_stage_state *fragment_state =
+      &state->gfx_pipeline->fragment_shader_state.stage_state;
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
+
+   const uint32_t pds_uniform_size =
+      DIV_ROUND_UP(uniform_shader_state->pds_info.data_size_in_dwords,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE));
+
+   const uint32_t pds_varying_state_size =
+      DIV_ROUND_UP(pds_coeff_program->data_size,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE));
+
+   const uint32_t usc_varying_size =
+      DIV_ROUND_UP(fragment_state->coefficient_size,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));
+
+   const uint32_t pds_temp_size =
+      DIV_ROUND_UP(fragment_state->temps_count,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));
+
+   const uint32_t usc_shared_size =
+      DIV_ROUND_UP(fragment_state->const_shared_reg_count,
+                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
+
+   const uint32_t max_tiles_in_flight =
+      pvr_calc_fscommon_size_and_tiles_in_flight(
+         dev_info,
+         usc_shared_size *
+            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
+         1);
+   uint32_t size_info_mask;
+   uint32_t size_info2;
+
+   if
(max_tiles_in_flight < sub_cmd->gfx.max_tiles_in_flight) + sub_cmd->gfx.max_tiles_in_flight = max_tiles_in_flight; + + pvr_csb_pack (&ppp_state->pds.pixel_shader_base, + TA_STATE_PDS_SHADERBASE, + shader_base) { + const struct pvr_pds_upload *const pds_upload = + &state->gfx_pipeline->fragment_shader_state.pds_fragment_program; + + shader_base.addr.addr = pds_upload->data_offset; + } + + if (uniform_shader_state->pds_code.pvr_bo) { + pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base, + TA_STATE_PDS_TEXUNICODEBASE, + tex_base) { + tex_base.addr.addr = uniform_shader_state->pds_code.code_offset; + } + } else { + ppp_state->pds.texture_uniform_code_base = 0U; + } + + pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) { + info1.pds_uniformsize = pds_uniform_size; + info1.pds_texturestatesize = 0U; + info1.pds_varyingsize = pds_varying_state_size; + info1.usc_varyingsize = usc_varying_size; + info1.pds_tempsize = pds_temp_size; + } + + pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) { + mask.pds_tri_merge_disable = true; + } + + ppp_state->pds.size_info2 &= size_info_mask; + + pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) { + info2.usc_sharedsize = usc_shared_size; + } + + ppp_state->pds.size_info2 |= size_info2; + + if (pds_coeff_program->pvr_bo) { + state->emit_state.pds_fragment_stateptr1 = true; + + pvr_csb_pack (&ppp_state->pds.varying_base, + TA_STATE_PDS_VARYINGBASE, + base) { + base.addr.addr = pds_coeff_program->data_offset; + } + } else { + ppp_state->pds.varying_base = 0U; + } + + pvr_csb_pack (&ppp_state->pds.uniform_state_data_base, + TA_STATE_PDS_UNIFORMDATABASE, + base) { + base.addr.addr = state->pds_fragment_uniform_data_offset; + } + + emit_state->pds_fragment_stateptr0 = true; + emit_state->pds_fragment_stateptr3 = true; +} + +static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + struct pvr_emit_state *const emit_state = &state->emit_state; + struct pvr_ppp_state *const ppp_state = &state->ppp_state; + + if (ppp_state->viewport_count != state->dynamic.common.viewport.count) { + ppp_state->viewport_count = state->dynamic.common.viewport.count; + emit_state->viewport = true; + } + + if (state->gfx_pipeline->raster_state.discard_enable) { + /* We don't want to emit any viewport data as it'll just get thrown + * away. It's after the previous condition because we still want to + * stash the viewport_count as it's our trigger for when + * rasterizer discard gets disabled. 
+ */ + emit_state->viewport = false; + return; + } + + for (uint32_t i = 0; i < ppp_state->viewport_count; i++) { + VkViewport *viewport = &state->dynamic.common.viewport.viewports[i]; + uint32_t x_scale = fui(viewport->width * 0.5f); + uint32_t y_scale = fui(viewport->height * 0.5f); + uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth); + uint32_t x_center = fui(viewport->x + viewport->width * 0.5f); + uint32_t y_center = fui(viewport->y + viewport->height * 0.5f); + uint32_t z_center = fui(viewport->minDepth); + + if (ppp_state->viewports[i].a0 != x_center || + ppp_state->viewports[i].m0 != x_scale || + ppp_state->viewports[i].a1 != y_center || + ppp_state->viewports[i].m1 != y_scale || + ppp_state->viewports[i].a2 != z_center || + ppp_state->viewports[i].m2 != z_scale) { + ppp_state->viewports[i].a0 = x_center; + ppp_state->viewports[i].m0 = x_scale; + ppp_state->viewports[i].a1 = y_center; + ppp_state->viewports[i].m1 = y_scale; + ppp_state->viewports[i].a2 = z_center; + ppp_state->viewports[i].m2 = z_scale; + + emit_state->viewport = true; + } + } +} + +static void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; + struct pvr_emit_state *const emit_state = &state->emit_state; + struct pvr_ppp_state *const ppp_state = &state->ppp_state; + uint32_t ppp_control; + + pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) { + const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state; + VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology; + control.drawclippededges = true; + control.wclampen = true; + + if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) + control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1); + else + control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0); + + if (raster_state->depth_clamp_enable) + control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR); + else + control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR); + + /* +--- FrontIsCCW? + * | +--- Cull Front? 
+ * v v + * 0|0 CULLMODE_CULL_CCW, + * 0|1 CULLMODE_CULL_CW, + * 1|0 CULLMODE_CULL_CW, + * 1|1 CULLMODE_CULL_CCW, + */ + switch (raster_state->cull_mode) { + case VK_CULL_MODE_BACK_BIT: + case VK_CULL_MODE_FRONT_BIT: + if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^ + (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) { + control.cullmode = PVRX(TA_CULLMODE_CULL_CW); + } else { + control.cullmode = PVRX(TA_CULLMODE_CULL_CCW); + } + + break; + + case VK_CULL_MODE_NONE: + control.cullmode = PVRX(TA_CULLMODE_NO_CULLING); + break; + + default: + unreachable("Unsupported cull mode!"); + } + } + + if (ppp_control != ppp_state->ppp_control) { + ppp_state->ppp_control = ppp_control; + emit_state->ppp_control = true; + } +} + +/* Largest valid PPP State update in words = 31 + * 1 - Header + * 3 - Stream Out Config words 0, 1 and 2 + * 1 - PPP Control word + * 3 - Varying Config words 0, 1 and 2 + * 1 - Output Select + * 1 - WClamp + * 6 - Viewport Transform words + * 2 - Region Clip words + * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3) + * 4 - PDS State for fragment phase (PDSSTATEPTR0) + * 6 - ISP Control Words + */ +#define PVR_MAX_PPP_STATE_DWORDS 31 + +static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + struct pvr_emit_state *const emit_state = &state->emit_state; + struct pvr_ppp_state *const ppp_state = &state->ppp_state; + struct pvr_csb *const control_stream = + &state->current_sub_cmd->gfx.control_stream; + uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS]; + uint32_t ppp_state_words_count; + uint32_t ppp_state_header; + bool deferred_secondary; + struct pvr_bo *pvr_bo; + uint32_t *buffer_ptr; + VkResult result; + + buffer_ptr = ppp_state_words; + + pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) { + header.view_port_count = (ppp_state->viewport_count == 0) + ? 0U + : (ppp_state->viewport_count - 1); + + /* Skip over header. */ + buffer_ptr++; + + /* Set ISP state. */ + if (emit_state->isp) { + header.pres_ispctl = true; + *buffer_ptr++ = ppp_state->isp.control; + header.pres_ispctl_fa = true; + *buffer_ptr++ = ppp_state->isp.front_a; + + if (emit_state->isp_fb) { + header.pres_ispctl_fb = true; + *buffer_ptr++ = ppp_state->isp.front_b; + } + + if (emit_state->isp_ba) { + header.pres_ispctl_ba = true; + *buffer_ptr++ = ppp_state->isp.back_a; + } + + if (emit_state->isp_bb) { + header.pres_ispctl_bb = true; + *buffer_ptr++ = ppp_state->isp.back_b; + } + } + + /* Depth bias / scissor + * If deferred_secondary is true then we do a separate state update + * which gets patched in ExecuteDeferredCommandBuffer. + */ + /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer. + */ + deferred_secondary = + cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && + cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + + if (emit_state->isp_dbsc && !deferred_secondary) { + header.pres_ispctl_dbsc = true; + + pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) { + ispdbsc.dbindex = + ppp_state->depthbias_scissor_indices.depthbias_index; + ispdbsc.scindex = + ppp_state->depthbias_scissor_indices.scissor_index; + } + } + + /* PDS state. 
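+       * Only state pointers 0, 1 and 3 are written here; state pointer 2
+       * (texture state) is deliberately skipped, see the comment further
+       * down.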
*/ + if (emit_state->pds_fragment_stateptr0) { + header.pres_pds_state_ptr0 = true; + + *buffer_ptr++ = ppp_state->pds.pixel_shader_base; + *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base; + *buffer_ptr++ = ppp_state->pds.size_info1; + *buffer_ptr++ = ppp_state->pds.size_info2; + } + + if (emit_state->pds_fragment_stateptr1) { + header.pres_pds_state_ptr1 = true; + *buffer_ptr++ = ppp_state->pds.varying_base; + } + + /* We don't use the pds_fragment_stateptr2 (texture state programs) + * control word, but this doesn't mean we need to set it to 0. This is + * because the hardware runs the texture state program only when the + * pds_texture state field of PDS_SIZEINFO1 is non-zero. + */ + + if (emit_state->pds_fragment_stateptr3) { + header.pres_pds_state_ptr3 = true; + *buffer_ptr++ = ppp_state->pds.uniform_state_data_base; + } + + /* Region clip. */ + if (emit_state->region_clip) { + header.pres_region_clip = true; + *buffer_ptr++ = ppp_state->region_clipping.word0; + *buffer_ptr++ = ppp_state->region_clipping.word1; + } + + /* Viewport. */ + if (emit_state->viewport) { + const uint32_t viewports = MAX2(1, ppp_state->viewport_count); + + header.pres_viewport = true; + for (uint32_t i = 0; i < viewports; i++) { + *buffer_ptr++ = ppp_state->viewports[i].a0; + *buffer_ptr++ = ppp_state->viewports[i].m0; + *buffer_ptr++ = ppp_state->viewports[i].a1; + *buffer_ptr++ = ppp_state->viewports[i].m1; + *buffer_ptr++ = ppp_state->viewports[i].a2; + *buffer_ptr++ = ppp_state->viewports[i].m2; + } + } + + /* W clamp. */ + if (emit_state->wclamp) { + const float wclamp = 0.00001f; + + header.pres_wclamp = true; + *buffer_ptr++ = fui(wclamp); + } + + /* Output selects. */ + if (emit_state->output_selects) { + header.pres_outselects = true; + *buffer_ptr++ = ppp_state->output_selects; + } + + /* Varying words. */ + if (emit_state->varying_word0) { + header.pres_varying_word0 = true; + *buffer_ptr++ = ppp_state->varying_word[0]; + } + + if (emit_state->varying_word1) { + header.pres_varying_word1 = true; + *buffer_ptr++ = ppp_state->varying_word[1]; + } + + if (emit_state->varying_word2) { + /* We only emit this on the first draw of a render job to prevent us + * from inheriting a non-zero value set elsewhere. + */ + header.pres_varying_word2 = true; + *buffer_ptr++ = 0; + } + + /* PPP control. */ + if (emit_state->ppp_control) { + header.pres_ppp_ctrl = true; + *buffer_ptr++ = ppp_state->ppp_control; + } + + if (emit_state->stream_out) { + /* We only emit this on the first draw of a render job to prevent us + * from inheriting a non-zero value set elsewhere. + */ + header.pres_stream_out_size = true; + *buffer_ptr++ = 0; + } + } + + if (!ppp_state_header) + return VK_SUCCESS; + + ppp_state_words_count = buffer_ptr - ppp_state_words; + ppp_state_words[0] = ppp_state_header; + + result = pvr_cmd_buffer_alloc_mem(cmd_buffer, + cmd_buffer->device->heaps.general_heap, + ppp_state_words_count * sizeof(uint32_t), + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &pvr_bo); + if (result != VK_SUCCESS) + return result; + + memcpy(pvr_bo->bo->map, + ppp_state_words, + ppp_state_words_count * sizeof(uint32_t)); + + /* Write the VDM state update into the VDM control stream. 
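+    * The device address of the PPP state buffer is split across the two
+    * words: STATE0 carries the address MSBs and the word count, STATE1 the
+    * address LSBs.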
*/ + pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) { + state0.word_count = ppp_state_words_count; + state0.addrmsb = pvr_bo->vma->dev_addr; + } + + pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) { + state1.addrlsb = pvr_bo->vma->dev_addr; + } + + if (emit_state->isp_dbsc && + cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + pvr_finishme("Unimplemented path!!"); + } + + state->emit_state_bits = 0; + + return VK_SUCCESS; +} + +static VkResult +pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; + const bool dirty_stencil = state->dirty.compare_mask || + state->dirty.write_mask || state->dirty.reference; + VkResult result; + + if (!(dirty_stencil || state->dirty.depth_bias || + state->dirty.fragment_descriptors || state->dirty.line_width || + state->dirty.gfx_pipeline_binding || state->dirty.scissor || + state->dirty.userpass_spawn || state->dirty.viewport || + state->emit_state_bits)) { + return VK_SUCCESS; + } + + if (state->dirty.gfx_pipeline_binding) { + struct pvr_cmd_struct(TA_STATE_ISPA) ispa; + + pvr_setup_output_select(cmd_buffer); + pvr_setup_isp_faces_and_control(cmd_buffer, &ispa); + pvr_setup_triangle_merging_flag(cmd_buffer, &ispa); + } else if (dirty_stencil || state->dirty.line_width || + state->dirty.userpass_spawn) { + pvr_setup_isp_faces_and_control(cmd_buffer, NULL); + } + + if (!gfx_pipeline->raster_state.discard_enable && + state->dirty.fragment_descriptors && + gfx_pipeline->fragment_shader_state.bo) { + pvr_setup_fragment_state_pointers(cmd_buffer); + } + + pvr_setup_isp_depth_bias_scissor_state(cmd_buffer); + + if (state->dirty.viewport) + pvr_setup_viewport(cmd_buffer); + + pvr_setup_ppp_control(cmd_buffer); + + if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) { + /* FIXME: Port SetNegativeViewport(). */ + } + + result = pvr_emit_ppp_state(cmd_buffer); + if (result != VK_SUCCESS) + return result; + + return VK_SUCCESS; +} + +static void +pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer, + bool *const push_descriptors_dirty_out) +{ + /* TODO: Implement this function, based on ValidatePushDescriptors. */ + pvr_finishme("Add support for push descriptors!"); + *push_descriptors_dirty_out = false; +} + +static void +pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info, + const uint32_t vs_output_size, + const bool raster_enable, + uint32_t *const cam_size_out, + uint32_t *const vs_max_instances_out) +{ + /* First work out the size of a vertex in the UVS and multiply by 4 for + * column ordering. + */ + const uint32_t uvs_vertex_vector_size_in_dwords = + (vs_output_size + 1U + raster_enable * 4U) * 4U; + const uint32_t vdm_cam_size = + PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U); + + /* This is a proxy for 8XE. */ + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && + vdm_cam_size < 96U) { + /* Comparisons are based on size including scratch per vertex vector. 
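+       * For example (illustrative numbers): a vs_output_size of 8 dwords
+       * with rasterization enabled gives a vector of (8 + 1 + 4) * 4 = 52
+       * dwords, which falls in the first bucket below (< 14 * 4).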
*/ + if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) { + *cam_size_out = MIN2(31U, vdm_cam_size - 1U); + *vs_max_instances_out = 16U; + } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) { + *cam_size_out = 15U; + *vs_max_instances_out = 16U; + } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) { + *cam_size_out = 11U; + *vs_max_instances_out = 12U; + } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) { + *cam_size_out = 7U; + *vs_max_instances_out = 8U; + } else if (PVR_HAS_FEATURE(dev_info, + simple_internal_parameter_format_v2) || + uvs_vertex_vector_size_in_dwords < (64U * 4U)) { + *cam_size_out = 7U; + *vs_max_instances_out = 4U; + } else { + *cam_size_out = 3U; + *vs_max_instances_out = 2U; + } + } else { + /* Comparisons are based on size including scratch per vertex vector. */ + if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) { + /* output size <= 27 + 5 scratch. */ + *cam_size_out = MIN2(95U, vdm_cam_size - 1U); + *vs_max_instances_out = 0U; + } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) { + /* output size <= 43 + 5 scratch */ + *cam_size_out = 63U; + if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U) + *vs_max_instances_out = 16U; + else + *vs_max_instances_out = 0U; + } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) { + /* output size <= 59 + 5 scratch. */ + *cam_size_out = 31U; + if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U) + *vs_max_instances_out = 16U; + else + *vs_max_instances_out = 0U; + } else { + *cam_size_out = 15U; + *vs_max_instances_out = 16U; + } + } +} + +static void +pvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer) +{ + /* FIXME: Assume all state is dirty for the moment. */ + struct pvr_device_info *const dev_info = + &cmd_buffer->device->pdevice->dev_info; + ASSERTED const uint32_t max_user_vertex_output_components = + pvr_get_max_user_vertex_output_components(dev_info); + struct pvr_cmd_struct(VDMCTRL_VDM_STATE0) + header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) }; + const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; + struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream; + uint32_t vs_output_size; + uint32_t max_instances; + uint32_t cam_size; + + assert(gfx_pipeline); + + /* CAM Calculations and HW state take vertex size aligned to DWORDS. */ + vs_output_size = + DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size, + PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE)); + + assert(vs_output_size <= max_user_vertex_output_components); + + pvr_calculate_vertex_cam_size(dev_info, + vs_output_size, + true, + &cam_size, + &max_instances); + + pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) { + state0.cam_size = cam_size; + + if (gfx_pipeline->input_asm_state.primitive_restart) { + state0.cut_index_enable = true; + state0.cut_index_present = true; + } + + switch (gfx_pipeline->input_asm_state.topology) { + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1); + break; + + default: + state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0); + break; + } + + /* If we've bound a different vertex buffer, or this draw-call requires + * a different PDS attrib data-section from the last draw call (changed + * base_instance) then we need to specify a new data section. 
This is
+       * also the case if we've switched pipeline or attrib program as the
+       * data-section layout will be different.
+       */
+      state0.vs_data_addr_present =
+         state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings ||
+         state->dirty.draw_base_instance || state->dirty.draw_variant;
+
+      /* Need to specify new PDS Attrib program if we've bound a different
+       * pipeline or we needed a different PDS Attrib variant for this
+       * draw-call.
+       */
+      state0.vs_other_present = state->dirty.gfx_pipeline_binding ||
+                                state->dirty.draw_variant;
+
+      /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when
+       * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because
+       * Vulkan doesn't support stream output and the vertex position is
+       * always emitted to the UVB.
+       */
+      state0.uvs_scratch_size_select =
+         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);
+
+      header = state0;
+   }
+
+   if (header.cut_index_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) {
+         switch (state->index_buffer_binding.type) {
+         case VK_INDEX_TYPE_UINT32:
+            /* FIXME: Defines for these? These seem to come from the Vulkan
+             * spec for VkPipelineInputAssemblyStateCreateInfo's
+             * primitiveRestartEnable.
+             */
+            state1.cut_index = 0xFFFFFFFF;
+            break;
+
+         case VK_INDEX_TYPE_UINT16:
+            state1.cut_index = 0xFFFF;
+            break;
+
+         default:
+            unreachable("Invalid index type");
+         }
+      }
+   }
+
+   if (header.vs_data_addr_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) {
+         state2.vs_pds_data_base_addr.addr = state->pds_vertex_attrib_offset;
+      }
+   }
+
+   if (header.vs_other_present) {
+      const uint32_t usc_unified_store_size_in_bytes =
+         gfx_pipeline->vertex_shader_state.vertex_input_size << 2;
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) {
+         state3.vs_pds_code_base_addr.addr = state->pds_shader.code_offset;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) {
+         state4.vs_output_size = vs_output_size;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) {
+         state5.vs_max_instances = max_instances;
+         state5.vs_usc_common_size = 0U;
+         state5.vs_usc_unified_size = DIV_ROUND_UP(
+            usc_unified_store_size_in_bytes,
+            PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
+         state5.vs_pds_temp_size =
+            DIV_ROUND_UP(state->pds_shader.info->temps_required << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
+         state5.vs_pds_data_size =
+            DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
+      }
+   }
+}
+
+static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   const struct pvr_pipeline_stage_state *const fragment_state =
+      &gfx_pipeline->fragment_shader_state.stage_state;
+   struct pvr_sub_cmd *sub_cmd;
+   bool fstencil_writemask_zero;
+   bool bstencil_writemask_zero;
+   bool push_descriptors_dirty;
+   bool fstencil_keep;
+   bool bstencil_keep;
+   VkResult result;
+
+   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   sub_cmd = state->current_sub_cmd;
+   sub_cmd->gfx.empty_cmd = false;
+
+   /* Determine pipeline depth/stencil usage. If a pipeline uses depth or
+    * stencil testing, those attachments are using their loaded values, and
+    * the loadOps cannot be optimized out.
+    */
+   /* Pipeline uses depth testing.
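+    * A depth compare op other than ALWAYS consumes the loaded depth value,
+    * so the depth loadOp cannot be thrown away.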
 */
+   if (sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) {
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   /* Pipeline uses stencil testing. */
+   if (sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS ||
+        gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) {
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
+                       compute_overlap)) {
+      uint32_t coefficient_size =
+         DIV_ROUND_UP(fragment_state->coefficient_size,
+                      PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));
+
+      if (coefficient_size >
+          PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE))
+         sub_cmd->gfx.disable_compute_overlap = true;
+   }
+
+   sub_cmd->gfx.frag_uses_atomic_ops |= fragment_state->uses_atomic_ops;
+   sub_cmd->gfx.frag_has_side_effects |= fragment_state->has_side_effects;
+   sub_cmd->gfx.frag_uses_texture_rw |= fragment_state->uses_texture_rw;
+   sub_cmd->gfx.vertex_uses_texture_rw |=
+      gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw;
+
+   fstencil_keep =
+      (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) &&
+      (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP);
+   bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) &&
+                   (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP);
+   fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0);
+   bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0);
+
+   /* Set the stencil modified flag unless:
+    * - Both the front and back-facing stencil states use only KEEP for
+    *   fail_op/pass_op, or
+    * - Both the front and back-facing stencil write masks are zero.
+    */
+   if (!(fstencil_keep && bstencil_keep) &&
+       !(fstencil_writemask_zero && bstencil_writemask_zero)) {
+      sub_cmd->gfx.modifies_stencil = true;
+   }
+
+   /* Set depth modified flag if depth write is enabled. */
+   if (!gfx_pipeline->depth_write_disable)
+      sub_cmd->gfx.modifies_depth = true;
+
+   /* If either the data or code changes for pds vertex attribs, regenerate the
+    * data segment.
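+    * The attrib program variant (basic, base-instance or draw-indirect) is
+    * picked below from the draw state.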
+ */ + if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding || + state->dirty.draw_variant || state->dirty.draw_base_instance) { + enum pvr_pds_vertex_attrib_program_type prog_type; + const struct pvr_pds_attrib_program *program; + + if (state->draw_state.draw_indirect) + prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT; + else if (state->draw_state.base_instance) + prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE; + else + prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC; + + program = + &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type]; + state->pds_shader.info = &program->info; + state->pds_shader.code_offset = program->program.code_offset; + + state->max_shared_regs = + MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline)); + + pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline); + } + + /* TODO: Check for dirty push constants */ + + pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty); + + state->dirty.vertex_descriptors = push_descriptors_dirty || + state->dirty.gfx_pipeline_binding; + state->dirty.fragment_descriptors = state->dirty.vertex_descriptors; + + if (state->dirty.fragment_descriptors) { + result = pvr_setup_descriptor_mappings( + cmd_buffer, + PVR_STAGE_ALLOCATION_FRAGMENT, + &state->gfx_pipeline->fragment_shader_state.uniform_state, + &state->pds_fragment_uniform_data_offset); + if (result != VK_SUCCESS) { + mesa_loge("Could not setup fragment descriptor mappings."); + return result; + } + } + + if (state->dirty.vertex_descriptors) { + uint32_t pds_vertex_uniform_data_offset; + + result = pvr_setup_descriptor_mappings( + cmd_buffer, + PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY, + &state->gfx_pipeline->vertex_shader_state.uniform_state, + &pds_vertex_uniform_data_offset); + if (result != VK_SUCCESS) { + mesa_loge("Could not setup vertex descriptor mappings."); + return result; + } + + pvr_emit_dirty_pds_state(cmd_buffer, pds_vertex_uniform_data_offset); + } + + pvr_emit_dirty_ppp_state(cmd_buffer); + pvr_emit_dirty_vdm_state(cmd_buffer); + + state->dirty.gfx_desc_dirty = false; + state->dirty.blend_constants = false; + state->dirty.compare_mask = false; + state->dirty.depth_bias = false; + state->dirty.draw_base_instance = false; + state->dirty.draw_variant = false; + state->dirty.fragment_descriptors = false; + state->dirty.line_width = false; + state->dirty.gfx_pipeline_binding = false; + state->dirty.reference = false; + state->dirty.scissor = false; + state->dirty.userpass_spawn = false; + state->dirty.vertex_bindings = false; + state->dirty.viewport = false; + state->dirty.write_mask = false; + + return VK_SUCCESS; +} + +static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology) +{ + switch (topology) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST); + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST); + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP); + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST); + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP); + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN); + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ); + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + return 
PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ); + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ); + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ); + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST); + default: + unreachable("Undefined primitive topology"); + } +} + +static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer, + VkPrimitiveTopology topology, + uint32_t first_vertex, + uint32_t vertex_count, + uint32_t first_index, + uint32_t index_count, + uint32_t instance_count) +{ + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream; + struct pvr_cmd_struct(VDMCTRL_INDEX_LIST0) + list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) }; + pvr_dev_addr_t index_buffer_addr = { 0 }; + unsigned int index_stride = 0; + + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) { + list0.primitive_topology = pvr_get_hw_primitive_topology(topology); + + /* First instance is not handled in the VDM state, it's implemented as + * an addition in the PDS vertex fetch. + */ + list0.index_count_present = true; + + if (instance_count > 1) + list0.index_instance_count_present = true; + + if (first_vertex != 0) + list0.index_offset_present = true; + + if (state->draw_state.draw_indexed) { + struct pvr_buffer *buffer = state->index_buffer_binding.buffer; + + switch (state->index_buffer_binding.type) { + default: + unreachable("Invalid index type"); + FALLTHROUGH; + + case VK_INDEX_TYPE_UINT32: + list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32); + index_stride = 4; + break; + + case VK_INDEX_TYPE_UINT16: + list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16); + index_stride = 2; + break; + } + + list0.index_addr_present = true; + index_buffer_addr.addr = buffer->dev_addr.addr; + index_buffer_addr.addr += state->index_buffer_binding.offset; + index_buffer_addr.addr += first_index * index_stride; + list0.index_base_addrmsb = index_buffer_addr; + } + + list_hdr = list0; + } + + if (list_hdr.index_addr_present) { + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) { + list1.index_base_addrlsb = index_buffer_addr; + } + } + + if (list_hdr.index_count_present) { + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) { + list2.index_count = vertex_count | index_count; + } + } + + if (list_hdr.index_instance_count_present) { + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) { + list3.instance_count = instance_count - 1; + } + } + + if (list_hdr.index_offset_present) { + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) { + list4.index_offset = first_vertex; + } + } + + /* TODO: See if we need list_words[5-9]. */ +} + +void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_cmd_buffer_draw_state draw_state; + VkResult result; + + PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); + + draw_state.base_vertex = vertexOffset; + draw_state.base_instance = firstInstance; + draw_state.draw_indirect = false; + draw_state.draw_indexed = true; + pvr_update_draw_state(&cmd_buffer->state, &draw_state); + + result = pvr_validate_draw_state(cmd_buffer); + if (result != VK_SUCCESS) + return; + + /* Write the VDM control stream for the primitive. 
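+    * Note that vertex_count is passed as 0 here: pvr_emit_vdm_index_list()
+    * ORs vertex_count and index_count together, and an indexed draw only
+    * provides an index count.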
*/ + pvr_emit_vdm_index_list(cmd_buffer, + state->gfx_pipeline->input_asm_state.topology, + vertexOffset, + 0, + firstIndex, + indexCount, + instanceCount); +} + +void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + assert(!"Unimplemented"); +} + +static VkResult +pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer) +{ + pvr_finishme("Add attachment resolve support!"); + return pvr_cmd_buffer_end_sub_cmd(cmd_buffer); +} + +void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer, + const VkSubpassEndInfoKHR *pSubpassEndInfo) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_image_view **attachments; + VkClearValue *clear_values; + VkResult result; + + PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); + + assert(state->render_pass_info.pass); + assert(state->render_pass_info.framebuffer); + + /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called + * twice in this path, one here and one from + * pvr_resolve_unemitted_resolve_attachments. + */ + result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer); + if (result != VK_SUCCESS) + return; + + result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer); + if (result != VK_SUCCESS) + return; + + /* Save the required fields before clearing render_pass_info struct. */ + attachments = state->render_pass_info.attachments; + clear_values = state->render_pass_info.clear_values; + + memset(&state->render_pass_info, 0, sizeof(state->render_pass_info)); + + state->render_pass_info.attachments = attachments; + state->render_pass_info.clear_values = clear_values; +} + +void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer, + const VkDependencyInfoKHR *pDependencyInfo) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags2KHR stageMask) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer, + VkEvent _event, + const VkDependencyInfoKHR *pDependencyInfo) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent *pEvents, + const VkDependencyInfoKHR *pDependencyInfos) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, + VkQueryPool queryPool, + uint32_t query) +{ + unreachable("Timestamp queries are not supported."); +} + +VkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + VkResult result; + + /* From the Vulkan 1.0 spec: + * + * CommandBuffer must be in the recording state. 
+    */
+   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING);
+
+   if (state->status != VK_SUCCESS)
+      return state->status;
+
+   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE;
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_csb.c b/src/imagination/vulkan/pvr_csb.c
new file mode 100644
index 00000000000..64c734d5a3b
--- /dev/null
+++ b/src/imagination/vulkan/pvr_csb.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.c which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "vk_log.h"
+
+/**
+ * \file pvr_csb.c
+ *
+ * \brief Contains functions to manage the Control Stream Builder (csb) object.
+ *
+ * A csb object can be used to create a primary/main control stream, referred
+ * to as the control stream hereafter, or a secondary control stream, also
+ * referred to as a sub control stream. The main difference between these is
+ * that the control stream is the one directly submitted to the GPU and is
+ * terminated using STREAM_TERMINATE. The secondary control stream, on the
+ * other hand, can be thought of as an independent set of commands that can be
+ * referenced by a primary control stream to avoid duplication and is instead
+ * terminated using STREAM_RETURN, which means the control stream parser
+ * should return to the main stream it came from.
+ *
+ * Note: Sub control streams are only supported for
+ * PVR_CMD_STREAM_TYPE_GRAPHICS type control streams.
+ */
+
+/**
+ * \brief Size of an individual csb buffer object.
+ */
+#define PVR_CMD_BUFFER_CSB_BO_SIZE 4096
+
+/**
+ * \brief Initializes the csb object.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] csb Control Stream Builder object to initialize.
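+ * \param[in] stream_type Type of control stream to build, i.e.
+ *            #PVR_CMD_STREAM_TYPE_GRAPHICS or #PVR_CMD_STREAM_TYPE_COMPUTE.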
+ *
+ * \sa #pvr_csb_finish()
+ */
+void pvr_csb_init(struct pvr_device *device,
+                  enum pvr_cmd_stream_type stream_type,
+                  struct pvr_csb *csb)
+{
+   csb->start = NULL;
+   csb->next = NULL;
+   csb->pvr_bo = NULL;
+   csb->end = NULL;
+   csb->device = device;
+   csb->stream_type = stream_type;
+   csb->status = VK_SUCCESS;
+   list_inithead(&csb->pvr_bo_list);
+}
+
+/**
+ * \brief Frees the resources associated with the csb object.
+ *
+ * \param[in] csb Control Stream Builder object to free.
+ *
+ * \sa #pvr_csb_init()
+ */
+void pvr_csb_finish(struct pvr_csb *csb)
+{
+   list_for_each_entry_safe (struct pvr_bo, pvr_bo, &csb->pvr_bo_list, link) {
+      list_del(&pvr_bo->link);
+      pvr_bo_free(csb->device, pvr_bo);
+   }
+
+   /* Leave the csb in a reset state to catch use-after-destroy instances. */
+   pvr_csb_init(NULL, PVR_CMD_STREAM_TYPE_INVALID, csb);
+}
+
+/**
+ * \brief Helper function to extend csb memory.
+ *
+ * Allocates a new buffer object, links it to the previous buffer object using
+ * STREAM_LINK dwords and updates the csb object to use the new buffer.
+ *
+ * To make sure there is always enough space to emit the STREAM_LINK dwords, a
+ * few bytes are reserved at the end of every buffer when it is created. When
+ * a new buffer is allocated, the stream link dwords are emitted into that
+ * reserved space in the buffer currently in use. This makes sure that when
+ * #pvr_csb_alloc_dwords() is called from #pvr_csb_emit() to add STREAM_LINK0
+ * and STREAM_LINK1, it succeeds without trying to allocate new pages.
+ *
+ * \param[in] csb Control Stream Builder object to extend.
+ * \return true on success and false otherwise.
+ */
+static bool pvr_csb_buffer_extend(struct pvr_csb *csb)
+{
+   const uint8_t stream_link_space = (pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
+                                      pvr_cmd_length(VDMCTRL_STREAM_LINK1)) *
+                                     4;
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&csb->device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   /* Make sure extra space allocated for stream links is sufficient for both
+    * stream types.
+    */
+   STATIC_ASSERT((pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
+                  pvr_cmd_length(VDMCTRL_STREAM_LINK1)) ==
+                 (pvr_cmd_length(CDMCTRL_STREAM_LINK0) +
+                  pvr_cmd_length(CDMCTRL_STREAM_LINK1)));
+
+   result = pvr_bo_alloc(csb->device,
+                         csb->device->heaps.general_heap,
+                         PVR_CMD_BUFFER_CSB_BO_SIZE,
+                         cache_line_size,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &pvr_bo);
+   if (result != VK_SUCCESS) {
+      vk_error(csb->device, result);
+      csb->status = result;
+      return false;
+   }
+
+   /* Chain to the old BO if this is not the first BO in csb. */
+   if (csb->pvr_bo) {
+      csb->end += stream_link_space;
+      assert(csb->next + stream_link_space <= csb->end);
+
+      switch (csb->stream_type) {
+      case PVR_CMD_STREAM_TYPE_GRAPHICS:
+         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
+            link.link_addrmsb = pvr_bo->vma->dev_addr;
+         }
+
+         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
+            link.link_addrlsb = pvr_bo->vma->dev_addr;
+         }
+
+         break;
+
+      case PVR_CMD_STREAM_TYPE_COMPUTE:
+         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) {
+            link.link_addrmsb = pvr_bo->vma->dev_addr;
+         }
+
+         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) {
+            link.link_addrlsb = pvr_bo->vma->dev_addr;
+         }
+
+         break;
+
+      default:
+         unreachable("Unknown stream type");
+         break;
+      }
+   }
+
+   csb->pvr_bo = pvr_bo;
+   csb->start = pvr_bo->bo->map;
+
+   /* Reserve stream link size at the end to make sure we don't run out of
+    * space when a stream link is required.
+    */
+   csb->end = csb->start + pvr_bo->bo->size - stream_link_space;
+   csb->next = csb->start;
+
+   list_addtail(&pvr_bo->link, &csb->pvr_bo_list);
+
+   return true;
+}
+
+/**
+ * \brief Provides a chunk of memory from the current csb buffer. If the
+ * current buffer cannot fulfill the required amount of memory,
+ * #pvr_csb_buffer_extend() is called to allocate a new buffer. The maximum
+ * allocatable size in bytes is #PVR_CMD_BUFFER_CSB_BO_SIZE minus the size of
+ * the STREAM_LINK0 and STREAM_LINK1 dwords.
+ *
+ * \param[in] csb Control Stream Builder object to allocate from.
+ * \param[in] num_dwords Number of dwords to allocate.
+ * \return Valid host virtual address or NULL otherwise.
+ */
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords)
+{
+   const uint32_t required_space = num_dwords * 4;
+
+   if (csb->status != VK_SUCCESS)
+      return NULL;
+
+   if (csb->next + required_space > csb->end) {
+      bool ret = pvr_csb_buffer_extend(csb);
+      if (!ret)
+         return NULL;
+   }
+
+   void *p = csb->next;
+
+   csb->next += required_space;
+   assert(csb->next <= csb->end);
+
+   return p;
+}
+
+/**
+ * \brief Adds a VDMCTRL_STREAM_RETURN dword to the control stream pointed to
+ * by the csb object. Since a VDMCTRL_STREAM_RETURN marks the end of a sub
+ * control stream, the status of the control stream is returned as well.
+ *
+ * \param[in] csb Control Stream Builder object to add VDMCTRL_STREAM_RETURN to.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_csb_emit_return(struct pvr_csb *csb)
+{
+   /* STREAM_RETURN is only supported by the graphics control stream. */
+   assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS);
+
+   /* clang-format off */
+   pvr_csb_emit(csb, VDMCTRL_STREAM_RETURN, ret);
+   /* clang-format on */
+
+   return csb->status;
+}
+
+/**
+ * \brief Adds a STREAM_TERMINATE dword to the control stream pointed to by
+ * the csb object. Since a STREAM_TERMINATE marks the end of the control
+ * stream, the status of the control stream is returned as well.
+ *
+ * \param[in] csb Control Stream Builder object to terminate.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb)
+{
+   switch (csb->stream_type) {
+   case PVR_CMD_STREAM_TYPE_GRAPHICS:
+      /* clang-format off */
+      pvr_csb_emit(csb, VDMCTRL_STREAM_TERMINATE, terminate);
+      /* clang-format on */
+      break;
+
+   case PVR_CMD_STREAM_TYPE_COMPUTE:
+      /* clang-format off */
+      pvr_csb_emit(csb, CDMCTRL_STREAM_TERMINATE, terminate);
+      /* clang-format on */
+      break;
+
+   default:
+      unreachable("Unknown stream type");
+      break;
+   }
+
+   return csb->status;
+}
diff --git a/src/imagination/vulkan/pvr_csb.h b/src/imagination/vulkan/pvr_csb.h
new file mode 100644
index 00000000000..581595969a9
--- /dev/null
+++ b/src/imagination/vulkan/pvr_csb.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.h which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CSB_H
+#define PVR_CSB_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_bo.h"
+#include "pvr_winsys.h"
+#include "util/list.h"
+
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_hwdefs.h"
+
+struct pvr_device;
+
+enum pvr_cmd_stream_type {
+   PVR_CMD_STREAM_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_CMD_STREAM_TYPE_GRAPHICS,
+   PVR_CMD_STREAM_TYPE_COMPUTE,
+};
+
+struct pvr_csb {
+   struct pvr_device *device;
+
+   /* Pointer to the current csb buffer object. */
+   struct pvr_bo *pvr_bo;
+
+   /* Pointers to the current bo's memory. */
+   void *start;
+   void *end;
+   void *next;
+
+   /* List of csb buffer objects. */
+   struct list_head pvr_bo_list;
+
+   enum pvr_cmd_stream_type stream_type;
+
+   /* Current error status of the command buffer. Used to track inconsistent
+    * or incomplete command buffer states that are the consequence of run-time
+    * errors such as out of memory scenarios. We want to track this in the
+    * csb because the command buffer object is not visible to some parts
+    * of the driver.
+    */
+   VkResult status;
+};
+
+/**
+ * \brief Gets the status of the csb.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \return VK_SUCCESS if the csb hasn't encountered any error, or the error
+ * code otherwise.
+ */
+static inline VkResult pvr_csb_get_status(struct pvr_csb *csb)
+{
+   return csb->status;
+}
+
+/**
+ * \brief Checks whether the control stream is empty.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \return true if the csb is empty, false otherwise.
+ */
+static inline bool pvr_csb_is_empty(struct pvr_csb *csb)
+{
+   return list_is_empty(&csb->pvr_bo_list);
+}
+
+static inline pvr_dev_addr_t pvr_csb_get_start_address(struct pvr_csb *csb)
+{
+   if (!pvr_csb_is_empty(csb)) {
+      struct pvr_bo *pvr_bo =
+         list_first_entry(&csb->pvr_bo_list, struct pvr_bo, link);
+
+      return pvr_bo->vma->dev_addr;
+   }
+
+   return PVR_DEV_ADDR_INVALID;
+}
+
+void pvr_csb_init(struct pvr_device *device,
+                  enum pvr_cmd_stream_type stream_type,
+                  struct pvr_csb *csb);
+void pvr_csb_finish(struct pvr_csb *csb);
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords);
+VkResult pvr_csb_emit_return(struct pvr_csb *csb);
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb);
+
+#define PVRX(x) ROGUE_##x
+#define pvr_cmd_struct(x) PVRX(x)
+#define pvr_cmd_length(x) PVRX(x##_length)
+#define pvr_cmd_header(x) PVRX(x##_header)
+#define pvr_cmd_pack(x) PVRX(x##_pack)
+
+/**
+ * \brief Packs a command/state into one or more dwords and stores them in the
+ * memory pointed to by _dst.
+ *
+ * \param[out] _dst Pointer to store the packed command/state.
+ * \param[in] cmd Command/state type.
+ * \param[in,out] name Name to give to the command/state structure variable,
+ *                which contains the information to be packed and emitted.
+ *                This can be used by the caller to modify the command or
+ *                state information before it's packed.
+ */
+#define pvr_csb_pack(_dst, cmd, name)                               \
+   for (struct pvr_cmd_struct(cmd) name = { pvr_cmd_header(cmd) }, \
+        *_loop_terminate = &name;                                  \
+        __builtin_expect(_loop_terminate != NULL, 1);              \
+        ({                                                         \
+           pvr_cmd_pack(cmd)((_dst), &name);                       \
+           _loop_terminate = NULL;                                 \
+        }))
+
+/**
+ * \brief Merges the dwords0 and dwords1 arrays and stores the result in the
+ * control stream pointed to by the csb object.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \param[in] dwords0 Dwords0 array.
+ * \param[in] dwords1 Dwords1 array.
+ */
+#define pvr_csb_emit_merge(csb, dwords0, dwords1)                \
+   do {                                                          \
+      uint32_t *dw;                                              \
+      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
+      dw = pvr_csb_alloc_dwords(csb, ARRAY_SIZE(dwords0));       \
+      if (!dw)                                                   \
+         break;                                                  \
+      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)         \
+         dw[i] = (dwords0)[i] | (dwords1)[i];                    \
+   } while (0)
+
+/**
+ * \brief Packs a command/state into one or more dwords and stores them in the
+ * control stream pointed to by the csb object.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \param[in] cmd Command/state type.
+ * \param[in,out] name Name to give to the command/state structure variable,
+ *                which contains the information to be packed. This can be
+ *                used by the caller to modify the command or state
+ *                information before it's packed.
+ */
+#define pvr_csb_emit(csb, cmd, name)                                 \
+   for (struct pvr_cmd_struct(cmd)                                   \
+           name = { pvr_cmd_header(cmd) },                           \
+           *_dst = pvr_csb_alloc_dwords(csb, pvr_cmd_length(cmd));   \
+        __builtin_expect(_dst != NULL, 1);                           \
+        ({                                                           \
+           pvr_cmd_pack(cmd)(_dst, &name);                           \
+           _dst = NULL;                                              \
+        }))
+
+/**
+ * \brief Stores a dword into the control stream pointed to by the csb object.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \param[in] dword Dword to store into the control stream.
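+ *
+ * \note The value is stored verbatim; no packing is applied. Use
+ * #pvr_csb_emit() for packed commands.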
+ */
+#define pvr_csb_emit_dword(csb, dword)                  \
+   do {                                                 \
+      uint32_t *dw;                                     \
+      STATIC_ASSERT(sizeof(dword) == sizeof(uint32_t)); \
+      dw = pvr_csb_alloc_dwords(csb, 1U);               \
+      if (!dw)                                          \
+         break;                                         \
+      *dw = dword;                                      \
+   } while (0)
+
+#endif /* PVR_CSB_H */
diff --git a/src/imagination/vulkan/pvr_descriptor_set.c b/src/imagination/vulkan/pvr_descriptor_set.c
new file mode 100644
index 00000000000..85756637d99
--- /dev/null
+++ b/src/imagination/vulkan/pvr_descriptor_set.c
@@ -0,0 +1,1454 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+#if defined(DEBUG)
+static const struct {
+   const char *raw;
+   const char *primary;
+   const char *secondary;
+   const char *primary_dynamic;
+   const char *secondary_dynamic;
+} stage_names[] = {
+   { "Vertex",
+     "Vertex Primary",
+     "Vertex Secondary",
+     "Vertex Dynamic Primary",
+     "Vertex Dynamic Secondary" },
+   { "Fragment",
+     "Fragment Primary",
+     "Fragment Secondary",
+     "Fragment Dynamic Primary",
+     "Fragment Dynamic Secondary" },
+   { "Compute",
+     "Compute Primary",
+     "Compute Secondary",
+     "Compute Dynamic Primary",
+     "Compute Dynamic Secondary" },
+};
+
+static const char *descriptor_names[] = { "VK SAMPLER",
+                                          "VK COMBINED_IMAGE_SAMPLER",
+                                          "VK SAMPLED_IMAGE",
+                                          "VK STORAGE_IMAGE",
+                                          "VK UNIFORM_TEXEL_BUFFER",
+                                          "VK STORAGE_TEXEL_BUFFER",
+                                          "VK UNIFORM_BUFFER",
+                                          "VK STORAGE_BUFFER",
+                                          "VK UNIFORM_BUFFER_DYNAMIC",
+                                          "VK STORAGE_BUFFER_DYNAMIC",
+                                          "VK INPUT_ATTACHMENT" };
+#endif
+
+static void pvr_descriptor_size_info_init(
+   const struct pvr_device *device,
+   VkDescriptorType type,
+   struct pvr_descriptor_size_info *const size_info_out)
+{
+   /* UINT_MAX is a placeholder. These values will be filled in by this
+    * init function, set appropriately based on device features.
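+    * Specifically, each UINT_MAX secondary entry is overwritten by the
+    * switch statement below, based on the descriptor type.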
+ */ + static const struct pvr_descriptor_size_info template_size_infos[] = { + /* VK_DESCRIPTOR_TYPE_SAMPLER */ + { 4, 0, 4 }, + /* VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER */ + { 8, UINT_MAX, 4 }, + /* VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE */ + { 4, UINT_MAX, 4 }, + /* VK_DESCRIPTOR_TYPE_STORAGE_IMAGE */ + { 4, UINT_MAX, 4 }, + /* VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER */ + { 4, UINT_MAX, 4 }, + /* VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER */ + { 4, UINT_MAX, 4 }, + /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER */ + { 2, UINT_MAX, 2 }, + /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER */ + { 2, 1, 2 }, + /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC */ + { 2, UINT_MAX, 2 }, + /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC */ + { 2, 1, 2 }, + /* VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT */ + { 8, UINT_MAX, 4 } + }; + + *size_info_out = template_size_infos[type]; + + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { + const uint32_t image_secondary_offset_arraybase = 0; + const uint32_t image_secondary_size_arraybase = 2; + const uint32_t image_secondary_size_arraystride = 1; + + const uint32_t image_secondary_offset_arraystride = + image_secondary_offset_arraybase + image_secondary_size_arraybase; + + const uint32_t image_secondary_offset_arraymaxindex = + (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures)) + ? 0 + : image_secondary_offset_arraystride + + image_secondary_size_arraystride; + + const uint32_t image_secondary_size_arraymaxindex = 1; + + const uint32_t image_secondary_size_width = 1; + const uint32_t image_secondary_size_height = 1; + const uint32_t image_secondary_size_depth = 1; + + const uint32_t image_secondary_offset_width = + image_secondary_offset_arraymaxindex + + image_secondary_size_arraymaxindex; + const uint32_t image_secondary_offset_height = + image_secondary_offset_width + image_secondary_size_width; + const uint32_t image_secondary_offset_depth = + image_secondary_offset_height + image_secondary_size_height; + const uint32_t image_secondary_total_size = + image_secondary_offset_depth + image_secondary_size_depth; + + size_info_out->secondary = image_secondary_total_size; + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + size_info_out->secondary = (uint32_t)device->features.robustBufferAccess; + break; + + default: + unreachable("Unknown descriptor type"); + } +} + +static bool pvr_stage_matches_vk_flags(enum pvr_stage_allocation pvr_stage, + VkShaderStageFlags flags) +{ + VkShaderStageFlags flags_per_stage; + + switch (pvr_stage) { + case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY: + flags_per_stage = VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT; + break; + case PVR_STAGE_ALLOCATION_FRAGMENT: + flags_per_stage = VK_SHADER_STAGE_FRAGMENT_BIT; + break; + case PVR_STAGE_ALLOCATION_COMPUTE: + flags_per_stage = VK_SHADER_STAGE_COMPUTE_BIT; + break; + default: + unreachable("Unrecognized allocation stage."); + } + + return !!(flags_per_stage & flags); +} + +/* If allocator == NULL, the internal one will be used. 
*/ +static struct pvr_descriptor_set_layout * +pvr_descriptor_set_layout_allocate(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + uint32_t binding_count, + uint32_t immutable_sampler_count, + uint32_t supported_descriptors_count) +{ + struct pvr_descriptor_set_layout_binding *bindings; + struct pvr_descriptor_set_layout *layout; + __typeof__(layout->per_stage_descriptor_count) counts; + struct pvr_sampler **immutable_samplers; + + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &layout, __typeof__(*layout), 1); + vk_multialloc_add(&ma, &bindings, __typeof__(*bindings), binding_count); + vk_multialloc_add(&ma, + &immutable_samplers, + __typeof__(*immutable_samplers), + immutable_sampler_count); + + for (uint32_t stage = 0; stage < ARRAY_SIZE(counts); stage++) { + vk_multialloc_add(&ma, + &counts[stage], + __typeof__(*counts[0]), + supported_descriptors_count); + } + + /* pvr_CreateDescriptorSetLayout() relies on this being zero allocated. */ + if (!vk_multialloc_zalloc2(&ma, + &device->vk.alloc, + allocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) { + return NULL; + } + + layout->bindings = bindings; + layout->immutable_samplers = immutable_samplers; + + memcpy(&layout->per_stage_descriptor_count, &counts, sizeof(counts)); + + vk_object_base_init(&device->vk, + &layout->base, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); + + return layout; +} + +/* If allocator == NULL, the internal one will be used. */ +static void +pvr_descriptor_set_layout_free(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_descriptor_set_layout *layout) +{ + vk_object_base_finish(&layout->base); + vk_free2(&device->vk.alloc, allocator, layout); +} + +static int pvr_binding_compare(const void *a, const void *b) +{ + uint32_t binding_a = ((VkDescriptorSetLayoutBinding *)a)->binding; + uint32_t binding_b = ((VkDescriptorSetLayoutBinding *)b)->binding; + + if (binding_a < binding_b) + return -1; + + if (binding_a > binding_b) + return 1; + + return 0; +} + +/* If allocator == NULL, the internal one will be used. 
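+ *
+ * Returns a heap-allocated copy of the incoming bindings, sorted by binding
+ * number with qsort(); the caller is expected to release it with vk_free2()
+ * once the layout has been built.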
*/ +static VkDescriptorSetLayoutBinding * +pvr_create_sorted_bindings(struct pvr_device *device, + const VkAllocationCallbacks *allocator, + const VkDescriptorSetLayoutBinding *bindings, + uint32_t binding_count) +{ + VkDescriptorSetLayoutBinding *sorted_bindings = + vk_alloc2(&device->vk.alloc, + allocator, + binding_count * sizeof(*sorted_bindings), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sorted_bindings) + return NULL; + + memcpy(sorted_bindings, bindings, binding_count * sizeof(*sorted_bindings)); + + qsort(sorted_bindings, + binding_count, + sizeof(*sorted_bindings), + pvr_binding_compare); + + return sorted_bindings; +} + +struct pvr_register_usage { + uint32_t primary; + uint32_t primary_dynamic; + uint32_t secondary; + uint32_t secondary_dynamic; +}; + +static void pvr_setup_in_memory_layout_sizes( + struct pvr_descriptor_set_layout *layout, + const struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT]) +{ + for (uint32_t stage = 0; + stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage); + stage++) { + layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4); + + layout->memory_layout_in_dwords_per_stage[stage].primary_offset = + layout->total_size_in_dwords; + layout->memory_layout_in_dwords_per_stage[stage].primary_size = + reg_usage[stage].primary; + + layout->total_size_in_dwords += reg_usage[stage].primary; + layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4); + + layout->memory_layout_in_dwords_per_stage[stage].secondary_offset = + layout->total_size_in_dwords; + layout->memory_layout_in_dwords_per_stage[stage].secondary_size = + reg_usage[stage].secondary; + + layout->total_size_in_dwords += reg_usage[stage].secondary; + + layout->memory_layout_in_dwords_per_stage[stage].primary_dynamic_size = + reg_usage[stage].primary_dynamic; + layout->memory_layout_in_dwords_per_stage[stage].secondary_dynamic_size = + reg_usage[stage].secondary_dynamic; + } +} + +#if defined(DEBUG) +static void +pvr_dump_in_memory_layout_sizes(const struct pvr_descriptor_set_layout *layout) +{ + mesa_logd("=== SET LAYOUT ==="); + mesa_logd("----------------------------------------------"); + mesa_logd(" in memory:"); + mesa_logd("----------------------------------------------"); + + for (uint32_t stage = 0; + stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage); + stage++) { + mesa_logd( + "| %-18s @ %04u |", + stage_names[stage].primary, + layout->memory_layout_in_dwords_per_stage[stage].primary_offset); + mesa_logd("----------------------------------------------"); + + /* Print primaries. */ + for (uint32_t i = 0; i < layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &layout->bindings[i]; + + if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? " " : "X", + binding->per_stage_offset_in_dwords[stage].primary, + descriptor_names[binding->type], + binding->descriptor_count); + } + + /* Print dynamic primaries. */ + for (uint32_t i = 0; i < layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &layout->bindings[i]; + + if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && + binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| * %s %04u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? 
" " : "X", + binding->per_stage_offset_in_dwords[stage].primary, + descriptor_names[binding->type], + binding->descriptor_count); + } + + mesa_logd("----------------------------------------------"); + mesa_logd( + "| %-18s @ %04u |", + stage_names[stage].secondary, + layout->memory_layout_in_dwords_per_stage[stage].secondary_offset); + mesa_logd("----------------------------------------------"); + + /* Print secondaries. */ + for (uint32_t i = 0; i < layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &layout->bindings[i]; + + if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? " " : "X", + binding->per_stage_offset_in_dwords[stage].secondary, + descriptor_names[binding->type], + binding->descriptor_count); + } + + /* Print dynamic secondaries. */ + for (uint32_t i = 0; i < layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &layout->bindings[i]; + + if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && + binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| * %s %04u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? " " : "X", + binding->per_stage_offset_in_dwords[stage].secondary, + descriptor_names[binding->type], + binding->descriptor_count); + } + + mesa_logd("=============================================="); + } +} +#endif + +VkResult pvr_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + /* Used to accumulate sizes and set each descriptor's offsets per stage. */ + struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT] = { 0 }; + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_descriptor_set_layout *layout; + VkDescriptorSetLayoutBinding *bindings; + uint32_t immutable_sampler_count; + + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + vk_foreach_struct (ext, pCreateInfo->pNext) { + pvr_debug_ignored_stype(ext->sType); + } + + /* TODO: Add support for push descriptors. */ + + if (pCreateInfo->bindingCount == 0) { + layout = pvr_descriptor_set_layout_allocate(device, pAllocator, 0, 0, 0); + if (!layout) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + *pSetLayout = pvr_descriptor_set_layout_to_handle(layout); + return VK_SUCCESS; + } + + /* TODO: Instead of sorting, maybe do what anvil does? */ + bindings = pvr_create_sorted_bindings(device, + pAllocator, + pCreateInfo->pBindings, + pCreateInfo->bindingCount); + if (!bindings) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + immutable_sampler_count = 0; + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding: + * + * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or + * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then + * pImmutableSamplers can be used to initialize a set of immutable + * samplers. [...] If descriptorType is not one of these descriptor + * types, then pImmutableSamplers is ignored. + * + * We need to be careful here and only parse pImmutableSamplers if we + * have one of the right descriptor types. 
+       */
+      const VkDescriptorType descriptor_type = bindings[i].descriptorType;
+      if ((descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+           descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
+          bindings[i].pImmutableSamplers)
+         immutable_sampler_count += bindings[i].descriptorCount;
+   }
+
+   /* From the Vulkan 1.2.190 spec for VkDescriptorSetLayoutCreateInfo:
+    *
+    *    "The VkDescriptorSetLayoutBinding::binding members of the elements
+    *    of the pBindings array must each have different values."
+    *
+    * So we don't worry about duplicates and just allocate for bindingCount
+    * bindings.
+    */
+   layout = pvr_descriptor_set_layout_allocate(
+      device,
+      pAllocator,
+      pCreateInfo->bindingCount,
+      immutable_sampler_count,
+      PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT);
+   if (!layout) {
+      vk_free2(&device->vk.alloc, pAllocator, bindings);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   layout->binding_count = pCreateInfo->bindingCount;
+
+   for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+      const VkDescriptorSetLayoutBinding *const binding = &bindings[bind_num];
+      struct pvr_descriptor_set_layout_binding *const internal_binding =
+         &layout->bindings[bind_num];
+      VkShaderStageFlags shader_stages = 0;
+
+      internal_binding->type = binding->descriptorType;
+      /* Binding numbers can be non-contiguous, so we ignore the
+       * user-specified binding numbers and make them contiguous ourselves.
+       */
+      internal_binding->binding_number = bind_num;
+
+      /* From the Vulkan 1.2.189 spec:
+       *
+       *    "If descriptorCount is zero this binding entry is reserved and the
+       *    resource must not be accessed from any stage via this binding"
+       *
+       * So do not use binding->stageFlags directly, use shader_stages
+       * instead.
+       */
+      if (binding->descriptorCount) {
+         shader_stages = binding->stageFlags;
+
+         internal_binding->descriptor_count = binding->descriptorCount;
+         internal_binding->descriptor_index = layout->descriptor_count;
+         layout->descriptor_count += binding->descriptorCount;
+      }
+
+      switch (binding->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         if (binding->pImmutableSamplers && binding->descriptorCount > 0) {
+            internal_binding->immutable_samplers_index =
+               layout->immutable_sampler_count;
+
+            for (uint32_t j = 0; j < binding->descriptorCount; j++) {
+               PVR_FROM_HANDLE(pvr_sampler,
+                               sampler,
+                               binding->pImmutableSamplers[j]);
+               const uint32_t next = j + layout->immutable_sampler_count;
+
+               layout->immutable_samplers[next] = sampler;
+            }
+
+            layout->immutable_sampler_count += binding->descriptorCount;
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         layout->dynamic_buffer_count += binding->descriptorCount;
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         break;
+
+      default:
+         unreachable("Unknown descriptor type");
+         break;
+      }
+
+      if (!shader_stages)
+         continue;
+
+      internal_binding->shader_stages = shader_stages;
+      layout->shader_stages |= shader_stages;
+
+      for (uint32_t stage = 0;
+           stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+           stage++) {
+         const VkDescriptorType descriptor_type = binding->descriptorType;
+
+         if (!pvr_stage_matches_vk_flags(stage,
+                                         shader_stages))
+            continue;
+
+         internal_binding->shader_stage_mask |= (1U << stage);
+
+         /* We allocate dynamic primaries and secondaries at the end.
+          *
+          * TODO: Do we have to allocate them at the end? We could speed
+          * this up by allocating them here if not.
+          */
+         if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+             descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+            struct pvr_descriptor_size_info size_info;
+
+            pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+            STATIC_ASSERT(
+               ARRAY_SIZE(reg_usage) ==
+               ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+            reg_usage[stage].primary =
+               ALIGN_POT(reg_usage[stage].primary, size_info.alignment);
+
+            internal_binding->per_stage_offset_in_dwords[stage].primary =
+               reg_usage[stage].primary;
+            reg_usage[stage].primary +=
+               size_info.primary * internal_binding->descriptor_count;
+
+            internal_binding->per_stage_offset_in_dwords[stage].secondary =
+               reg_usage[stage].secondary;
+            reg_usage[stage].secondary +=
+               size_info.secondary * internal_binding->descriptor_count;
+         }
+
+         STATIC_ASSERT(
+            ARRAY_SIZE(layout->per_stage_descriptor_count) ==
+            ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+         layout->per_stage_descriptor_count[stage][descriptor_type] +=
+            internal_binding->descriptor_count;
+      }
+   }
+
+   for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+      struct pvr_descriptor_set_layout_binding *const internal_binding =
+         &layout->bindings[bind_num];
+      const VkDescriptorType descriptor_type = internal_binding->type;
+
+      if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+          descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+         continue;
+
+      for (uint32_t stage = 0;
+           stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+           stage++) {
+         struct pvr_descriptor_size_info size_info;
+         const VkShaderStageFlags shader_stages =
+            internal_binding->shader_stages;
+
+         if (!pvr_stage_matches_vk_flags(stage, shader_stages))
+            continue;
+
+         pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+         /* TODO: align primary like we did with other descriptors?
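+          *
+          * (For reference, the non-dynamic path earlier aligns
+          * reg_usage[stage].primary to size_info.alignment before handing
+          * out an offset; the dynamic offsets below are handed out
+          * unaligned.)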
*/ + internal_binding->per_stage_offset_in_dwords[stage].primary = + reg_usage[stage].primary_dynamic; + reg_usage[stage].primary_dynamic += + size_info.primary * internal_binding->descriptor_count; + + internal_binding->per_stage_offset_in_dwords[stage].secondary = + reg_usage[stage].secondary_dynamic; + reg_usage[stage].secondary_dynamic += + size_info.secondary * internal_binding->descriptor_count; + } + } + + pvr_setup_in_memory_layout_sizes(layout, reg_usage); + +#if defined(DEBUG) + pvr_dump_in_memory_layout_sizes(layout); +#endif + + vk_free2(&device->vk.alloc, pAllocator, bindings); + + *pSetLayout = pvr_descriptor_set_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void pvr_DestroyDescriptorSetLayout(VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_descriptor_set_layout, layout, _set_layout); + PVR_FROM_HANDLE(pvr_device, device, _device); + + pvr_descriptor_set_layout_free(device, pAllocator, layout); +} + +#if defined(DEBUG) +static void +pvr_dump_in_register_layout_sizes(const struct pvr_device *device, + const struct pvr_pipeline_layout *layout) +{ + mesa_logd("=== SET LAYOUT ==="); + mesa_logd("----------------------------------------------------"); + mesa_logd(" in registers:"); + mesa_logd("----------------------------------------------------"); + + for (uint32_t stage = 0; + stage < ARRAY_SIZE(layout->register_layout_in_dwords_per_stage); + stage++) { + uint32_t dynamic_offset = 0; + + mesa_logd("| %-48s |", stage_names[stage].primary_dynamic); + mesa_logd("----------------------------------------------------"); + + /* Print dynamic primaries. */ + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout *const set_layout = + layout->set_layout[set_num]; + + for (uint32_t i = 0; i < set_layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &set_layout->bindings[i]; + bool valid = !!(binding->shader_stage_mask & (1U << stage)); + + if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && + binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |", + (valid) ? " " : "X", + dynamic_offset, + set_num, + i, + descriptor_names[binding->type], + binding->descriptor_count); + + if (valid) { + struct pvr_descriptor_size_info size_info; + + pvr_descriptor_size_info_init(device, binding->type, &size_info); + + dynamic_offset += size_info.primary; + } + } + } + + mesa_logd("----------------------------------------------------"); + mesa_logd("| %-48s |", stage_names[stage].secondary_dynamic); + mesa_logd("----------------------------------------------------"); + + /* Print dynamic secondaries. */ + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout *const set_layout = + layout->set_layout[set_num]; + + for (uint32_t i = 0; i < set_layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &set_layout->bindings[i]; + bool valid = !!(binding->shader_stage_mask & (1U << stage)); + + if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && + binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |", + (valid) ? 
" " : "X", + dynamic_offset, + set_num, + i, + descriptor_names[binding->type], + binding->descriptor_count); + + if (valid) { + struct pvr_descriptor_size_info size_info; + + pvr_descriptor_size_info_init(device, binding->type, &size_info); + + dynamic_offset += size_info.secondary; + } + } + } + + mesa_logd("----------------------------------------------------"); + mesa_logd("| %-48s |", stage_names[stage].primary); + mesa_logd("----------------------------------------------------"); + + /* Print primaries. */ + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout *const set_layout = + layout->set_layout[set_num]; + const uint32_t base = + layout->register_layout_in_dwords_per_stage[stage][set_num] + .primary_offset; + + for (uint32_t i = 0; i < set_layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &set_layout->bindings[i]; + + if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? " " : "X", + base + binding->per_stage_offset_in_dwords[stage].primary, + set_num, + i, + descriptor_names[binding->type], + binding->descriptor_count); + } + } + + mesa_logd("----------------------------------------------------"); + mesa_logd("| %-48s |", stage_names[stage].secondary); + mesa_logd("----------------------------------------------------"); + + /* Print secondaries. */ + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout *const set_layout = + layout->set_layout[set_num]; + const uint32_t base = + layout->register_layout_in_dwords_per_stage[stage][set_num] + .secondary_offset; + + for (uint32_t i = 0; i < set_layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *const binding = + &set_layout->bindings[i]; + + if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |", + (binding->shader_stage_mask & (1U << stage)) ? " " : "X", + base + + binding->per_stage_offset_in_dwords[stage].secondary, + set_num, + i, + descriptor_names[binding->type], + binding->descriptor_count); + } + } + + mesa_logd("===================================================="); + } +} +#endif + +/* Pipeline layouts. These have nothing to do with the pipeline. They are + * just multiple descriptor set layouts pasted together. 
+ */ +VkResult pvr_CreatePipelineLayout(VkDevice _device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout) +{ + uint32_t next_free_reg[PVR_STAGE_ALLOCATION_COUNT]; + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_pipeline_layout *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + assert(pCreateInfo->setLayoutCount <= PVR_MAX_DESCRIPTOR_SETS); + + layout = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*layout), + VK_OBJECT_TYPE_PIPELINE_LAYOUT); + if (!layout) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->set_count = pCreateInfo->setLayoutCount; + layout->shader_stages = 0; + for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) { + uint32_t descriptor_counts + [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT] = { 0 }; + struct pvr_pipeline_layout_reg_info *const reg_info = + &layout->per_stage_reg_info[stage]; + + *reg_info = (struct pvr_pipeline_layout_reg_info){ 0 }; + + layout->per_stage_descriptor_masks[stage] = 0; + + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + /* So we don't write these again and again. Just do it once. */ + if (stage == 0) { + PVR_FROM_HANDLE(pvr_descriptor_set_layout, + set_layout, + pCreateInfo->pSetLayouts[set_num]); + + layout->set_layout[set_num] = set_layout; + layout->shader_stages |= set_layout->shader_stages; + } + + const struct pvr_descriptor_set_layout_mem_layout *const mem_layout = + &layout->set_layout[set_num] + ->memory_layout_in_dwords_per_stage[stage]; + + /* Allocate registers counts for dynamic descriptors. */ + reg_info->primary_dynamic_size_in_dwords += + mem_layout->primary_dynamic_size; + reg_info->secondary_dynamic_size_in_dwords += + mem_layout->secondary_dynamic_size; + + for (VkDescriptorType type = 0; + type < PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT; + type++) { + uint32_t descriptor_count; + + layout->descriptor_offsets[set_num][stage][type] = + descriptor_counts[type]; + + descriptor_count = layout->set_layout[set_num] + ->per_stage_descriptor_count[stage][type]; + + if (!descriptor_count) + continue; + + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + layout->per_stage_descriptor_masks[stage] |= 1U << set_num; + descriptor_counts[type] += descriptor_count; + break; + + /* We don't need to keep track of the counts or masks for other + * descriptor types so there is no assert() here since other + * types are not invalid or unsupported. + */ + /* TODO: Improve the comment above to specify why, when we find + * out. + */ + default: + break; + } + } + } + + next_free_reg[stage] = reg_info->primary_dynamic_size_in_dwords + + reg_info->secondary_dynamic_size_in_dwords; + } + + /* Allocate registers counts for primary and secondary descriptors. 
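+    *
+    * A hypothetical example: with 10 dwords of dynamic registers already in
+    * use and two sets whose primary sizes are 6 and 3, set 0's primaries
+    * land at ALIGN_POT(10, 4) == 12 and set 1's at ALIGN_POT(18, 4) == 20,
+    * leaving the secondary blocks to start from 23.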
*/ + for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) { + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout_mem_layout *const mem_layout = + &layout->set_layout[set_num] + ->memory_layout_in_dwords_per_stage[stage]; + struct pvr_descriptor_set_layout_mem_layout *const reg_layout = + &layout->register_layout_in_dwords_per_stage[stage][set_num]; + + next_free_reg[stage] = ALIGN_POT(next_free_reg[stage], 4); + + reg_layout->primary_offset = next_free_reg[stage]; + reg_layout->primary_size = mem_layout->primary_size; + + next_free_reg[stage] += reg_layout->primary_size; + } + + /* To optimize the total shared layout allocation used by the shader, + * secondary descriptors come last since they're less likely to be used. + */ + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout_mem_layout *const mem_layout = + &layout->set_layout[set_num] + ->memory_layout_in_dwords_per_stage[stage]; + struct pvr_descriptor_set_layout_mem_layout *const reg_layout = + &layout->register_layout_in_dwords_per_stage[stage][set_num]; + + /* Should we be aligning next_free_reg like it's done with the + * primary descriptors? + */ + + reg_layout->secondary_offset = next_free_reg[stage]; + reg_layout->secondary_size = mem_layout->secondary_size; + + next_free_reg[stage] += reg_layout->secondary_size; + } + } + + layout->push_constants_shader_stages = 0; + for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { + const VkPushConstantRange *range = &pCreateInfo->pPushConstantRanges[i]; + + layout->push_constants_shader_stages |= range->stageFlags; + } + +#if defined(DEBUG) + pvr_dump_in_register_layout_sizes(device, layout); +#endif + + *pPipelineLayout = pvr_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void pvr_DestroyPipelineLayout(VkDevice _device, + VkPipelineLayout _pipelineLayout, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_pipeline_layout, layout, _pipelineLayout); + + vk_object_free(&device->vk, pAllocator, layout); +} + +VkResult pvr_CreateDescriptorPool(VkDevice _device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_descriptor_pool *pool; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO); + + pool = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*pool), + VK_OBJECT_TYPE_DESCRIPTOR_POOL); + if (!pool) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->vk.alloc; + + pool->max_sets = pCreateInfo->maxSets; + list_inithead(&pool->descriptor_sets); + + pool->total_size_in_dwords = 0; + for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) { + struct pvr_descriptor_size_info size_info; + const uint32_t descriptor_count = + pCreateInfo->pPoolSizes[i].descriptorCount; + + pvr_descriptor_size_info_init(device, + pCreateInfo->pPoolSizes[i].type, + &size_info); + + const uint32_t secondary = ALIGN_POT(size_info.secondary, 4); + const uint32_t primary = ALIGN_POT(size_info.primary, 4); + + pool->total_size_in_dwords += descriptor_count * (primary + secondary); + } + pool->total_size_in_dwords *= PVR_STAGE_ALLOCATION_COUNT; + pool->current_size_in_dwords = 0; + + pvr_finishme("Entry tracker for allocations?"); + + 
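+   /* Worked example with hypothetical numbers: for 8 uniform-buffer pool
+    * entries the size info is { primary 2, secondary 1 } when
+    * robustBufferAccess is enabled, and both values round up to 4 dwords
+    * above, giving 8 * (4 + 4) = 64 dwords before the per-stage multiply.
+    */
+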
*pDescriptorPool = pvr_descriptor_pool_to_handle(pool); + + return VK_SUCCESS; +} + +static void pvr_free_descriptor_set(struct pvr_device *device, + struct pvr_descriptor_pool *pool, + struct pvr_descriptor_set *set) +{ + list_del(&set->link); + pvr_bo_free(device, set->pvr_bo); + vk_object_free(&device->vk, &pool->alloc, set); +} + +void pvr_DestroyDescriptorPool(VkDevice _device, + VkDescriptorPool _pool, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_descriptor_pool, pool, _pool); + + if (!pool) + return; + + list_for_each_entry_safe (struct pvr_descriptor_set, + set, + &pool->descriptor_sets, + link) { + pvr_free_descriptor_set(device, pool, set); + } + + vk_object_free(&device->vk, pAllocator, pool); +} + +VkResult pvr_ResetDescriptorPool(VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +static uint16_t pvr_get_descriptor_primary_offset( + const struct pvr_device *device, + const struct pvr_descriptor_set_layout *layout, + const struct pvr_descriptor_set_layout_binding *binding, + const uint32_t stage, + const uint32_t desc_idx) +{ + struct pvr_descriptor_size_info size_info; + uint32_t offset; + + assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage)); + assert(desc_idx < binding->descriptor_count); + + pvr_descriptor_size_info_init(device, binding->type, &size_info); + + offset = layout->memory_layout_in_dwords_per_stage[stage].primary_offset; + offset += binding->per_stage_offset_in_dwords[stage].primary; + offset += (desc_idx * size_info.primary); + + /* Offset must be less than 16bits. */ + assert(offset < UINT16_MAX); + + return (uint16_t)offset; +} + +static uint16_t pvr_get_descriptor_secondary_offset( + const struct pvr_device *device, + const struct pvr_descriptor_set_layout *layout, + const struct pvr_descriptor_set_layout_binding *binding, + const uint32_t stage, + const uint32_t desc_idx) +{ + struct pvr_descriptor_size_info size_info; + uint32_t offset; + + assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage)); + assert(desc_idx < binding->descriptor_count); + + pvr_descriptor_size_info_init(device, binding->type, &size_info); + + offset = layout->memory_layout_in_dwords_per_stage[stage].secondary_offset; + offset += binding->per_stage_offset_in_dwords[stage].secondary; + offset += (desc_idx * size_info.secondary); + + /* Offset must be less than 16bits. */ + assert(offset < UINT16_MAX); + + return (uint16_t)offset; +} + +static void pvr_write_sampler_descriptor(uint32_t *primary, + const struct pvr_sampler *sampler) +{ + /* TODO: Implement based on WriteSamplerDescriptor. */ + pvr_finishme("Implement after vkCreateSampler API."); +} + +#define PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS (4 * 1024) + +static VkResult +pvr_descriptor_set_create(struct pvr_device *device, + struct pvr_descriptor_pool *pool, + const struct pvr_descriptor_set_layout *layout, + struct pvr_descriptor_set **const descriptor_set_out) +{ + struct pvr_descriptor_set *set; + VkResult result; + size_t size; + void *map; + + size = sizeof(*set) + sizeof(set->descriptors[0]) * layout->descriptor_count; + + /* TODO: Add support to allocate descriptors from descriptor pool, also + * check the required descriptors must not exceed max allowed descriptors. 
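+    *
+    * (The size computed above is the set header plus descriptor_count
+    * trailing descriptor entries, which is exactly what vk_object_zalloc()
+    * below is asked to provide.)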
+ */ + set = vk_object_zalloc(&device->vk, + &pool->alloc, + size, + VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* TODO: Add support to allocate device memory from a common pool. Look at + * something like anv. Also we can allocate a whole chunk of device memory + * for max descriptors supported by pool as done by v3dv. Also check the + * possibility if this can be removed from here and done on need basis. + */ + if (layout->binding_count > 0) { + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + uint64_t bo_size = MIN2(pool->total_size_in_dwords, + PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS) * + sizeof(uint32_t); + + result = pvr_bo_alloc(device, + device->heaps.general_heap, + bo_size, + cache_line_size, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &set->pvr_bo); + if (result != VK_SUCCESS) + goto err_free_descriptor_set; + } + + set->layout = layout; + set->pool = pool; + + map = set->pvr_bo->bo->map; + for (uint32_t i = 0; i < layout->binding_count; i++) { + const struct pvr_descriptor_set_layout_binding *binding = + &layout->bindings[i]; + + if (binding->descriptor_count == 0 || + (binding->type != VK_DESCRIPTOR_TYPE_SAMPLER && + binding->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) + continue; + + for (uint32_t stage = 0; + stage < ARRAY_SIZE(binding->per_stage_offset_in_dwords); + stage++) { + if (!(binding->shader_stage_mask & (1U << stage))) + continue; + + for (uint32_t j = 0; j < binding->descriptor_count; j++) { + uint32_t idx = binding->immutable_samplers_index + j; + struct pvr_sampler *sampler = layout->immutable_samplers[idx]; + unsigned int offset_in_dwords = + pvr_get_descriptor_primary_offset(device, + layout, + binding, + stage, + j); + + if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + offset_in_dwords += 4; + + pvr_write_sampler_descriptor(map + + offset_in_dwords * sizeof(uint32_t), + sampler); + } + } + } + + list_addtail(&set->link, &pool->descriptor_sets); + + *descriptor_set_out = set; + + return VK_SUCCESS; + +err_free_descriptor_set: + vk_object_free(&device->vk, &pool->alloc, set); + + return result; +} + +VkResult +pvr_AllocateDescriptorSets(VkDevice _device, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + PVR_FROM_HANDLE(pvr_descriptor_pool, pool, pAllocateInfo->descriptorPool); + PVR_FROM_HANDLE(pvr_device, device, _device); + VkResult result; + uint32_t i; + + vk_foreach_struct (ext, pAllocateInfo->pNext) { + pvr_debug_ignored_stype(ext->sType); + } + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + PVR_FROM_HANDLE(pvr_descriptor_set_layout, + layout, + pAllocateInfo->pSetLayouts[i]); + struct pvr_descriptor_set *set = NULL; + + result = pvr_descriptor_set_create(device, pool, layout, &set); + if (result != VK_SUCCESS) + goto err_free_descriptor_sets; + + pDescriptorSets[i] = pvr_descriptor_set_to_handle(set); + } + + return VK_SUCCESS; + +err_free_descriptor_sets: + pvr_FreeDescriptorSets(_device, + pAllocateInfo->descriptorPool, + i, + pDescriptorSets); + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) + pDescriptorSets[i] = VK_NULL_HANDLE; + + return result; +} + +VkResult pvr_FreeDescriptorSets(VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet *pDescriptorSets) +{ + PVR_FROM_HANDLE(pvr_descriptor_pool, pool, descriptorPool); + PVR_FROM_HANDLE(pvr_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + struct 
pvr_descriptor_set *set; + + if (!pDescriptorSets[i]) + continue; + + set = pvr_descriptor_set_from_handle(pDescriptorSets[i]); + pvr_free_descriptor_set(device, pool, set); + } + + return VK_SUCCESS; +} + +static int pvr_compare_layout_binding(const void *a, const void *b) +{ + uint32_t binding_a; + uint32_t binding_b; + + binding_a = ((struct pvr_descriptor_set_layout_binding *)a)->binding_number; + binding_b = ((struct pvr_descriptor_set_layout_binding *)b)->binding_number; + + if (binding_a < binding_b) + return -1; + + if (binding_a > binding_b) + return 1; + + return 0; +} + +/* This function does not assume that the binding will always exist for a + * particular binding_num. Caller should check before using the return pointer. + */ +static struct pvr_descriptor_set_layout_binding * +pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout *layout, + const uint32_t binding_num) +{ + struct pvr_descriptor_set_layout_binding binding; + binding.binding_number = binding_num; + + return bsearch(&binding, + layout->bindings, + layout->binding_count, + sizeof(binding), + pvr_compare_layout_binding); +} + +static void +pvr_descriptor_update_buffer_info(const struct pvr_device *device, + const VkWriteDescriptorSet *write_set, + struct pvr_descriptor_set *set, + uint32_t *mem_ptr, + uint32_t start_stage, + uint32_t end_stage) +{ + const struct pvr_descriptor_set_layout_binding *binding; + struct pvr_descriptor_size_info size_info; + bool is_dynamic; + + binding = pvr_get_descriptor_binding(set->layout, write_set->dstBinding); + /* Binding should not be NULL. */ + assert(binding); + + is_dynamic = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || + (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); + + pvr_descriptor_size_info_init(device, binding->type, &size_info); + + /* Only need to update the buffer if it is actually being used. If it was + * not present in any stage, then the shader_stage_mask would be 0 and we + * can skip this update. + */ + if (binding->shader_stage_mask == 0) + return; + + for (uint32_t i = 0; i < write_set->descriptorCount; i++) { + const VkDescriptorBufferInfo *buffer_info = &write_set->pBufferInfo[i]; + PVR_FROM_HANDLE(pvr_buffer, buffer, buffer_info->buffer); + const uint32_t desc_idx = + binding->descriptor_index + write_set->dstArrayElement + i; + uint64_t addr = buffer->dev_addr.addr + buffer_info->offset; + uint32_t range = (buffer_info->range == VK_WHOLE_SIZE) + ? (buffer->size - buffer_info->offset) + : (buffer_info->range); + + set->descriptors[desc_idx].type = write_set->descriptorType; + set->descriptors[desc_idx].buffer_dev_addr.addr = addr; + set->descriptors[desc_idx].buffer_create_info_size = buffer->size; + set->descriptors[desc_idx].buffer_desc_range = range; + + if (is_dynamic) + continue; + + /* Update the entries in the descriptor memory for static buffer. */ + for (uint32_t j = start_stage; j < end_stage; j++) { + uint32_t primary_offset; + uint32_t secondary_offset; + + if (!(binding->shader_stage_mask & (1U << j))) + continue; + + /* Offset calculation functions expect descriptor_index to be + * binding relative not layout relative, so we have used + * write_set->dstArrayElement + i rather than desc_idx. 
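+          *
+          * As a hypothetical illustration: with a per-stage binding offset
+          * of 8 dwords and size_info.primary == 2, element
+          * (dstArrayElement + i) lands at
+          * stage primary_offset + 8 + 2 * (dstArrayElement + i) dwords into
+          * the set's mapped memory.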
+ */ + primary_offset = + pvr_get_descriptor_primary_offset(device, + set->layout, + binding, + j, + write_set->dstArrayElement + i); + secondary_offset = + pvr_get_descriptor_secondary_offset(device, + set->layout, + binding, + j, + write_set->dstArrayElement + i); + + memcpy(mem_ptr + primary_offset, &addr, size_info.primary << 2); + memcpy(mem_ptr + secondary_offset, &range, size_info.secondary << 2); + } + } +} + +void pvr_UpdateDescriptorSets(VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write_set = &pDescriptorWrites[i]; + PVR_FROM_HANDLE(pvr_descriptor_set, set, write_set->dstSet); + uint32_t *map = set->pvr_bo->bo->map; + + vk_foreach_struct (ext, write_set->pNext) { + pvr_debug_ignored_stype(ext->sType); + } + + switch (write_set->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + pvr_finishme("Update support missing for %d descriptor type\n", + write_set->descriptorType); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + pvr_descriptor_update_buffer_info(device, + write_set, + set, + map, + 0, + PVR_STAGE_ALLOCATION_COUNT); + break; + + default: + unreachable("Unknown descriptor type"); + break; + } + } + + if (descriptorCopyCount > 0) + pvr_finishme("Descriptor copying support missing\n"); +} diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c new file mode 100644 index 00000000000..ece61f91c5d --- /dev/null +++ b/src/imagination/vulkan/pvr_device.c @@ -0,0 +1,2034 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * based in part on v3dv driver which is: + * Copyright © 2019 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue_compiler.h"
+#include "util/build_id.h"
+#include "util/log.h"
+#include "util/mesa-sha1.h"
+#include "util/os_misc.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)
+
+/* The grow threshold is a percentage. This is intended to be 12.5%, but has
+ * been rounded up since the percentage is treated as an integer.
+ */
+#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
+
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#   define PVR_USE_WSI_PLATFORM
+#endif
+
+#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
+
+static const struct vk_instance_extension_table pvr_instance_extensions = {
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+   .KHR_display = true,
+#endif
+   .KHR_external_memory_capabilities = true,
+   .KHR_get_physical_device_properties2 = true,
+#if defined(PVR_USE_WSI_PLATFORM)
+   .KHR_surface = true,
+#endif
+   .EXT_debug_report = true,
+   .EXT_debug_utils = true,
+};
+
+static void pvr_physical_device_get_supported_extensions(
+   const struct pvr_physical_device *pdevice,
+   struct vk_device_extension_table *extensions)
+{
+   /* clang-format off */
+   *extensions = (struct vk_device_extension_table){
+      .KHR_external_memory = true,
+      .KHR_external_memory_fd = true,
+#if defined(PVR_USE_WSI_PLATFORM)
+      .KHR_swapchain = true,
+#endif
+      .EXT_external_memory_dma_buf = true,
+      .EXT_private_data = true,
+   };
+   /* clang-format on */
+}
+
+VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+   *pApiVersion = PVR_API_VERSION;
+   return VK_SUCCESS;
+}
+
+VkResult
+pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
+                                         uint32_t *pPropertyCount,
+                                         VkExtensionProperties *pProperties)
+{
+   if (pLayerName)
+      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
+                                                     pPropertyCount,
+                                                     pProperties);
+}
+
+VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkInstance *pInstance)
+{
+   struct vk_instance_dispatch_table dispatch_table;
+   struct pvr_instance *instance;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
+
+   if (!pAllocator)
+      pAllocator = vk_default_allocator();
+
+   instance = vk_alloc(pAllocator,
+                       sizeof(*instance),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!instance)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &pvr_instance_entrypoints,
+                                               true);
+
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &wsi_instance_entrypoints,
+                                               false);
+
+   result = vk_instance_init(&instance->vk,
+                             &pvr_instance_extensions,
+                             &dispatch_table,
+                             pCreateInfo,
+                             pAllocator);
+   if (result != VK_SUCCESS) {
+      vk_free(pAllocator, instance);
+      return vk_error(NULL, result);
+   }
+
+   instance->physical_devices_count = -1;
+
+   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+   *pInstance = pvr_instance_to_handle(instance);
+
+   return
VK_SUCCESS; +} + +static void pvr_physical_device_finish(struct pvr_physical_device *pdevice) +{ + /* Be careful here. The device might not have been initialized. This can + * happen since initialization is done in vkEnumeratePhysicalDevices() but + * finish is done in vkDestroyInstance(). Make sure that you check for NULL + * before freeing or that the freeing functions accept NULL pointers. + */ + + if (pdevice->compiler) + rogue_compiler_destroy(pdevice->compiler); + + pvr_wsi_finish(pdevice); + + free(pdevice->name); + + if (pdevice->ws) + pvr_winsys_destroy(pdevice->ws); + + if (pdevice->master_fd >= 0) { + vk_free(&pdevice->vk.instance->alloc, pdevice->master_path); + close(pdevice->master_fd); + } + + if (pdevice->render_fd >= 0) { + vk_free(&pdevice->vk.instance->alloc, pdevice->render_path); + close(pdevice->render_fd); + } + vk_physical_device_finish(&pdevice->vk); +} + +void pvr_DestroyInstance(VkInstance _instance, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_instance, instance, _instance); + + if (!instance) + return; + + pvr_physical_device_finish(&instance->physical_device); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + vk_instance_finish(&instance->vk); + vk_free(&instance->vk.alloc, instance); +} + +static VkResult +pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice) +{ + struct mesa_sha1 sha1_ctx; + unsigned build_id_len; + uint8_t sha1[20]; + uint64_t bvnc; + + const struct build_id_note *note = + build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids); + if (!note) { + return vk_errorf(pdevice, + VK_ERROR_INITIALIZATION_FAILED, + "Failed to find build-id"); + } + + build_id_len = build_id_length(note); + if (build_id_len < 20) { + return vk_errorf(pdevice, + VK_ERROR_INITIALIZATION_FAILED, + "Build-id too short. It needs to be a SHA"); + } + + bvnc = pvr_get_packed_bvnc(&pdevice->dev_info); + + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); + _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc)); + _mesa_sha1_final(&sha1_ctx, sha1); + memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE); + + return VK_SUCCESS; +} + +static uint64_t pvr_compute_heap_size(void) +{ + /* Query the total ram from the system */ + uint64_t total_ram; + if (!os_get_total_physical_memory(&total_ram)) + return 0; + + /* We don't want to burn too much ram with the GPU. If the user has 4GiB + * or less, we use at most half. If they have more than 4GiB, we use 3/4. + */ + uint64_t available_ram; + if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL) + available_ram = total_ram / 2U; + else + available_ram = total_ram * 3U / 4U; + + return available_ram; +} + +static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice, + struct pvr_instance *instance, + drmDevicePtr drm_render_device, + drmDevicePtr drm_primary_device) +{ + const char *path = drm_render_device->nodes[DRM_NODE_RENDER]; + struct vk_device_extension_table supported_extensions; + struct vk_physical_device_dispatch_table dispatch_table; + const char *primary_path; + VkResult result; + int ret; + + if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) { + return vk_errorf(instance, + VK_ERROR_INCOMPATIBLE_DRIVER, + "WARNING: powervr is not a conformant Vulkan " + "implementation. 
Pass " + "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know " + "what you're doing."); + } + + pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions); + + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, + &pvr_physical_device_entrypoints, + true); + + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, + &wsi_physical_device_entrypoints, + false); + + result = vk_physical_device_init(&pdevice->vk, + &instance->vk, + &supported_extensions, + &dispatch_table); + if (result != VK_SUCCESS) + return result; + + pdevice->instance = instance; + + pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC); + if (pdevice->render_fd < 0) { + result = vk_errorf(instance, + VK_ERROR_INCOMPATIBLE_DRIVER, + "Failed to open device %s", + path); + goto err_vk_physical_device_finish; + } + + pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc, + path, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!pdevice->render_path) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto err_close_render_fd; + } + + if (instance->vk.enabled_extensions.KHR_display) { + primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY]; + + pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC); + } else { + pdevice->master_fd = -1; + } + + if (pdevice->master_fd >= 0) { + pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc, + primary_path, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!pdevice->master_path) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto err_close_master_fd; + } + } else { + pdevice->master_path = NULL; + } + + pdevice->ws = pvr_winsys_create(pdevice->master_fd, + pdevice->render_fd, + &pdevice->vk.instance->alloc); + if (!pdevice->ws) { + result = VK_ERROR_INITIALIZATION_FAILED; + goto err_vk_free_master_path; + } + + ret = pdevice->ws->ops->device_info_init(pdevice->ws, &pdevice->dev_info); + if (ret) { + result = VK_ERROR_INITIALIZATION_FAILED; + goto err_pvr_winsys_destroy; + } + + result = pvr_physical_device_init_uuids(pdevice); + if (result != VK_SUCCESS) + goto err_pvr_winsys_destroy; + + if (asprintf(&pdevice->name, + "Imagination PowerVR %s %s", + pdevice->dev_info.ident.series_name, + pdevice->dev_info.ident.public_name) < 0) { + result = vk_errorf(instance, + VK_ERROR_OUT_OF_HOST_MEMORY, + "Unable to allocate memory to store device name"); + goto err_pvr_winsys_destroy; + } + + /* Setup available memory heaps and types */ + pdevice->memory.memoryHeapCount = 1; + pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size(); + pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; + + pdevice->memory.memoryTypeCount = 1; + pdevice->memory.memoryTypes[0].propertyFlags = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + pdevice->memory.memoryTypes[0].heapIndex = 0; + + result = pvr_wsi_init(pdevice); + if (result != VK_SUCCESS) { + vk_error(instance, result); + goto err_free_name; + } + + pdevice->compiler = rogue_compiler_create(&pdevice->dev_info); + if (!pdevice->compiler) { + result = vk_errorf(instance, + VK_ERROR_INITIALIZATION_FAILED, + "Failed to initialize Rogue compiler"); + goto err_wsi_finish; + } + + return VK_SUCCESS; + +err_wsi_finish: + pvr_wsi_finish(pdevice); + +err_free_name: + free(pdevice->name); + +err_pvr_winsys_destroy: + pvr_winsys_destroy(pdevice->ws); + +err_vk_free_master_path: + vk_free(&pdevice->vk.instance->alloc, pdevice->master_path); + +err_close_master_fd: + if (pdevice->master_fd >= 0) + 
close(pdevice->master_fd); + + vk_free(&pdevice->vk.instance->alloc, pdevice->render_path); + +err_close_render_fd: + close(pdevice->render_fd); + +err_vk_physical_device_finish: + vk_physical_device_finish(&pdevice->vk); + + return result; +} + +static VkResult pvr_enumerate_devices(struct pvr_instance *instance) +{ + /* FIXME: It should be possible to query the number of devices via + * drmGetDevices2 by passing in NULL for the 'devices' parameter. However, + * this was broken by libdrm commit + * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in + * upstream, hard-code the maximum number of devices. + */ + drmDevicePtr drm_primary_device = NULL; + drmDevicePtr drm_render_device = NULL; + drmDevicePtr drm_devices[8]; + int max_drm_devices; + VkResult result; + + instance->physical_devices_count = 0; + + max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices)); + if (max_drm_devices < 1) + return VK_SUCCESS; + + for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) { + if (drm_devices[i]->bustype != DRM_BUS_PLATFORM) + continue; + + if (drm_devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) { + char **compat; + + compat = drm_devices[i]->deviceinfo.platform->compatible; + while (*compat) { + if (strncmp(*compat, "mediatek,mt8173-gpu", 19) == 0) { + drm_render_device = drm_devices[i]; + + mesa_logd("Found compatible render device '%s'.", + drm_render_device->nodes[DRM_NODE_RENDER]); + break; + } + compat++; + } + } else if (drm_devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) { + char **compat; + + compat = drm_devices[i]->deviceinfo.platform->compatible; + while (*compat) { + if (strncmp(*compat, "mediatek-drm", 12) == 0) { + drm_primary_device = drm_devices[i]; + + mesa_logd("Found compatible primary device '%s'.", + drm_primary_device->nodes[DRM_NODE_PRIMARY]); + break; + } + compat++; + } + } + } + + if (drm_render_device && drm_primary_device) { + result = pvr_physical_device_init(&instance->physical_device, + instance, + drm_render_device, + drm_primary_device); + if (result == VK_SUCCESS) + instance->physical_devices_count = 1; + else if (result == VK_ERROR_INCOMPATIBLE_DRIVER) + result = VK_SUCCESS; + } else { + result = VK_SUCCESS; + } + + drmFreeDevices(drm_devices, max_drm_devices); + + return result; +} + +VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + PVR_FROM_HANDLE(pvr_instance, instance, _instance); + VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount); + VkResult result; + + if (instance->physical_devices_count < 0) { + result = pvr_enumerate_devices(instance); + if (result != VK_SUCCESS) + return result; + } + + if (instance->physical_devices_count == 0) + return VK_SUCCESS; + + assert(instance->physical_devices_count == 1); + vk_outarray_append (&out, p) { + *p = pvr_physical_device_to_handle(&instance->physical_device); + } + + return vk_outarray_status(&out); +} + +void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + + pFeatures->features = (VkPhysicalDeviceFeatures){ + .robustBufferAccess = + PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access), + .fullDrawIndexUint32 = true, + .imageCubeArray = true, + .independentBlend = true, + .geometryShader = false, + .tessellationShader = false, + .sampleRateShading = true, + .dualSrcBlend = false, + .logicOp = true, + .multiDrawIndirect = true, + 
.drawIndirectFirstInstance = true, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = false, + .samplerAnisotropy = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc), + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = false, + .shaderStorageImageExtendedFormats = true, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = true, + .shaderStorageImageWriteWithoutFormat = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderClipDistance = true, + .shaderCullDistance = true, + .shaderFloat64 = false, + .shaderInt64 = true, + .shaderInt16 = true, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; + + vk_foreach_struct (ext, pFeatures->pNext) { + pvr_debug_ignored_stype(ext->sType); + } +} + +/* clang-format off */ +/* FIXME: Clang-format places multiple initializers on the same line, fix this + * and remove clang-format on/off comments. + */ +static const struct pvr_descriptor_limits bvnc_4_V_2_51_descriptor_limits = { + .max_per_stage_resources = 456U, + .max_per_stage_samplers = 64U, + .max_per_stage_uniform_buffers = 96U, + .max_per_stage_storage_buffers = 96U, + .max_per_stage_sampled_images = 128U, + .max_per_stage_storage_images = 64U, + .max_per_stage_input_attachments = 8U, +}; +/* clang-format on */ + +static const struct pvr_descriptor_limits * +pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice) +{ + /* Series 6XT - GX6x50 - Clyde */ + if (pdevice->dev_info.ident.b == 4 && pdevice->dev_info.ident.n == 2) + return &bvnc_4_V_2_51_descriptor_limits; + + vk_errorf(pdevice, + VK_ERROR_INCOMPATIBLE_DRIVER, + "No device ID found for BVNC %d.%d.%d.%d", + pdevice->dev_info.ident.b, + pdevice->dev_info.ident.v, + pdevice->dev_info.ident.n, + pdevice->dev_info.ident.c); + + assert(false); + + return NULL; +} + +void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2 *pProperties) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + const struct pvr_descriptor_limits *descriptor_limits = + pvr_get_physical_device_descriptor_limits(pdevice); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t max_multisample = + PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t uvs_banks = + PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2); + + /* Default value based on the minimum value found in all existing cores. 
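+    *
+    * (These PVR_GET_FEATURE_VALUE() calls fall back to the quoted default
+    * whenever the device info does not report the feature in question.)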
*/ + const uint32_t uvs_pba_entries = + PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t num_user_clip_planes = + PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8); + + const uint32_t sub_pixel_precision = + PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format) + ? 4U + : 8U; + + const uint32_t max_render_size = + rogue_get_render_size_max(&pdevice->dev_info); + + const uint32_t max_sample_bits = ((max_multisample << 1) - 1); + + const uint32_t max_user_vertex_components = + ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U; + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = max_render_size, + .maxImageDimension2D = max_render_size, + .maxImageDimension3D = 2U * 1024U, + .maxImageDimensionCube = max_render_size, + .maxImageArrayLayers = 2U * 1024U, + .maxTexelBufferElements = 64U * 1024U, + .maxUniformBufferRange = 128U * 1024U * 1024U, + .maxStorageBufferRange = 128U * 1024U * 1024U, + .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = UINT32_MAX, + .bufferImageGranularity = 1U, + .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL, + + /* Maximum number of descriptor sets that can be bound at the same time. + */ + .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS, + + .maxPerStageResources = descriptor_limits->max_per_stage_resources, + .maxPerStageDescriptorSamplers = + descriptor_limits->max_per_stage_samplers, + .maxPerStageDescriptorUniformBuffers = + descriptor_limits->max_per_stage_uniform_buffers, + .maxPerStageDescriptorStorageBuffers = + descriptor_limits->max_per_stage_storage_buffers, + .maxPerStageDescriptorSampledImages = + descriptor_limits->max_per_stage_sampled_images, + .maxPerStageDescriptorStorageImages = + descriptor_limits->max_per_stage_storage_images, + .maxPerStageDescriptorInputAttachments = + descriptor_limits->max_per_stage_input_attachments, + + .maxDescriptorSetSamplers = 256U, + .maxDescriptorSetUniformBuffers = 256U, + .maxDescriptorSetUniformBuffersDynamic = 8U, + .maxDescriptorSetStorageBuffers = 256U, + .maxDescriptorSetStorageBuffersDynamic = 8U, + .maxDescriptorSetSampledImages = 256U, + .maxDescriptorSetStorageImages = 256U, + .maxDescriptorSetInputAttachments = 256U, + + /* Vertex Shader Limits */ + .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS, + .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS, + .maxVertexInputAttributeOffset = 0xFFFF, + .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U, + .maxVertexOutputComponents = max_user_vertex_components, + + /* Tessellation Limits */ + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + + /* Geometry Shader Limits */ + .maxGeometryShaderInvocations = 32U, + .maxGeometryInputComponents = max_user_vertex_components, + .maxGeometryOutputComponents = max_user_vertex_components, + .maxGeometryOutputVertices = 256U, + .maxGeometryTotalOutputComponents = 1024U, + + /* Fragment Shader Limits */ + .maxFragmentInputComponents = max_user_vertex_components, + .maxFragmentOutputAttachments = 
PVR_MAX_COLOR_ATTACHMENTS, + .maxFragmentDualSrcAttachments = 0, + .maxFragmentCombinedOutputResources = 8U, + + /* Compute Shader Limits */ + .maxComputeSharedMemorySize = 16U * 1024U, + .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U }, + .maxComputeWorkGroupInvocations = 512U, + .maxComputeWorkGroupSize = { 512U, 512U, 64U }, + + /* Rasterization Limits */ + .subPixelPrecisionBits = sub_pixel_precision, + .subTexelPrecisionBits = 8U, + .mipmapPrecisionBits = 4U, + + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U, + .maxSamplerLodBias = 15.0f, + .maxSamplerAnisotropy = 16.0f, + .maxViewports = PVR_MAX_VIEWPORTS, + + .maxViewportDimensions[0] = max_render_size, + .maxViewportDimensions[1] = max_render_size, + .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size), + .viewportBoundsRange[1] = 2U * max_render_size, + + .viewportSubPixelBits = 0, + .minMemoryMapAlignment = 64U, + .minTexelBufferOffsetAlignment = 16U, + .minUniformBufferOffsetAlignment = 4U, + .minStorageBufferOffsetAlignment = 4U, + + .minTexelOffset = -8, + .maxTexelOffset = 7U, + .minTexelGatherOffset = 0, + .maxTexelGatherOffset = 0, + .minInterpolationOffset = -0.5, + .maxInterpolationOffset = 0.5, + .subPixelInterpolationOffsetBits = 4U, + + .maxFramebufferWidth = max_render_size, + .maxFramebufferHeight = max_render_size, + .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS, + + .framebufferColorSampleCounts = max_sample_bits, + .framebufferDepthSampleCounts = max_sample_bits, + .framebufferStencilSampleCounts = max_sample_bits, + .framebufferNoAttachmentsSampleCounts = max_sample_bits, + .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS, + .sampledImageColorSampleCounts = max_sample_bits, + .sampledImageIntegerSampleCounts = max_sample_bits, + .sampledImageDepthSampleCounts = max_sample_bits, + .sampledImageStencilSampleCounts = max_sample_bits, + .storageImageSampleCounts = max_sample_bits, + .maxSampleMaskWords = 1U, + .timestampComputeAndGraphics = false, + .timestampPeriod = 0.0f, + .maxClipDistances = num_user_clip_planes, + .maxCullDistances = num_user_clip_planes, + .maxCombinedClipAndCullDistances = num_user_clip_planes, + .discreteQueuePriorities = 2U, + .pointSizeRange[0] = 1.0f, + .pointSizeRange[1] = 511.0f, + .pointSizeGranularity = 0.0625f, + .lineWidthRange[0] = 1.0f / 16.0f, + .lineWidthRange[1] = 16.0f, + .lineWidthGranularity = 1.0f / 16.0f, + .strictLines = false, + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 4U, + .optimalBufferCopyRowPitchAlignment = 4U, + .nonCoherentAtomSize = 1U, + }; + + pProperties->properties = (VkPhysicalDeviceProperties){ + .apiVersion = PVR_API_VERSION, + .driverVersion = vk_get_driver_version(), + .vendorID = VK_VENDOR_ID_IMAGINATION, + .deviceID = pdevice->dev_info.ident.device_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = { 0 }, + }; + + snprintf(pProperties->properties.deviceName, + sizeof(pProperties->properties.deviceName), + "%s", + pdevice->name); + + memcpy(pProperties->properties.pipelineCacheUUID, + pdevice->pipeline_cache_uuid, + VK_UUID_SIZE); + + vk_foreach_struct (ext, pProperties->pNext) { + pvr_debug_ignored_stype(ext->sType); + } +} + +const static VkQueueFamilyProperties pvr_queue_family_properties = { + .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = PVR_MAX_QUEUES, + .timestampValidBits = 0, + .minImageTransferGranularity = { 1, 1, 1 }, +}; + +void 
pvr_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t *pCount, + VkQueueFamilyProperties *pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount); + + vk_outarray_append (&out, p) { + *p = pvr_queue_family_properties; + } +} + +void pvr_GetPhysicalDeviceQueueFamilyProperties2( + VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + + vk_outarray_append (&out, p) { + p->queueFamilyProperties = pvr_queue_family_properties; + + vk_foreach_struct (ext, p->pNext) { + pvr_debug_ignored_stype(ext->sType); + } + } +} + +void pvr_GetPhysicalDeviceMemoryProperties2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + + pMemoryProperties->memoryProperties = pdevice->memory; + + vk_foreach_struct (ext, pMemoryProperties->pNext) { + pvr_debug_ignored_stype(ext->sType); + } +} + +PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance, + const char *pName) +{ + PVR_FROM_HANDLE(pvr_instance, instance, _instance); + return vk_instance_get_proc_addr(&instance->vk, + &pvr_instance_entrypoints, + pName); +} + +/* With version 1+ of the loader interface the ICD should expose + * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in + * apps. + */ +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) +{ + return pvr_GetInstanceProcAddr(instance, pName); +} + +/* With version 4+ of the loader interface the ICD should expose + * vk_icdGetPhysicalDeviceProcAddr(). + */ +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName) +{ + PVR_FROM_HANDLE(pvr_instance, instance, _instance); + return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); +} + +static VkResult pvr_device_init_compute_pds_program(struct pvr_device *device) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + struct pvr_pds_compute_shader_program program = { 0U }; + size_t staging_buffer_size; + uint32_t *staging_buffer; + uint32_t *data_buffer; + uint32_t *code_buffer; + VkResult result; + + STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == + ARRAY_SIZE(program.work_group_input_regs)); + STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == + ARRAY_SIZE(program.global_input_regs)); + + /* Initialize PDS structure. */ + for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) { + program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; + program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; + program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; + } + + program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; + + /* Fence kernel. */ + program.fence = true; + program.clear_pds_barrier = true; + + /* Calculate how much space we'll need for the compute shader PDS program. + */ + pvr_pds_set_sizes_compute_shader(&program, dev_info); + + /* FIXME: Fix the below inconsistency of code size being in bytes whereas + * data size being in dwords. + */ + /* Code size is in bytes, data size in dwords. 
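+ * As a worked example (hypothetical sizes): a program reporting
+ * data_size = 16 dwords and code_size = 64 bytes needs a
+ * 16 * 4 + 64 = 128 byte staging buffer.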
*/ + staging_buffer_size = + program.data_size * sizeof(uint32_t) + program.code_size; + + staging_buffer = vk_alloc(&device->vk.alloc, + staging_buffer_size, + 8U, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + data_buffer = staging_buffer; + code_buffer = pvr_pds_generate_compute_shader_data_segment(&program, + data_buffer, + dev_info); + pvr_pds_generate_compute_shader_code_segment(&program, + code_buffer, + dev_info); + result = pvr_gpu_upload_pds(device, + data_buffer, + program.data_size, + PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT), + code_buffer, + program.code_size / sizeof(uint32_t), + PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT), + cache_line_size, + &device->pds_compute_fence_program); + + vk_free(&device->vk.alloc, staging_buffer); + + return result; +} + +/* FIXME: We should be calculating the size when we upload the code in + * pvr_srv_setup_static_pixel_event_program(). + */ +static void pvr_device_get_pixel_event_pds_program_data_size( + uint32_t *const data_size_in_dwords_out) +{ + struct pvr_pds_event_program program = { + /* No data to DMA, just a DOUTU needed. */ + .num_emit_word_pairs = 0, + }; + + pvr_pds_set_sizes_pixel_event(&program); + + *data_size_in_dwords_out = program.data_size; +} + +VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + struct pvr_instance *instance = pdevice->instance; + struct vk_device_dispatch_table dispatch_table; + struct pvr_device *device; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + device = vk_alloc2(&pdevice->vk.instance->alloc, + pAllocator, + sizeof(*device), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &pvr_device_entrypoints, + true); + + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, + false); + + result = vk_device_init(&device->vk, + &pdevice->vk, + &dispatch_table, + pCreateInfo, + pAllocator); + if (result != VK_SUCCESS) + goto err_free_device; + + device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC); + if (device->render_fd < 0) { + result = vk_errorf(instance, + VK_ERROR_INITIALIZATION_FAILED, + "Failed to open device %s", + pdevice->render_path); + goto err_vk_device_finish; + } + + if (pdevice->master_path) + device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC); + else + device->master_fd = -1; + + device->instance = instance; + device->pdevice = pdevice; + + if (pAllocator) + device->vk.alloc = *pAllocator; + else + device->vk.alloc = pdevice->vk.instance->alloc; + + device->ws = pvr_winsys_create(device->master_fd, + device->render_fd, + &device->vk.alloc); + if (!device->ws) { + result = VK_ERROR_INITIALIZATION_FAILED; + goto err_close_master_fd; + } + + device->ws->ops->get_heaps_info(device->ws, &device->heaps); + + result = pvr_free_list_create(device, + PVR_GLOBAL_FREE_LIST_INITIAL_SIZE, + PVR_GLOBAL_FREE_LIST_MAX_SIZE, + PVR_GLOBAL_FREE_LIST_GROW_SIZE, + PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD, + NULL /* parent_free_list */, + &device->global_free_list); + if (result != VK_SUCCESS) + goto err_pvr_winsys_destroy; + + result = pvr_queues_create(device, pCreateInfo); + if (result != VK_SUCCESS) + goto 
err_pvr_free_list_destroy; + + result = pvr_device_init_compute_pds_program(device); + if (result != VK_SUCCESS) + goto err_pvr_queues_destroy; + + if (pCreateInfo->pEnabledFeatures) + memcpy(&device->features, + pCreateInfo->pEnabledFeatures, + sizeof(device->features)); + + /* FIXME: Move this to a later stage and possibly somewhere other than + * pvr_device. The purpose of this is so that we don't have to get the size + * on each kick. + */ + pvr_device_get_pixel_event_pds_program_data_size( + &device->pixel_event_data_size_in_dwords); + + device->global_queue_job_count = 0; + device->global_queue_present_count = 0; + + *pDevice = pvr_device_to_handle(device); + + return VK_SUCCESS; + +err_pvr_queues_destroy: + pvr_queues_destroy(device); + +err_pvr_free_list_destroy: + pvr_free_list_destroy(device->global_free_list); + +err_pvr_winsys_destroy: + pvr_winsys_destroy(device->ws); + +err_close_master_fd: + if (device->master_fd >= 0) + close(device->master_fd); + + close(device->render_fd); + +err_vk_device_finish: + vk_device_finish(&device->vk); + +err_free_device: + vk_free(&device->vk.alloc, device); + + return result; +} + +void pvr_DestroyDevice(VkDevice _device, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + + pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo); + pvr_queues_destroy(device); + pvr_free_list_destroy(device->global_free_list); + pvr_winsys_destroy(device->ws); + close(device->render_fd); + vk_device_finish(&device->vk); + vk_free(&device->vk.alloc, device); +} + +VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + if (!pProperties) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); +} + +VkResult pvr_AllocateMemory(VkDevice _device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + const VkImportMemoryFdInfoKHR *fd_info = NULL; + PVR_FROM_HANDLE(pvr_device, device, _device); + enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU; + struct pvr_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + assert(pAllocateInfo->allocationSize > 0); + + mem = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*mem), + VK_OBJECT_TYPE_DEVICE_MEMORY); + if (!mem) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_foreach_struct_const (ext, pAllocateInfo->pNext) { + switch ((unsigned)ext->sType) { + case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA: + type = PVR_WINSYS_BO_TYPE_DISPLAY; + break; + case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: + fd_info = (void *)ext; + break; + default: + pvr_debug_ignored_stype(ext->sType); + break; + } + } + + if (fd_info && fd_info->handleType) { + VkDeviceSize aligned_alloc_size = + ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size); + + assert( + fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + result = device->ws->ops->buffer_create_from_fd(device->ws, + fd_info->fd, + &mem->bo); + if (result != VK_SUCCESS) + goto err_vk_object_free_mem; + + /* For security purposes, we reject importing the bo if it's smaller + * than the requested allocation size. 
This prevents a malicious client + * from passing a buffer to a trusted client, lying about the size, and + * telling the trusted client to try and texture from an image that goes + * out-of-bounds. This sort of thing could lead to GPU hangs or worse + * in the trusted client. The trusted client can protect itself against + * this sort of attack but only if it can trust the buffer size. + */ + if (aligned_alloc_size > mem->bo->size) { + result = vk_errorf(device, + VK_ERROR_INVALID_EXTERNAL_HANDLE, + "Aligned requested size too large for the given fd " + "%" PRIu64 "B > %" PRIu64 "B", + pAllocateInfo->allocationSize, + mem->bo->size); + device->ws->ops->buffer_destroy(mem->bo); + goto err_vk_object_free_mem; + } + + /* From the Vulkan spec: + * + * "Importing memory from a file descriptor transfers ownership of + * the file descriptor from the application to the Vulkan + * implementation. The application must not perform any operations on + * the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. + */ + close(fd_info->fd); + } else { + /* Align physical allocations to the page size of the heap that will be + * used when binding device memory (see pvr_bind_memory()) to ensure the + * entire allocation can be mapped. + */ + const uint64_t alignment = device->heaps.general_heap->page_size; + + /* FIXME: Need to determine the flags based on + * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags. + * + * The alternative would be to store the flags alongside the memory + * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so + * that they can be looked up. + */ + result = device->ws->ops->buffer_create(device->ws, + pAllocateInfo->allocationSize, + alignment, + type, + PVR_WINSYS_BO_FLAG_CPU_ACCESS, + &mem->bo); + if (result != VK_SUCCESS) + goto err_vk_object_free_mem; + } + + *pMem = pvr_device_memory_to_handle(mem); + + return VK_SUCCESS; + +err_vk_object_free_mem: + vk_object_free(&device->vk, pAllocator, mem); + + return result; +} + +VkResult pvr_GetMemoryFdKHR(VkDevice _device, + const VkMemoryGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); + + assert( + pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + return device->ws->ops->buffer_get_fd(mem->bo, pFd); +} + +VkResult +pvr_GetMemoryFdPropertiesKHR(VkDevice _device, + VkExternalMemoryHandleTypeFlagBits handleType, + int fd, + VkMemoryFdPropertiesKHR *pMemoryFdProperties) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + + switch (handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + /* FIXME: This should only allow memory types having + * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as + * dma-buf should be imported using cacheable memory types, + * given exporter's mmap will always map it as cacheable. 
+ * Ref:
+ * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+ */
+      pMemoryFdProperties->memoryTypeBits =
+         (1 << device->pdevice->memory.memoryTypeCount) - 1;
+      return VK_SUCCESS;
+   default:
+      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+   }
+}
+
+void pvr_FreeMemory(VkDevice _device,
+                    VkDeviceMemory _mem,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
+
+   if (!mem)
+      return;
+
+   device->ws->ops->buffer_destroy(mem->bo);
+
+   vk_object_free(&device->vk, pAllocator, mem);
+}
+
+VkResult pvr_MapMemory(VkDevice _device,
+                       VkDeviceMemory _memory,
+                       VkDeviceSize offset,
+                       VkDeviceSize size,
+                       VkMemoryMapFlags flags,
+                       void **ppData)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
+   void *map;
+
+   if (!mem) {
+      *ppData = NULL;
+      return VK_SUCCESS;
+   }
+
+   if (size == VK_WHOLE_SIZE)
+      size = mem->bo->size - offset;
+
+   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
+    *
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
+    *    equal to the size of the memory minus offset
+    */
+
+   assert(size > 0);
+   assert(offset + size <= mem->bo->size);
+
+   /* Check if already mapped. */
+   if (mem->bo->map) {
+      *ppData = mem->bo->map + offset;
+      return VK_SUCCESS;
+   }
+
+   /* Map it all at once. */
+   map = device->ws->ops->buffer_map(mem->bo);
+   if (!map)
+      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+
+   *ppData = map + offset;
+
+   return VK_SUCCESS;
+}
+
+void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
+
+   if (!mem || !mem->bo->map)
+      return;
+
+   device->ws->ops->buffer_unmap(mem->bo);
+}
+
+VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
+                                     uint32_t memoryRangeCount,
+                                     const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+VkResult
+pvr_InvalidateMappedMemoryRanges(VkDevice _device,
+                                 uint32_t memoryRangeCount,
+                                 const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+void pvr_GetImageSparseMemoryRequirements2(
+   VkDevice device,
+   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+   uint32_t *pSparseMemoryRequirementCount,
+   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+{
+   *pSparseMemoryRequirementCount = 0;
+}
+
+void pvr_GetDeviceMemoryCommitment(VkDevice device,
+                                   VkDeviceMemory memory,
+                                   VkDeviceSize *pCommittedMemoryInBytes)
+{
+   *pCommittedMemoryInBytes = 0;
+}
+
+VkResult pvr_bind_memory(struct pvr_device *device,
+                         struct pvr_device_memory *mem,
+                         VkDeviceSize offset,
+                         VkDeviceSize size,
+                         VkDeviceSize alignment,
+                         struct pvr_winsys_vma **const vma_out,
+                         pvr_dev_addr_t *const dev_addr_out)
+{
+   VkDeviceSize virt_size =
+      size + (offset & (device->heaps.general_heap->page_size - 1));
+   struct pvr_winsys_vma *vma;
+   pvr_dev_addr_t dev_addr;
+
+   /* Valid usage:
+    *
+    * "memoryOffset must be an integer multiple of the alignment member of
+    * the VkMemoryRequirements structure returned from a call to
+    * vkGetBufferMemoryRequirements with buffer"
+    *
+    * "memoryOffset must be an integer multiple of the alignment member of
+    * the VkMemoryRequirements structure returned from a call to
+    * vkGetImageMemoryRequirements with image"
+    */
+   assert(offset % alignment == 0);
+   assert(offset < mem->bo->size);
+
+   vma =
device->ws->ops->heap_alloc(device->heaps.general_heap, + virt_size, + alignment); + if (!vma) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size); + if (!dev_addr.addr) { + device->ws->ops->heap_free(vma); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + *dev_addr_out = dev_addr; + *vma_out = vma; + + return VK_SUCCESS; +} + +void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma) +{ + device->ws->ops->vma_unmap(vma); + device->ws->ops->heap_free(vma); +} + +VkResult pvr_BindBufferMemory2(VkDevice _device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + uint32_t i; + + for (i = 0; i < bindInfoCount; i++) { + PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory); + PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer); + + VkResult result = pvr_bind_memory(device, + mem, + pBindInfos[i].memoryOffset, + buffer->size, + buffer->alignment, + &buffer->vma, + &buffer->dev_addr); + if (result != VK_SUCCESS) { + while (i--) { + PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer); + pvr_unbind_memory(device, buffer->vma); + } + + return result; + } + } + + return VK_SUCCESS; +} + +VkResult pvr_QueueBindSparse(VkQueue _queue, + uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, + VkFence fence) +{ + return VK_SUCCESS; +} + +/* Event functions. */ + +VkResult pvr_CreateEvent(VkDevice _device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *pEvent) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +void pvr_DestroyEvent(VkDevice _device, + VkEvent _event, + const VkAllocationCallbacks *pAllocator) +{ + assert(!"Unimplemented"); +} + +VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +VkResult pvr_SetEvent(VkDevice _device, VkEvent _event) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +/* Buffer functions. 
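+ * pvr_CreateBuffer() below only records the buffer's size and alignment;
+ * backing memory is attached later via pvr_BindBufferMemory2(), which maps
+ * it into the GPU address space with pvr_bind_memory().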
*/ + +VkResult pvr_CreateBuffer(VkDevice _device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *pBuffer) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + const uint32_t alignment = 4096; + struct pvr_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + assert(pCreateInfo->usage != 0); + + /* We check against (ULONG_MAX - alignment) to prevent overflow issues */ + if (pCreateInfo->size >= ULONG_MAX - alignment) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + buffer = vk_object_zalloc(&device->vk, + pAllocator, + sizeof(*buffer), + VK_OBJECT_TYPE_BUFFER); + if (!buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->alignment = alignment; + + *pBuffer = pvr_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void pvr_DestroyBuffer(VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer); + + if (!buffer) + return; + + pvr_unbind_memory(device, buffer->vma); + vk_object_free(&device->vk, pAllocator, buffer); +} + +void pvr_DestroySampler(VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks *pAllocator) +{ + assert(!"Unimplemented"); +} + +VkResult pvr_gpu_upload(struct pvr_device *device, + struct pvr_winsys_heap *heap, + const void *data, + size_t size, + uint64_t alignment, + struct pvr_bo **const pvr_bo_out) +{ + struct pvr_bo *pvr_bo = NULL; + VkResult result; + + assert(size > 0); + + result = pvr_bo_alloc(device, + heap, + size, + alignment, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &pvr_bo); + if (result != VK_SUCCESS) + return result; + + memcpy(pvr_bo->bo->map, data, size); + pvr_bo_cpu_unmap(device, pvr_bo); + + *pvr_bo_out = pvr_bo; + + return VK_SUCCESS; +} + +VkResult pvr_gpu_upload_usc(struct pvr_device *device, + const void *code, + size_t code_size, + uint64_t code_alignment, + struct pvr_bo **const pvr_bo_out) +{ + struct pvr_bo *pvr_bo = NULL; + VkResult result; + + assert(code_size > 0); + + /* The USC will prefetch the next instruction, so over allocate by 1 + * instruction to prevent reading off the end of a page into a potentially + * unallocated page. + */ + result = pvr_bo_alloc(device, + device->heaps.usc_heap, + code_size + ROGUE_MAX_INSTR_BYTES, + code_alignment, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &pvr_bo); + if (result != VK_SUCCESS) + return result; + + memcpy(pvr_bo->bo->map, code, code_size); + pvr_bo_cpu_unmap(device, pvr_bo); + + *pvr_bo_out = pvr_bo; + + return VK_SUCCESS; +} + +/** + * \brief Upload PDS program data and code segments from host memory to device + * memory. + * + * \param[in] device Logical device pointer. + * \param[in] data Pointer to PDS data segment to upload. + * \param[in] data_size_dwords Size of PDS data segment in dwords. + * \param[in] data_alignment Required alignment of the PDS data segment in + * bytes. Must be a power of two. + * \param[in] code Pointer to PDS code segment to upload. + * \param[in] code_size_dwords Size of PDS code segment in dwords. + * \param[in] code_alignment Required alignment of the PDS code segment in + * bytes. Must be a power of two. + * \param[in] min_alignment Minimum alignment of the bo holding the PDS + * program in bytes. + * \param[out] pds_upload_out On success will be initialized based on the + * uploaded PDS program. + * \return VK_SUCCESS on success, or error code otherwise. 
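+ *
+ * Layout example (hypothetical values): data_size_dwords = 10 with
+ * data_alignment = 16 gives a 40 byte data segment padded to 48 bytes;
+ * with code_alignment = 64, the code segment then starts at
+ * code_offset = ALIGN_POT(48, 64) = 64 within the shared bo.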
+ */ +VkResult pvr_gpu_upload_pds(struct pvr_device *device, + const uint32_t *data, + uint32_t data_size_dwords, + uint32_t data_alignment, + const uint32_t *code, + uint32_t code_size_dwords, + uint32_t code_alignment, + uint64_t min_alignment, + struct pvr_pds_upload *const pds_upload_out) +{ + /* All alignment and sizes below are in bytes. */ + const size_t data_size = data_size_dwords * sizeof(*data); + const size_t code_size = code_size_dwords * sizeof(*code); + const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment); + const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment); + const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment); + const uint64_t bo_alignment = MAX2(min_alignment, data_alignment); + const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size) + : data_aligned_size; + const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED | + PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC; + VkResult result; + + assert(code || data); + assert(!code || (code_size_dwords != 0 && code_alignment != 0)); + assert(!data || (data_size_dwords != 0 && data_alignment != 0)); + + result = pvr_bo_alloc(device, + device->heaps.pds_heap, + bo_size, + bo_alignment, + bo_flags, + &pds_upload_out->pvr_bo); + if (result != VK_SUCCESS) + return result; + + if (data) { + memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size); + + pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr - + device->heaps.pds_heap->base_addr.addr; + + /* Store data size in dwords. */ + assert(data_aligned_size % 4 == 0); + pds_upload_out->data_size = data_aligned_size / 4; + } else { + pds_upload_out->data_offset = 0; + pds_upload_out->data_size = 0; + } + + if (code) { + memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset, + code, + code_size); + + pds_upload_out->code_offset = + (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) - + device->heaps.pds_heap->base_addr.addr; + + /* Store code size in dwords. */ + assert(code_aligned_size % 4 == 0); + pds_upload_out->code_size = code_aligned_size / 4; + } else { + pds_upload_out->code_offset = 0; + pds_upload_out->code_size = 0; + } + + pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo); + + return VK_SUCCESS; +} + +static VkResult +pvr_framebuffer_create_ppp_state(struct pvr_device *device, + struct pvr_framebuffer *framebuffer) +{ + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + uint32_t ppp_state[3]; + VkResult result; + + pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) { + header.pres_terminate = true; + } + + pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) { + term0.clip_right = + DIV_ROUND_UP( + framebuffer->width, + PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) - + 1; + term0.clip_bottom = + DIV_ROUND_UP( + framebuffer->height, + PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) - + 1; + } + + pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) { + term1.render_target = 0; + term1.clip_left = 0; + } + + result = pvr_gpu_upload(device, + device->heaps.general_heap, + ppp_state, + sizeof(ppp_state), + cache_line_size, + &framebuffer->ppp_state_bo); + if (result != VK_SUCCESS) + return result; + + /* Calculate the size of PPP state in dwords. 
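+ * With the three dwords packed above, this works out to
+ * sizeof(ppp_state) / sizeof(uint32_t) = 3.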
*/ + framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t); + + return VK_SUCCESS; +} + +static bool pvr_render_targets_init(struct pvr_render_target *render_targets, + uint32_t render_targets_count) +{ + uint32_t i; + + for (i = 0; i < render_targets_count; i++) { + if (pthread_mutex_init(&render_targets[i].mutex, NULL)) + goto err_mutex_destroy; + } + + return true; + +err_mutex_destroy: + while (i--) + pthread_mutex_destroy(&render_targets[i].mutex); + + return false; +} + +static void pvr_render_targets_fini(struct pvr_render_target *render_targets, + uint32_t render_targets_count) +{ + for (uint32_t i = 0; i < render_targets_count; i++) { + if (render_targets[i].valid) { + pvr_render_target_dataset_destroy(render_targets[i].rt_dataset); + render_targets[i].valid = false; + } + + pthread_mutex_destroy(&render_targets[i].mutex); + } +} + +VkResult pvr_CreateFramebuffer(VkDevice _device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_render_target *render_targets; + struct pvr_framebuffer *framebuffer; + struct pvr_image_view **attachments; + uint32_t render_targets_count; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + render_targets_count = + PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info); + + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1); + vk_multialloc_add(&ma, + &attachments, + __typeof__(*attachments), + pCreateInfo->attachmentCount); + vk_multialloc_add(&ma, + &render_targets, + __typeof__(*render_targets), + render_targets_count); + + if (!vk_multialloc_zalloc2(&ma, + &device->vk.alloc, + pAllocator, + VK_OBJECT_TYPE_FRAMEBUFFER)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, + &framebuffer->base, + VK_OBJECT_TYPE_FRAMEBUFFER); + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + framebuffer->attachments = attachments; + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { + framebuffer->attachments[i] = + pvr_image_view_from_handle(pCreateInfo->pAttachments[i]); + } + + result = pvr_framebuffer_create_ppp_state(device, framebuffer); + if (result != VK_SUCCESS) + goto err_free_framebuffer; + + framebuffer->render_targets = render_targets; + framebuffer->render_targets_count = render_targets_count; + if (!pvr_render_targets_init(framebuffer->render_targets, + render_targets_count)) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_ppp_state_bo; + } + + *pFramebuffer = pvr_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; + +err_free_ppp_state_bo: + pvr_bo_free(device, framebuffer->ppp_state_bo); + +err_free_framebuffer: + vk_object_base_finish(&framebuffer->base); + vk_free2(&device->vk.alloc, pAllocator, framebuffer); + + return result; +} + +void pvr_DestroyFramebuffer(VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb); + + if (!framebuffer) + return; + + pvr_render_targets_fini(framebuffer->render_targets, + framebuffer->render_targets_count); + pvr_bo_free(device, framebuffer->ppp_state_bo); + vk_object_base_finish(&framebuffer->base); + 
vk_free2(&device->vk.alloc, pAllocator, framebuffer);
+}
+
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
+{
+   /* For the full details on loader interface versioning, see
+    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+    * What follows is a condensed summary, to help you navigate the large and
+    * confusing official doc.
+    *
+    * - Loader interface v0 is incompatible with later versions. We don't
+    *   support it.
+    *
+    * - In loader interface v1:
+    *     - The first ICD entrypoint called by the loader is
+    *       vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+    *       entrypoint.
+    *     - The ICD must statically expose no other Vulkan symbol unless it
+    *       is linked with -Bsymbolic.
+    *     - Each dispatchable Vulkan handle created by the ICD must be
+    *       a pointer to a struct whose first member is VK_LOADER_DATA. The
+    *       ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+    *     - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+    *       vkDestroySurfaceKHR(). The ICD must be capable of working with
+    *       such loader-managed surfaces.
+    *
+    * - Loader interface v2 differs from v1 in:
+    *     - The first ICD entrypoint called by the loader is
+    *       vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+    *       statically expose this entrypoint.
+    *
+    * - Loader interface v3 differs from v2 in:
+    *     - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+    *       vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
+    *       because the loader no longer does so.
+    *
+    * - Loader interface v4 differs from v3 in:
+    *     - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+    */
+   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+   return VK_SUCCESS;
+}
+
+VkResult pvr_CreateSampler(VkDevice _device,
+                           const VkSamplerCreateInfo *pCreateInfo,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkSampler *pSampler)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+void pvr_GetBufferMemoryRequirements2(
+   VkDevice _device,
+   const VkBufferMemoryRequirementsInfo2 *pInfo,
+   VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource. Bit 'i' is set if and only
+    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
+    *    structure for the physical device is supported for the resource.
+    *
+    * All types are currently supported for buffers.
+    */
+   pMemoryRequirements->memoryRequirements.memoryTypeBits =
+      (1ul << device->pdevice->memory.memoryTypeCount) - 1;
+
+   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
+   pMemoryRequirements->memoryRequirements.size =
+      ALIGN_POT(buffer->size, buffer->alignment);
+}
+
+void pvr_GetDeviceQueue(VkDevice _device,
+                        uint32_t queueFamilyIndex,
+                        uint32_t queueIndex,
+                        VkQueue *pQueue)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   assert(queueFamilyIndex == 0);
+
+   *pQueue = pvr_queue_to_handle(&device->queues[queueIndex]);
+}
+
+void pvr_GetImageMemoryRequirements2(VkDevice _device,
+                                     const VkImageMemoryRequirementsInfo2 *pInfo,
+                                     VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource.
Bit 'i' is set if and only + * if the memory type 'i' in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported for the resource. + * + * All types are currently supported for images. + */ + const uint32_t memory_types = + (1ul << device->pdevice->memory.memoryTypeCount) - 1; + + /* TODO: The returned size is aligned here in case of arrays/CEM (as is done + * in GetImageMemoryRequirements()), but this should be known at image + * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in + * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to + * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually + * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1? + * + * Note: Presumably the 4096 alignment requirement comes from the Vulkan + * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up + * render and compute jobs. + */ + pMemoryRequirements->memoryRequirements.alignment = image->alignment; + pMemoryRequirements->memoryRequirements.size = + ALIGN(image->size, image->alignment); + pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types; +} diff --git a/src/imagination/vulkan/pvr_formats.c b/src/imagination/vulkan/pvr_formats.c new file mode 100644 index 00000000000..33a0a348385 --- /dev/null +++ b/src/imagination/vulkan/pvr_formats.c @@ -0,0 +1,348 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+#define FORMAT(vk, tex_fmt, pack_mode)                     \
+   [VK_FORMAT_##vk] = {                                    \
+      .vk_format = VK_FORMAT_##vk,                         \
+      .tex_format = ROGUE_TEXSTATE_FORMAT_##tex_fmt,       \
+      .pbe_packmode = ROGUE_PBESTATE_PACKMODE_##pack_mode, \
+      .supported = true,                                   \
+   }
+
+struct pvr_format {
+   VkFormat vk_format;
+   uint32_t tex_format;
+   uint32_t pbe_packmode;
+   bool supported;
+};
+
+/* TODO: add all supported core formats */
+static const struct pvr_format pvr_format_table[] = {
+   FORMAT(B8G8R8A8_UNORM, U8U8U8U8, U8U8U8U8),
+   FORMAT(D32_SFLOAT, F32, F32),
+};
+
+#undef FORMAT
+
+static inline const struct pvr_format *pvr_get_format(VkFormat vk_format)
+{
+   if (vk_format < ARRAY_SIZE(pvr_format_table) &&
+       pvr_format_table[vk_format].supported) {
+      return &pvr_format_table[vk_format];
+   }
+
+   return NULL;
+}
+
+uint32_t pvr_get_tex_format(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+   if (pvr_format) {
+      return pvr_format->tex_format;
+   }
+
+   return ROGUE_TEXSTATE_FORMAT_INVALID;
+}
+
+uint32_t pvr_get_pbe_packmode(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+   if (pvr_format)
+      return pvr_format->pbe_packmode;
+
+   return ROGUE_PBESTATE_PACKMODE_INVALID;
+}
+
+static VkFormatFeatureFlags
+pvr_get_image_format_features(const struct pvr_format *pvr_format,
+                              VkImageTiling vk_tiling)
+{
+   VkFormatFeatureFlags flags = 0;
+   VkImageAspectFlags aspects;
+
+   if (!pvr_format)
+      return 0;
+
+   aspects = vk_format_aspects(pvr_format->vk_format);
+   if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+      flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
+               VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+               VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+   }
+
+   return flags;
+}
+
+const uint8_t *pvr_get_format_swizzle(VkFormat vk_format)
+{
+   const struct util_format_description *vf = vk_format_description(vk_format);
+   static const uint8_t fallback[] = { PIPE_SWIZZLE_X,
+                                       PIPE_SWIZZLE_Y,
+                                       PIPE_SWIZZLE_Z,
+                                       PIPE_SWIZZLE_W };
+
+   if (vf)
+      return vf->swizzle;
+
+   assert(!"Unsupported format");
+   return fallback;
+}
+
+static VkFormatFeatureFlags
+pvr_get_buffer_format_features(const struct pvr_format *pvr_format)
+{
+   VkFormatFeatureFlags flags = 0;
+
+   if (!pvr_format)
+      return 0;
+
+   return flags;
+}
+
+void pvr_GetPhysicalDeviceFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   VkFormat format,
+   VkFormatProperties2 *pFormatProperties)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(format);
+
+   pFormatProperties->formatProperties = (VkFormatProperties){
+      .linearTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_LINEAR),
+      .optimalTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_OPTIMAL),
+      .bufferFeatures = pvr_get_buffer_format_features(pvr_format),
+   };
+
+   vk_foreach_struct (ext, pFormatProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+static VkResult
+pvr_get_image_format_properties(struct pvr_physical_device *pdevice,
+                                const VkPhysicalDeviceImageFormatInfo2 *info,
+                                VkImageFormatProperties *pImageFormatProperties)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+VkResult pvr_GetPhysicalDeviceImageFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
+   VkImageFormatProperties2 *pImageFormatProperties)
+{
+   const
VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + VkExternalImageFormatProperties *external_props = NULL; + VkResult result; + + result = pvr_get_image_format_properties( + pdevice, + pImageFormatInfo, + &pImageFormatProperties->imageFormatProperties); + if (result != VK_SUCCESS) + return result; + + /* Extract input structs */ + vk_foreach_struct_const (ext, pImageFormatInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: + external_info = (const void *)ext; + break; + default: + pvr_debug_ignored_stype(ext->sType); + break; + } + } + + /* Extract output structs */ + vk_foreach_struct (ext, pImageFormatProperties->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: + external_props = (void *)ext; + break; + default: + pvr_debug_ignored_stype(ext->sType); + break; + } + } + + /* From the Vulkan 1.0.42 spec: + * + * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will + * behave as if VkPhysicalDeviceExternalImageFormatInfo was not + * present and VkExternalImageFormatProperties will be ignored. + */ + if (external_info && external_info->handleType != 0) { + switch (external_info->handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: + if (!external_props) + break; + + external_props->externalMemoryProperties.externalMemoryFeatures = + VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + external_props->externalMemoryProperties.compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + external_props->externalMemoryProperties.exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + break; + default: + return vk_error(pdevice, VK_ERROR_FORMAT_NOT_SUPPORTED); + } + } + + return VK_SUCCESS; +} + +void pvr_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties) +{ + /* Sparse images are not yet supported. */ + *pNumProperties = 0; +} + +void pvr_GetPhysicalDeviceSparseImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) +{ + /* Sparse images are not yet supported. */ + *pPropertyCount = 0; +} + +void pvr_GetPhysicalDeviceExternalBufferProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + /* The Vulkan 1.0.42 spec says "handleType must be a valid + * VkExternalMemoryHandleTypeFlagBits value" in + * VkPhysicalDeviceExternalBufferInfo. This differs from + * VkPhysicalDeviceExternalImageFormatInfo, which surprisingly permits + * handleType == 0. + */ + assert(pExternalBufferInfo->handleType != 0); + + /* All of the current flags are for sparse which we don't support. 
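+ * (e.g. the VK_BUFFER_CREATE_SPARSE_* bits in VkBufferCreateFlags).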
*/ + if (pExternalBufferInfo->flags) + goto unsupported; + + switch (pExternalBufferInfo->handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: + /* clang-format off */ + pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = + VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + /* clang-format on */ + return; + default: + break; + } + +unsupported: + /* From the Vulkan 1.1.113 spec: + * + * compatibleHandleTypes must include at least handleType. + */ + pExternalBufferProperties->externalMemoryProperties = + (VkExternalMemoryProperties){ + .compatibleHandleTypes = pExternalBufferInfo->handleType, + }; +} + +bool pvr_format_is_pbe_downscalable(VkFormat vk_format) +{ + if (vk_format_is_pure_integer(vk_format)) { + /* PBE downscale behavior for integer formats does not match Vulkan + * spec. Vulkan requires a single sample to be chosen instead of + * taking the average sample color. + */ + return false; + } + + switch (pvr_get_pbe_packmode(vk_format)) { + default: + return true; + + case ROGUE_PBESTATE_PACKMODE_U16U16U16U16: + case ROGUE_PBESTATE_PACKMODE_S16S16S16S16: + case ROGUE_PBESTATE_PACKMODE_U32U32U32U32: + case ROGUE_PBESTATE_PACKMODE_S32S32S32S32: + case ROGUE_PBESTATE_PACKMODE_F32F32F32F32: + case ROGUE_PBESTATE_PACKMODE_U16U16U16: + case ROGUE_PBESTATE_PACKMODE_S16S16S16: + case ROGUE_PBESTATE_PACKMODE_U32U32U32: + case ROGUE_PBESTATE_PACKMODE_S32S32S32: + case ROGUE_PBESTATE_PACKMODE_F32F32F32: + case ROGUE_PBESTATE_PACKMODE_U16U16: + case ROGUE_PBESTATE_PACKMODE_S16S16: + case ROGUE_PBESTATE_PACKMODE_U32U32: + case ROGUE_PBESTATE_PACKMODE_S32S32: + case ROGUE_PBESTATE_PACKMODE_F32F32: + case ROGUE_PBESTATE_PACKMODE_U24ST8: + case ROGUE_PBESTATE_PACKMODE_ST8U24: + case ROGUE_PBESTATE_PACKMODE_U16: + case ROGUE_PBESTATE_PACKMODE_S16: + case ROGUE_PBESTATE_PACKMODE_U32: + case ROGUE_PBESTATE_PACKMODE_S32: + case ROGUE_PBESTATE_PACKMODE_F32: + case ROGUE_PBESTATE_PACKMODE_X24U8F32: + case ROGUE_PBESTATE_PACKMODE_X24X8F32: + case ROGUE_PBESTATE_PACKMODE_X24G8X32: + case ROGUE_PBESTATE_PACKMODE_X8U24: + case ROGUE_PBESTATE_PACKMODE_U8X24: + case ROGUE_PBESTATE_PACKMODE_PBYTE: + case ROGUE_PBESTATE_PACKMODE_PWORD: + case ROGUE_PBESTATE_PACKMODE_INVALID: + return false; + } +} diff --git a/src/imagination/vulkan/pvr_formats.h b/src/imagination/vulkan/pvr_formats.h new file mode 100644 index 00000000000..c038be7744b --- /dev/null +++ b/src/imagination/vulkan/pvr_formats.h @@ -0,0 +1,36 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_FORMATS_H
+#define PVR_FORMATS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
+uint32_t pvr_get_tex_format(VkFormat vk_format);
+uint32_t pvr_get_pbe_packmode(VkFormat vk_format);
+bool pvr_format_is_pbe_downscalable(VkFormat vk_format);
+
+#endif /* PVR_FORMATS_H */
diff --git a/src/imagination/vulkan/pvr_hw_pass.c b/src/imagination/vulkan/pvr_hw_pass.c
new file mode 100644
index 00000000000..653f927980f
--- /dev/null
+++ b/src/imagination/vulkan/pvr_hw_pass.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pvr_hw_pass.h"
+#include "pvr_private.h"
+#include "vk_alloc.h"
+
+void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
+                                    struct pvr_renderpass_hwsetup *hw_setup)
+{
+   vk_free(&device->vk.alloc, hw_setup);
+}
+
+struct pvr_renderpass_hwsetup *
+pvr_create_renderpass_hwsetup(struct pvr_device *device,
+                              struct pvr_render_pass *pass,
+                              bool disable_merge)
+{
+   struct pvr_renderpass_hwsetup_eot_surface *eot_surface;
+   enum pvr_renderpass_surface_initop *color_initops;
+   struct pvr_renderpass_hwsetup_subpass *subpasses;
+   struct pvr_renderpass_hwsetup_render *renders;
+   struct pvr_renderpass_colorinit *color_inits;
+   struct pvr_renderpass_hwsetup *hw_setup;
+   struct pvr_renderpass_hw_map *subpass_map;
+   struct usc_mrt_resource *mrt_resources;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
+   vk_multialloc_add(&ma, &renders, __typeof__(*renders), 1);
+   vk_multialloc_add(&ma, &color_inits, __typeof__(*color_inits), 1);
+   vk_multialloc_add(&ma, &subpass_map, __typeof__(*subpass_map), 1);
+   vk_multialloc_add(&ma, &mrt_resources, __typeof__(*mrt_resources), 2);
+   vk_multialloc_add(&ma, &subpasses, __typeof__(*subpasses), 1);
+   vk_multialloc_add(&ma, &eot_surface, __typeof__(*eot_surface), 1);
+   vk_multialloc_add(&ma,
+                     &color_initops,
+                     __typeof__(*color_initops),
+                     pass->subpasses[0].color_count);
+   /* Note, no more multialloc slots available (maximum supported is 8). */
+
+   if (!vk_multialloc_zalloc(&ma,
+                             &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) {
+      return NULL;
+   }
+
+   /* FIXME: Remove hardcoding of hw_setup structure. */
+   subpasses[0].z_replicate = -1;
+   subpasses[0].depth_initop = RENDERPASS_SURFACE_INITOP_CLEAR;
+   subpasses[0].stencil_clear = false;
+   subpasses[0].driver_id = 0;
+   color_initops[0] = RENDERPASS_SURFACE_INITOP_NOP;
+   subpasses[0].color_initops = color_initops;
+   subpasses[0].client_data = NULL;
+   renders[0].subpass_count = 1;
+   renders[0].subpasses = subpasses;
+
+   renders[0].sample_count = 1;
+   renders[0].ds_surface_id = 1;
+   renders[0].depth_init = RENDERPASS_SURFACE_INITOP_CLEAR;
+   renders[0].stencil_init = RENDERPASS_SURFACE_INITOP_NOP;
+
+   mrt_resources[0].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
+   mrt_resources[0].u.reg.out_reg = 0;
+   mrt_resources[0].u.reg.offset = 0;
+   renders[0].init_setup.render_targets_count = 1;
+   renders[0].init_setup.mrt_resources = &mrt_resources[0];
+
+   color_inits[0].op = RENDERPASS_SURFACE_INITOP_CLEAR;
+   color_inits[0].driver_id = 0;
+   renders[0].color_init_count = 1;
+   renders[0].color_init = color_inits;
+
+   mrt_resources[1].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
+   mrt_resources[1].u.reg.out_reg = 0;
+   mrt_resources[1].u.reg.offset = 0;
+   renders[0].eot_setup.render_targets_count = 1;
+   renders[0].eot_setup.mrt_resources = &mrt_resources[1];
+
+   eot_surface->mrt_index = 0;
+   eot_surface->attachment_index = 0;
+   eot_surface->need_resolve = false;
+   eot_surface->resolve_type = PVR_RESOLVE_TYPE_INVALID;
+   eot_surface->src_attachment_index = 0;
+   renders[0].eot_surfaces = eot_surface;
+   renders[0].eot_surface_count = 1;
+
+   renders[0].output_regs_count = 1;
+   renders[0].tile_buffers_count = 0;
+   renders[0].client_data = NULL;
+   hw_setup->render_count = 1;
+   hw_setup->renders = renders;
+
+   subpass_map->render = 0;
+   subpass_map->subpass = 0;
+   hw_setup->subpass_map = subpass_map;
+
+   return hw_setup;
+}
diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h
new file mode 100644
index 00000000000..520ea7900d9
00000000000..520ea7900d9
--- /dev/null
+++ b/src/imagination/vulkan/pvr_hw_pass.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_HW_PASS_H
+#define PVR_HW_PASS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct pvr_device;
+struct pvr_render_pass;
+
+enum pvr_renderpass_surface_initop {
+   RENDERPASS_SURFACE_INITOP_CLEAR,
+   RENDERPASS_SURFACE_INITOP_LOAD,
+   RENDERPASS_SURFACE_INITOP_NOP,
+};
+
+struct pvr_renderpass_hwsetup_subpass {
+   /* If >=0 then copy the depth into this pixel output for all fragment
+    * programs in the subpass.
+    */
+   int32_t z_replicate;
+
+   /* The operation to perform on the depth at the start of the subpass. Loads
+    * are deferred to subpasses when depth has been replicated.
+    */
+   enum pvr_renderpass_surface_initop depth_initop;
+
+   /* If true then clear the stencil at the start of the subpass. */
+   bool stencil_clear;
+
+   /* Driver Id from the input pvr_render_subpass structure. */
+   uint32_t driver_id;
+
+   /* For each color attachment to the subpass: the operation to perform at
+    * the start of the subpass.
+    */
+   enum pvr_renderpass_surface_initop *color_initops;
+
+   void *client_data;
+};
+
+struct pvr_renderpass_colorinit {
+   /* Source surface for the operation. */
+   uint32_t driver_id;
+
+   /* Type of operation: either clear or load. */
+   enum pvr_renderpass_surface_initop op;
+};
+
+/* FIXME: Adding these USC enums and structures here for now to avoid adding
+ * usc.h header. Needs to be moved to compiler specific header.
+ */
+/* Specifies the location of render target writes. */
+enum usc_mrt_resource_type {
+   USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER,
+   USC_MRT_RESOURCE_TYPE_MEMORY,
+};
+
+struct usc_mrt_resource {
+   /* Resource type allocated for render target. */
+   enum usc_mrt_resource_type type;
+
+   union {
+      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */
+      struct {
+         /* The output register to use. */
+         uint32_t out_reg;
+
+         /* The offset in bytes into the output register. */
+         uint32_t offset;
+      } reg;
+
+      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
+      struct {
+         /* The number of the tile buffer to use. */
+         uint32_t tile_buffer;
+
+         /* The offset in dwords within the tile buffer. */
+         uint32_t offset_in_dwords;
+      } mem;
+   } u;
+};
+
+struct usc_mrt_setup {
+   /* Number of render targets present.
*/ + uint32_t render_targets_count; + + /* Array of MRT resources allocated for each render target. The number of + * elements is determined by usc_mrt_setup::render_targets_count. + */ + struct usc_mrt_resource *mrt_resources; +}; + +enum pvr_resolve_type { + PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */ + PVR_RESOLVE_TYPE_PBE, + PVR_RESOLVE_TYPE_TRANSFER, +}; + +struct pvr_renderpass_hwsetup_eot_surface { + /* MRT index to store from. Also used to index into + * usc_mrt_setup::mrt_resources. + */ + uint32_t mrt_index; + + /* Index of pvr_render_pass_info::attachments to store into. */ + uint32_t attachment_index; + + /* True if the surface should be resolved. */ + bool need_resolve; + + /* How the surface should be resolved at the end of a render. Only valid if + * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true. + */ + enum pvr_resolve_type resolve_type; + + /* Index of pvr_render_pass_info::attachments to resolve from. Only valid if + * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true. + */ + uint32_t src_attachment_index; +}; + +struct pvr_renderpass_hwsetup_render { + /* Number of pixel output registers to allocate for this render. */ + uint32_t output_regs_count; + + /* Number of tile buffers to allocate for this render. */ + uint32_t tile_buffers_count; + + /* Number of subpasses in this render. */ + uint32_t subpass_count; + + /* Description of each subpass. */ + struct pvr_renderpass_hwsetup_subpass *subpasses; + + /* The sample count of every color attachment (or depth attachment if + * z-only) in this render + */ + uint32_t sample_count; + + /* Driver Id for the surface to use for depth/stencil load/store in this + * render. + */ + int32_t ds_surface_id; + + /* Operation on the on-chip depth at the start of the render. + * Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave + * uninitialized. + */ + enum pvr_renderpass_surface_initop depth_init; + + /* Operation on the on-chip stencil at the start of the render. */ + enum pvr_renderpass_surface_initop stencil_init; + + /* For each operation: the destination in the on-chip color storage. */ + struct usc_mrt_setup init_setup; + + /* Count of operations on on-chip color storage at the start of the render. + */ + uint32_t color_init_count; + + /* How to initialize render targets at the start of the render. */ + struct pvr_renderpass_colorinit *color_init; + + /* Describes the location of the source data for each stored surface. */ + struct usc_mrt_setup eot_setup; + + struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces; + uint32_t eot_surface_count; + + void *client_data; +}; + +struct pvr_renderpass_hw_map { + uint32_t render; + uint32_t subpass; +}; + +struct pvr_renderpass_hwsetup { + /* Number of renders. */ + uint32_t render_count; + + /* Description of each render. */ + struct pvr_renderpass_hwsetup_render *renders; + + /* Maps indices from pvr_render_pass::subpasses to the + * pvr_renderpass_hwsetup_render/pvr_renderpass_hwsetup_subpass relative to + * that render where the subpass is scheduled. 
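+    *
+    * For illustration only (editor's sketch, not part of the patch): given
+    * an input subpass index i, the hardware render and subpass it was
+    * scheduled into can be looked up as:
+    *
+    *    const struct pvr_renderpass_hw_map *map = &hw_setup->subpass_map[i];
+    *    const struct pvr_renderpass_hwsetup_render *render =
+    *       &hw_setup->renders[map->render];
+    *    const struct pvr_renderpass_hwsetup_subpass *subpass =
+    *       &render->subpasses[map->subpass];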
+ */ + struct pvr_renderpass_hw_map *subpass_map; +}; + +struct pvr_renderpass_hwsetup * +pvr_create_renderpass_hwsetup(struct pvr_device *device, + struct pvr_render_pass *pass, + bool disable_merge); +void pvr_destroy_renderpass_hwsetup(struct pvr_device *device, + struct pvr_renderpass_hwsetup *hw_setup); + +#endif /* PVR_HW_PASS_H */ diff --git a/src/imagination/vulkan/pvr_image.c b/src/imagination/vulkan/pvr_image.c new file mode 100644 index 00000000000..4be62a1c9b1 --- /dev/null +++ b/src/imagination/vulkan/pvr_image.c @@ -0,0 +1,375 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "pvr_csb.h" +#include "pvr_device_info.h" +#include "pvr_formats.h" +#include "pvr_private.h" +#include "pvr_tex_state.h" +#include "util/macros.h" +#include "util/u_math.h" +#include "vk_format.h" +#include "vk_image.h" +#include "vk_log.h" +#include "vk_util.h" +#include "wsi_common.h" + +static void pvr_image_init_memlayout(struct pvr_image *image) +{ + switch (image->vk.tiling) { + default: + unreachable("bad VkImageTiling"); + case VK_IMAGE_TILING_OPTIMAL: + if (image->vk.wsi_legacy_scanout) + image->memlayout = PVR_MEMLAYOUT_LINEAR; + else if (image->vk.image_type == VK_IMAGE_TYPE_3D) + image->memlayout = PVR_MEMLAYOUT_3DTWIDDLED; + else + image->memlayout = PVR_MEMLAYOUT_TWIDDLED; + break; + case VK_IMAGE_TILING_LINEAR: + image->memlayout = PVR_MEMLAYOUT_LINEAR; + break; + } +} + +static void pvr_image_init_physical_extent(struct pvr_image *image) +{ + assert(image->memlayout != PVR_MEMLAYOUT_UNDEFINED); + + /* clang-format off */ + if (image->vk.mip_levels > 1 || + image->memlayout == PVR_MEMLAYOUT_TWIDDLED || + image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED) { + /* clang-format on */ + image->physical_extent.width = + util_next_power_of_two(image->vk.extent.width); + image->physical_extent.height = + util_next_power_of_two(image->vk.extent.height); + image->physical_extent.depth = + util_next_power_of_two(image->vk.extent.depth); + } else { + assert(image->memlayout == PVR_MEMLAYOUT_LINEAR); + image->physical_extent = image->vk.extent; + } +} + +static void pvr_image_setup_mip_levels(struct pvr_image *image) +{ + const uint32_t extent_alignment = + image->vk.image_type == VK_IMAGE_TYPE_3D ? 
4 : 1;
+   const unsigned int cpp = vk_format_get_blocksize(image->vk.format);
+
+   /* Mip-mapped textures that are non-dword aligned need dword-aligned levels
+    * so they can be TQd from.
+    */
+   const uint32_t level_alignment = image->vk.mip_levels > 1 ? 4 : 1;
+
+   assert(image->vk.mip_levels <= ARRAY_SIZE(image->mip_levels));
+
+   image->layer_size = 0;
+
+   for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
+      const uint32_t height = u_minify(image->physical_extent.height, i);
+      const uint32_t width = u_minify(image->physical_extent.width, i);
+      const uint32_t depth = u_minify(image->physical_extent.depth, i);
+      struct pvr_mip_level *mip_level = &image->mip_levels[i];
+
+      mip_level->pitch = cpp * ALIGN(width, extent_alignment);
+      mip_level->height_pitch = ALIGN(height, extent_alignment);
+      mip_level->size = image->vk.samples * mip_level->pitch *
+                        mip_level->height_pitch *
+                        ALIGN(depth, extent_alignment);
+      mip_level->size = ALIGN(mip_level->size, level_alignment);
+      mip_level->offset = image->layer_size;
+
+      image->layer_size += mip_level->size;
+   }
+
+   /* TODO: It might be useful to store the alignment in the image so it can be
+    * checked (via an assert?) when setting
+    * RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN, assuming this is where the
+    * requirement comes from.
+    */
+   if (image->vk.array_layers > 1)
+      image->layer_size = ALIGN(image->layer_size, image->alignment);
+
+   image->size = image->layer_size * image->vk.array_layers;
+}
+
+VkResult pvr_CreateImage(VkDevice _device,
+                         const VkImageCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkImage *pImage)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_image *image;
+
+   pvr_finishme("Review whether all inputs are handled\n");
+
+   image =
+      vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image));
+   if (!image)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* All images are aligned to 4k, in case of arrays/CEM.
+    * Refer to pvr_GetImageMemoryRequirements for further details.
+    */
+   image->alignment = 4096U;
+
+   /* Initialize the image using the saved information from pCreateInfo. */
+   pvr_image_init_memlayout(image);
+   pvr_image_init_physical_extent(image);
+   pvr_image_setup_mip_levels(image);
+
+   *pImage = pvr_image_to_handle(image);
+
+   return VK_SUCCESS;
+}
+
+void pvr_DestroyImage(VkDevice _device,
+                      VkImage _image,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, _image);
+
+   if (!image)
+      return;
+
+   pvr_unbind_memory(device, image->vma);
+   vk_image_destroy(&device->vk, pAllocator, &image->vk);
+}
+
+/* clang-format off */
+/* Consider a 4 page buffer object.
+ *   _________________________________________
+ *  |         |          |         |          |
+ *  |_________|__________|_________|__________|
+ *       |
+ *       \__ offset (0.5 page size)
+ *
+ *       |___size(2 pages)____|
+ *
+ *  |__VMA size required (3 pages)__|
+ *
+ *       |
+ *       \__ returned dev_addr = vma + offset % page_size
+ *
+ * VMA size = align(size + offset % page_size, page_size);
+ *
+ * Note: the above handling is currently divided between the generic
+ * driver code and the winsys layer. The details of how this is handled
+ * are given below.
+ * * As the winsys vma allocation interface does not have offset information,
+ *   it cannot calculate the extra size needed to adjust for the unaligned
+ *   offset. So the generic code is responsible for allocating a VMA that has
+ *   extra space to deal with the above scenario.
+ * * Remaining work of mapping the vma to bo is done by vma_map interface, + * as it contains offset information, we don't need to do any adjustments + * in the generic code for this part. + * + * TODO: Look into merging heap_alloc and vma_map into single interface. + */ +/* clang-format on */ + +VkResult pvr_BindImageMemory2(VkDevice _device, + uint32_t bindInfoCount, + const VkBindImageMemoryInfo *pBindInfos) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + uint32_t i; + + for (i = 0; i < bindInfoCount; i++) { + PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory); + PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image); + + VkResult result = pvr_bind_memory(device, + mem, + pBindInfos[i].memoryOffset, + image->size, + image->alignment, + &image->vma, + &image->dev_addr); + if (result != VK_SUCCESS) { + while (i--) { + PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image); + + pvr_unbind_memory(device, image->vma); + } + + return result; + } + } + + return VK_SUCCESS; +} + +void pvr_GetImageSubresourceLayout(VkDevice device, + VkImage _image, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + PVR_FROM_HANDLE(pvr_image, image, _image); + const struct pvr_mip_level *mip_level = + &image->mip_levels[subresource->mipLevel]; + + pvr_assert(subresource->mipLevel < image->vk.mip_levels); + pvr_assert(subresource->arrayLayer < image->vk.array_layers); + + layout->offset = + subresource->arrayLayer * image->layer_size + mip_level->offset; + layout->rowPitch = mip_level->pitch; + layout->depthPitch = mip_level->pitch * mip_level->height_pitch; + layout->arrayPitch = image->layer_size; + layout->size = mip_level->size; +} + +VkResult pvr_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + PVR_FROM_HANDLE(pvr_image, image, pCreateInfo->image); + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_texture_state_info info; + unsigned char input_swizzle[4]; + const uint8_t *format_swizzle; + struct pvr_image_view *iview; + VkResult result; + + iview = vk_image_view_create(&device->vk, + pCreateInfo, + pAllocator, + sizeof(*iview)); + if (!iview) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + iview->image = image; + + info.type = iview->vk.view_type; + info.base_level = iview->vk.base_mip_level; + info.mip_levels = iview->vk.level_count; + info.extent = image->vk.extent; + info.is_cube = (info.type == VK_IMAGE_VIEW_TYPE_CUBE || + info.type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY); + info.array_size = iview->vk.layer_count; + info.offset = iview->vk.base_array_layer * image->layer_size + + image->mip_levels[info.base_level].offset; + info.mipmaps_present = (image->vk.mip_levels > 1) ? true : false; + info.stride = image->physical_extent.width - 1; + info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE; + info.mem_layout = image->memlayout; + info.flags = 0; + info.sample_count = image->vk.samples; + info.addr = image->dev_addr; + + /* TODO: if ERN_46863 is supported, Depth and stencil are sampled separately + * from images with combined depth+stencil. Add logic here to handle it. 
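+    *
+    * A possible shape for that logic (editor's sketch only; the
+    * vk_format_depth_only()/vk_format_stencil_only() helpers and the
+    * iview->vk.aspects field are assumptions, not introduced by this patch):
+    *
+    *    if (iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
+    *       info.format = vk_format_stencil_only(image->vk.format);
+    *    else if (iview->vk.aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+    *       info.format = vk_format_depth_only(image->vk.format);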
+    */
+   info.format = iview->vk.format;
+
+   vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, input_swizzle);
+   format_swizzle = pvr_get_format_swizzle(info.format);
+   util_format_compose_swizzles(format_swizzle, input_swizzle, info.swizzle);
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   /* Create an additional texture state for cube type if the storage
+    * usage flag is set.
+    */
+   if (info.is_cube && image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+      info.tex_state_type = PVR_TEXTURE_STATE_STORAGE;
+      result = pvr_pack_tex_state(device,
+                                  &info,
+                                  iview->texture_state[info.tex_state_type]);
+      if (result != VK_SUCCESS)
+         goto err_vk_image_view_destroy;
+   }
+
+   /* Attachment state is created as if the mipmaps are not supported, so the
+    * baselevel is set to zero and num_mip_levels is set to 1, which gives the
+    * impression that this is the only level in the image. This also requires
+    * that width, height and depth be adjusted as well. Given iview->vk.extent
+    * is already adjusted for the base mip map level we use it here.
+    */
+   /* TODO: Investigate and document the reason for the above approach. */
+   info.extent = iview->vk.extent;
+
+   info.mip_levels = 1;
+   info.mipmaps_present = false;
+   info.stride = u_minify(image->physical_extent.width, info.base_level) - 1;
+   info.base_level = 0;
+   info.tex_state_type = PVR_TEXTURE_STATE_ATTACHMENT;
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   *pView = pvr_image_view_to_handle(iview);
+
+   return VK_SUCCESS;
+
+err_vk_image_view_destroy:
+   vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+
+   return result;
+}
+
+void pvr_DestroyImageView(VkDevice _device,
+                          VkImageView _iview,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image_view, iview, _iview);
+
+   if (!iview)
+      return;
+
+   vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+}
+
+VkResult pvr_CreateBufferView(VkDevice _device,
+                              const VkBufferViewCreateInfo *pCreateInfo,
+                              const VkAllocationCallbacks *pAllocator,
+                              VkBufferView *pView)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+void pvr_DestroyBufferView(VkDevice _device,
+                           VkBufferView bufferView,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
diff --git a/src/imagination/vulkan/pvr_job_common.c b/src/imagination/vulkan/pvr_job_common.c
new file mode 100644
index 00000000000..747434e8c19
--- /dev/null
+++ b/src/imagination/vulkan/pvr_job_common.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "hwdef/rogue_hw_defs.h" +#include "hwdef/rogue_hw_utils.h" +#include "pvr_device_info.h" +#include "pvr_job_common.h" +#include "pvr_private.h" +#include "util/macros.h" +#include "util/u_math.h" +#include "vk_alloc.h" +#include "vk_format.h" +#include "vk_object.h" + +/* clang-format off */ +static enum PVRX(PBESTATE_SWIZ) +pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz) +/* clang-format on */ +{ + switch (swz) { + case PIPE_SWIZZLE_0: + return ROGUE_PBESTATE_SWIZ_ZERO; + case PIPE_SWIZZLE_1: + return ROGUE_PBESTATE_SWIZ_ONE; + case PIPE_SWIZZLE_X: + return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0; + case PIPE_SWIZZLE_Y: + return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1; + case PIPE_SWIZZLE_Z: + return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2; + case PIPE_SWIZZLE_W: + return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3; + case PIPE_SWIZZLE_NONE: + if (comp == VK_COMPONENT_SWIZZLE_A) + return ROGUE_PBESTATE_SWIZ_ONE; + else + return ROGUE_PBESTATE_SWIZ_ZERO; + default: + unreachable("Unknown enum pipe_swizzle"); + }; +} + +void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format, + enum pvr_pbe_gamma default_gamma, + bool with_packed_usc_channel, + uint32_t *const src_format_out, + enum pvr_pbe_gamma *const gamma_out) +{ + uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0); + + *gamma_out = default_gamma; + + if (vk_format_has_32bit_component(vk_format) || + vk_format_is_pure_integer(vk_format)) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); + } else if (vk_format_is_float(vk_format)) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); + } else if (vk_format_is_srgb(vk_format)) { + *gamma_out = PVR_PBE_GAMMA_ENABLED; + + /* F16 source for gamma'd formats. 
*/ + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); + } else if (vk_format_has_depth(vk_format) && + vk_format_get_component_size_in_bits(vk_format, + UTIL_FORMAT_COLORSPACE_ZS, + 0) > 16) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); + } else if (vk_format_has_stencil(vk_format) && + vk_format_get_component_size_in_bits(vk_format, + UTIL_FORMAT_COLORSPACE_ZS, + 1) > 0) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); + } else if (chan_0_width > 16) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); + } else if (chan_0_width > 8) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); + } else if (!with_packed_usc_channel) { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); + } else { + *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); + } +} + +static void pvr_pbe_get_src_pos(struct pvr_device *device, + enum pvr_pbe_source_start_pos source_start, + uint32_t *const src_pos_out, + bool *const src_pos_offset_128_out) +{ + *src_pos_offset_128_out = false; + + switch (source_start) { + case PVR_PBE_STARTPOS_BIT32: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); + break; + + case PVR_PBE_STARTPOS_BIT64: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); + break; + + case PVR_PBE_STARTPOS_BIT96: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); + break; + + case PVR_PBE_STARTPOS_BIT0: + default: + if (PVR_HAS_FEATURE(&device->pdevice->dev_info, eight_output_registers)) { + switch (source_start) { + case PVR_PBE_STARTPOS_BIT128: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); + *src_pos_offset_128_out = true; + break; + + case PVR_PBE_STARTPOS_BIT160: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); + *src_pos_offset_128_out = true; + break; + + case PVR_PBE_STARTPOS_BIT192: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); + *src_pos_offset_128_out = true; + break; + + case PVR_PBE_STARTPOS_BIT224: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); + *src_pos_offset_128_out = true; + break; + + default: + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); + break; + } + } else { + *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); + } + break; + } +} + +void pvr_pbe_pack_state( + struct pvr_device *device, + const struct pvr_pbe_surf_params *surface_params, + const struct pvr_pbe_render_params *render_params, + uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], + uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) +{ + /* This function needs updating if the value of + * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected + * value. + */ + STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2); + + /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS + * changes, so check that it's the expected value. 
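+    *
+    * Illustrative call shape (editor's sketch, not part of the patch): the
+    * fixed array sizes in the prototype force callers to provide correctly
+    * sized storage, e.g.:
+    *
+    *    uint32_t pbe_cs_words[ROGUE_NUM_PBESTATE_STATE_WORDS];
+    *    uint64_t pbe_reg_words[ROGUE_NUM_PBESTATE_REG_WORDS];
+    *
+    *    pvr_pbe_pack_state(device, &surf_params, &render_params,
+    *                       pbe_cs_words, pbe_reg_words);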
+    */
+   STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
+
+   pbe_reg_words[2] = 0;
+
+   if (surface_params->z_only_render) {
+      pbe_cs_words[0] = 0;
+
+      pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+         state.emptytile = true;
+      }
+
+      pbe_reg_words[0] = 0;
+      pbe_reg_words[1] = 0;
+
+      return;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
+      state.address_low = surface_params->addr;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+      state.address_high = surface_params->addr;
+
+      state.source_format = surface_params->source_format;
+
+      pvr_pbe_get_src_pos(device,
+                          render_params->source_start,
+                          &state.source_pos,
+                          &state.source_pos_offset_128);
+
+      /* MRT index (use 0 for a single render target). */
+      state.mrt_index = render_params->mrt_index;
+
+      /* Normalization flag based on output format. */
+      state.norm = surface_params->is_normalized;
+
+      state.packmode = surface_params->pbe_packmode;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
+      reg.tilerelative = true;
+
+      switch (surface_params->mem_layout) {
+      case PVR_MEMLAYOUT_TWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
+         break;
+
+      case PVR_MEMLAYOUT_3DTWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
+         break;
+
+      case PVR_MEMLAYOUT_LINEAR:
+      default:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
+         break;
+      }
+
+      /* FIXME: Remove rotation and y_flip hardcoding if needed. */
+      reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
+      reg.y_flip = false;
+
+      /* Note: Due to gamma being overridden above, anything other than
+       * ENABLED/NONE is ignored.
+       */
+      if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
+         reg.gamma = true;
+
+         if (surface_params->nr_components == 2)
+            reg.twocomp_gamma =
+               PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
+      }
+
+      reg.linestride = (surface_params->stride - 1) /
+                       PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
+      reg.minclip_x = render_params->min_x_clip;
+
+      reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R,
+                                              surface_params->swizzle[0]);
+      reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G,
+                                              surface_params->swizzle[1]);
+      reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B,
+                                              surface_params->swizzle[2]);
+      reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A,
+                                              surface_params->swizzle[3]);
+
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+         reg.size_z = util_logbase2_ceil(surface_params->depth);
+
+      reg.downscale = surface_params->down_scale;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+          surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         reg.size_x = util_logbase2_ceil(surface_params->width);
+         reg.size_y = util_logbase2_ceil(surface_params->height);
+      }
+
+      reg.minclip_y = render_params->min_y_clip;
+      reg.maxclip_x = render_params->max_x_clip;
+      reg.zslice = render_params->slice;
+      reg.maxclip_y = render_params->max_y_clip;
+   }
+}
+
+/* TODO: Split this into smaller functions to make it easier to follow. When
+ * doing this, it would be nice to have a function that returns
+ * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
+ * pvr_render_job_ws_fragment_state_init().
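+ *
+ * Illustrative usage (editor's sketch; the msaa_mode and pixel_width values
+ * here are arbitrary assumptions, not taken from this patch):
+ *
+ *    uint32_t isp_ctl;
+ *    uint32_t pixel_ctl;
+ *
+ *    pvr_setup_tiles_in_flight(dev_info,
+ *                              PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE),
+ *                              1U,
+ *                              false,
+ *                              0U,
+ *                              &isp_ctl,
+ *                              &pixel_ctl);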
+ */
+void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
+                               uint32_t msaa_mode,
+                               uint32_t pixel_width,
+                               bool paired_tiles,
+                               uint32_t max_tiles_in_flight,
+                               uint32_t *const isp_ctl_out,
+                               uint32_t *const pixel_ctl_out)
+{
+   uint32_t total_tiles_in_flight = 0;
+   uint32_t usable_partition_size;
+   uint32_t partitions_available;
+   uint32_t usc_min_output_regs;
+   uint32_t max_partitions;
+   uint32_t partition_size;
+   uint32_t max_phantoms;
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+   uint32_t isp_samples;
+
+   /* Round up the pixel width to the next allocation granularity. */
+   usc_min_output_regs =
+      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
+   pixel_width = MAX2(pixel_width, usc_min_output_regs);
+   pixel_width = util_next_power_of_two(pixel_width);
+
+   assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
+
+   partition_size = pixel_width;
+
+   isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
+   if (isp_samples == 2) {
+      if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
+         partition_size *= 2U;
+   } else if (isp_samples == 4) {
+      if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
+          msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
+         partition_size *= 4U;
+      else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
+         partition_size *= 2U;
+   }
+
+   /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
+    * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
+    * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
+    */
+   tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+
+   /* We only support square tiles. */
+   assert(tile_size_x == tile_size_y);
+
+   if (tile_size_x == 16U) {
+      /* Cores with 16x16 tiles do not use tile quadrants. */
+      partition_size *= tile_size_x * tile_size_y;
+   } else {
+      /* Size of a tile quadrant (in dwords). */
+      partition_size *= (tile_size_x * tile_size_y / 4U);
+   }
+
+   /* Maximum available partition space for partitions of this size. */
+   max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+   usable_partition_size =
+      MIN2(rogue_get_total_reserved_partition_size(dev_info),
+           partition_size * max_partitions);
+
+   if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
+       (1024 * 4 * 4)) {
+      /* Do not apply the limit for cores with 16x16 tile size (no quadrant
+       * affinity).
+       */
+      if (tile_size_x != 16) {
+         /* This is to counter the extremely limited CS size on some cores. */
+         /* Available partition space is limited to 8 tile quadrants. */
+         usable_partition_size =
+            MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
+      }
+   }
+
+   /* Ensure that the maximum number of partitions in use is not greater
+    * than the total number of partitions available.
+    */
+   partitions_available =
+      MIN2(max_partitions, usable_partition_size / partition_size);
+
+   if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+      max_phantoms = rogue_get_num_phantoms(dev_info);
+   else if (PVR_HAS_FEATURE(dev_info, roguexe))
+      max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
+   else
+      max_phantoms = 1;
+
+   for (uint32_t i = 0; i < max_phantoms; i++) {
+      uint32_t usc_tiles_in_flight = partitions_available;
+      uint32_t isp_tiles_in_flight;
+
+      /* Cores with tile sizes other than 16x16 use tile quadrants.
*/ + if (tile_size_x != 16) { + uint32_t num_clusters = + PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U); + usc_tiles_in_flight = + (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U; + } + + assert(usc_tiles_in_flight > 0); + + isp_tiles_in_flight = + PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); + /* Ensure that maximum number of ISP tiles in flight is not greater + * than the maximum number of USC tiles in flight. + */ + if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) || + PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) != + 2) { + isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info); + } + + isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight); + + /* Limit the number of tiles in flight if the shaders have + * requested a large allocation of local memory. + */ + if (max_tiles_in_flight > 0U) { + isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight); + + if (PVR_HAS_FEATURE(dev_info, roguexe)) { + if (tile_size_x == 16) { + /* The FW infers the tiles in flight value from the + * partitions setting. + */ + /* Partitions per tile. */ + partitions_available = isp_tiles_in_flight; + } else { + /* Partitions per tile quadrant. */ + partitions_available = isp_tiles_in_flight * 4U; + } + } + } + + /* Due to limitations of ISP_CTL_PIPE there can only be a difference of + * 1 between Phantoms. + */ + if (total_tiles_in_flight > (isp_tiles_in_flight + 1U)) + total_tiles_in_flight = isp_tiles_in_flight + 1U; + + total_tiles_in_flight += isp_tiles_in_flight; + } + + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && + PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) == + 2) { + /* Limit the ISP tiles in flight to fit into the available USC partition + * store. + */ + total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available); + } + + if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) { + total_tiles_in_flight = + MIN2(total_tiles_in_flight, partitions_available / 2); + } + + pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) { + if (pixel_width == 1 && usc_min_output_regs == 1) { + reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); + } else if (pixel_width == 2) { + reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); + } else if (pixel_width == 4) { + reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS); + } else if (pixel_width == 8 && + PVR_HAS_FEATURE(dev_info, eight_output_registers)) { + reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS); + } else if (usc_min_output_regs == 1) { + reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); + } else { + reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); + } + + if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) { + /* Setup the partition mask based on the maximum number of + * partitions available. + */ + reg.partition_mask = (1 << max_partitions) - 1; + } else { + reg.enable_4th_partition = true; + + /* Setup the partition mask based on the number of partitions + * available. 
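+          *
+          * e.g. (editor's note) partitions_available == 4 gives a mask of
+          * 0xf, i.e. only the four lowest partitions are usable.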
+          */
+         reg.partition_mask = (1U << partitions_available) - 1U;
+      }
+   }
+
+   pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
+      if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+         reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
+      else
+         reg.pipe_enable = total_tiles_in_flight - 1;
+   }
+}
diff --git a/src/imagination/vulkan/pvr_job_common.h b/src/imagination/vulkan/pvr_job_common.h
new file mode 100644
index 00000000000..f197002862d
--- /dev/null
+++ b/src/imagination/vulkan/pvr_job_common.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_COMMON_H
+#define PVR_JOB_COMMON_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+enum pvr_pbe_gamma {
+   PVR_PBE_GAMMA_NONE,
+   /* For two-channel pack formats. */
+   PVR_PBE_GAMMA_RED,
+   PVR_PBE_GAMMA_REDGREEN,
+   /* For all other pack formats. */
+   PVR_PBE_GAMMA_ENABLED,
+};
+
+enum pvr_pbe_source_start_pos {
+   PVR_PBE_STARTPOS_BIT0,
+   PVR_PBE_STARTPOS_BIT32,
+   PVR_PBE_STARTPOS_BIT64,
+   PVR_PBE_STARTPOS_BIT96,
+   /* The ones below are available if the has_eight_output_registers feature
+    * is enabled.
+    */
+   PVR_PBE_STARTPOS_BIT128,
+   PVR_PBE_STARTPOS_BIT160,
+   PVR_PBE_STARTPOS_BIT192,
+   PVR_PBE_STARTPOS_BIT224,
+};
+
+/**
+ * These are parameters specific to the surface being set up and hence can
+ * typically be set up at surface creation time.
+ */
+struct pvr_pbe_surf_params {
+   /* Swizzle for a format can be retrieved using pvr_get_format_swizzle(). */
+   uint8_t swizzle[4];
+   /* is_normalized can be retrieved using vk_format_is_normalized(). */
+   bool is_normalized;
+   /* pbe_packmode can be retrieved using pvr_get_pbe_packmode(). */
+   uint32_t pbe_packmode;
+   /* source_format and gamma can be retrieved using
+    * pvr_pbe_get_src_format_and_gamma().
+    */
+   uint32_t source_format;
+   enum pvr_pbe_gamma gamma;
+   /* nr_components can be retrieved using vk_format_get_nr_components(). */
+   uint32_t nr_components;
+
+   /* When an RT of an MRT setup is packed using fewer USC outputs, this flag
+    * needs to be set to true.
+    *
+    * Currently, this flag is only considered when has_usc_f16_sop is enabled.
+    * It needs to be true when a render target should by default use an F16
+    * USC channel but uses a U8 channel instead for squeezing into on-chip MRT.
+ * + * It is better to make this member with FF_ACCUMFORMAT type or, at least, + * describe USC channel size. But for now, only use this flag for + * simplicity. + */ + + pvr_dev_addr_t addr; + enum pvr_memlayout mem_layout; + uint32_t stride; + + /* Depth size for renders */ + uint32_t depth; + + /* Pre-rotation dimensions of surface */ + uint32_t width; + uint32_t height; + + bool z_only_render; + bool down_scale; + uint32_t msaa_mode; +}; + +/** + * These parameters are generally render-specific and need to be set up at the + * time #pvr_pbe_pack_state() is called. + */ +struct pvr_pbe_render_params { + /* Clipping params are in terms of pixels and are inclusive. */ + uint32_t min_x_clip; + uint32_t max_x_clip; + + uint32_t min_y_clip; + uint32_t max_y_clip; + + /* Start position of pixels to be read within 128bit USC output buffer. */ + enum pvr_pbe_source_start_pos source_start; + + /* 9-bit slice number to be used when memlayout is 3D twiddle. */ + uint32_t slice; + + /* Index */ + uint32_t mrt_index; +}; + +void pvr_pbe_pack_state( + struct pvr_device *device, + const struct pvr_pbe_surf_params *surface_params, + const struct pvr_pbe_render_params *render_params, + uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], + uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]); + +/* Helper to calculate pvr_pbe_surf_params::gamma and + * pvr_pbe_surf_params::source_format. + */ +void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format, + enum pvr_pbe_gamma default_gamma, + bool with_packed_usc_channel, + uint32_t *const src_format_out, + enum pvr_pbe_gamma *const gamma_out); + +void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, + uint32_t msaa_mode, + uint32_t pixel_width, + bool paired_tiles, + uint32_t max_tiles_in_flight, + uint32_t *const isp_ctl_out, + uint32_t *const pixel_ctl_out); + +#endif /* PVR_JOB_COMMON_H */ diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c new file mode 100644 index 00000000000..34b55caa61a --- /dev/null +++ b/src/imagination/vulkan/pvr_job_compute.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include "pvr_csb.h" +#include "pvr_job_common.h" +#include "pvr_job_context.h" +#include "pvr_job_compute.h" +#include "pvr_private.h" +#include "pvr_winsys.h" +#include "util/macros.h" + +static void pvr_compute_job_ws_submit_info_init( + struct pvr_compute_ctx *ctx, + struct pvr_sub_cmd *sub_cmd, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_compute_submit_info *submit_info) +{ + const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch; + uint32_t shared_regs = sub_cmd->compute.num_shared_regs; + + submit_info->frame_num = ctx->device->global_queue_present_count; + submit_info->job_num = ctx->device->global_queue_job_count; + + submit_info->semaphores = semaphores; + submit_info->semaphore_count = semaphore_count; + submit_info->stage_flags = stage_flags; + + /* Other registers are initialized in pvr_sub_cmd_compute_job_init(). */ + pvr_csb_pack (&submit_info->regs.cdm_resume_pds1, + CR_CDM_CONTEXT_PDS1, + state) { + /* Convert the data size from dwords to bytes. */ + const uint32_t load_program_data_size = + ctx_switch->sr[0].pds.load_program.data_size * 4U; + + state.pds_seq_dep = false; + state.usc_seq_dep = false; + state.target = false; + state.unified_size = ctx_switch->sr[0].usc.unified_size; + state.common_shared = true; + state.common_size = + DIV_ROUND_UP(shared_regs << 2, + PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE)); + state.temp_size = 0; + + assert(load_program_data_size % + PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE) == + 0); + state.data_size = + load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE); + state.fence = false; + } +} + +VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, + struct pvr_sub_cmd *sub_cmd, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj **const syncobj_out) +{ + struct pvr_device *device = ctx->device; + + pvr_compute_job_ws_submit_info_init(ctx, + sub_cmd, + semaphores, + semaphore_count, + stage_flags, + &sub_cmd->compute.submit_info); + + return device->ws->ops->compute_submit(ctx->ws_ctx, + &sub_cmd->compute.submit_info, + syncobj_out); +} diff --git a/src/imagination/vulkan/pvr_job_compute.h b/src/imagination/vulkan/pvr_job_compute.h new file mode 100644 index 00000000000..b71add82fbf --- /dev/null +++ b/src/imagination/vulkan/pvr_job_compute.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_JOB_COMPUTE_H +#define PVR_JOB_COMPUTE_H + +#include +#include + +struct pvr_compute_ctx; +struct pvr_sub_cmd; +struct pvr_winsys_syncobj; + +VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, + struct pvr_sub_cmd *sub_cmd, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj **const syncobj_out); + +#endif /* PVR_JOB_COMPUTE_H */ diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c new file mode 100644 index 00000000000..871a75b61fa --- /dev/null +++ b/src/imagination/vulkan/pvr_job_context.c @@ -0,0 +1,1183 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "hwdef/rogue_hw_utils.h" +#include "pvr_bo.h" +#include "pvr_csb.h" +#include "pvr_job_context.h" +#include "pvr_pds.h" +#include "pvr_private.h" +#include "pvr_cdm_load_sr.h" +#include "pvr_vdm_load_sr.h" +#include "pvr_vdm_store_sr.h" +#include "pvr_winsys.h" +#include "util/macros.h" +#include "vk_alloc.h" +#include "vk_log.h" + +/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this + * value when constructing the control stream? + */ +/* The VDM callstack is used by the hardware to implement control stream links + * with a return, i.e. sub-control streams/subroutines. This value specifies the + * maximum callstack depth. + */ +#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U + +#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U + +static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device, + struct pvr_reset_cmd *const reset_cmd) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + + /* The reset framework depends on compute support in the hw. 
*/ + assert(PVR_HAS_FEATURE(dev_info, compute)); + + if (PVR_HAS_QUIRK(dev_info, 51764)) + pvr_finishme("Missing reset support for brn51764"); + + if (PVR_HAS_QUIRK(dev_info, 58839)) + pvr_finishme("Missing reset support for brn58839"); + + return VK_SUCCESS; +} + +static void pvr_ctx_reset_cmd_fini(struct pvr_device *device, + struct pvr_reset_cmd *reset_cmd) + +{ + /* TODO: reset command cleanup. */ +} + +static VkResult pvr_pds_pt_store_program_create_and_upload( + struct pvr_device *device, + struct pvr_bo *pt_bo, + uint32_t pt_bo_size, + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_pds_stream_out_terminate_program program = { 0 }; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + size_t staging_buffer_size; + uint32_t *staging_buffer; + uint32_t *data_buffer; + uint32_t *code_buffer; + VkResult result; + + /* Check the bo size can be converted to dwords without any rounding. */ + assert(pt_bo_size % 4 == 0); + + program.pds_persistent_temp_size_to_store = pt_bo_size / 4; + program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr; + + pvr_pds_generate_stream_out_terminate_program(&program, + NULL, + PDS_GENERATE_SIZES, + dev_info); + + staging_buffer_size = (program.stream_out_terminate_pds_data_size + + program.stream_out_terminate_pds_code_size) * + sizeof(*staging_buffer); + + staging_buffer = vk_zalloc(&device->vk.alloc, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + data_buffer = staging_buffer; + code_buffer = + pvr_pds_generate_stream_out_terminate_program(&program, + data_buffer, + PDS_GENERATE_DATA_SEGMENT, + dev_info); + pvr_pds_generate_stream_out_terminate_program(&program, + code_buffer, + PDS_GENERATE_CODE_SEGMENT, + dev_info); + + /* This PDS program is passed to the HW via the PPP state words. These only + * allow the data segment address to be specified and expect the code + * segment to immediately follow. Assume the code alignment is the same as + * the data. + */ + result = + pvr_gpu_upload_pds(device, + data_buffer, + program.stream_out_terminate_pds_data_size, + PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), + code_buffer, + program.stream_out_terminate_pds_code_size, + PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), + cache_line_size, + pds_upload_out); + + vk_free(&device->vk.alloc, staging_buffer); + + return result; +} + +static VkResult pvr_pds_pt_resume_program_create_and_upload( + struct pvr_device *device, + struct pvr_bo *pt_bo, + uint32_t pt_bo_size, + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_pds_stream_out_init_program program = { 0 }; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + size_t staging_buffer_size; + uint32_t *staging_buffer; + uint32_t *data_buffer; + uint32_t *code_buffer; + VkResult result; + + /* Check the bo size can be converted to dwords without any rounding. 
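+    *
+    * e.g. (editor's note) a pt_bo_size of 2048 bytes stores cleanly as
+    * 2048 / 4 == 512 dwords; any size that is not a multiple of 4 would
+    * trip the assert below.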
*/ + assert(pt_bo_size % 4 == 0); + + program.num_buffers = 1; + program.pds_buffer_data_size[0] = pt_bo_size / 4; + program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr; + + pvr_pds_generate_stream_out_init_program(&program, + NULL, + false, + PDS_GENERATE_SIZES, + dev_info); + + staging_buffer_size = (program.stream_out_init_pds_data_size + + program.stream_out_init_pds_code_size) * + sizeof(*staging_buffer); + + staging_buffer = vk_zalloc(&device->vk.alloc, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + data_buffer = staging_buffer; + code_buffer = + pvr_pds_generate_stream_out_init_program(&program, + data_buffer, + false, + PDS_GENERATE_DATA_SEGMENT, + dev_info); + pvr_pds_generate_stream_out_init_program(&program, + code_buffer, + false, + PDS_GENERATE_CODE_SEGMENT, + dev_info); + + /* This PDS program is passed to the HW via the PPP state words. These only + * allow the data segment address to be specified and expect the code + * segment to immediately follow. Assume the code alignment is the same as + * the data. + */ + result = + pvr_gpu_upload_pds(device, + data_buffer, + program.stream_out_init_pds_data_size, + PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), + code_buffer, + program.stream_out_init_pds_code_size, + PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), + cache_line_size, + pds_upload_out); + + vk_free(&device->vk.alloc, staging_buffer); + + return result; +} + +static VkResult +pvr_render_job_pt_programs_setup(struct pvr_device *device, + struct rogue_pt_programs *pt_programs) +{ + VkResult result; + + result = pvr_bo_alloc(device, + device->heaps.pds_heap, + ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, + ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT, + PVR_BO_ALLOC_FLAG_CPU_ACCESS, + &pt_programs->store_resume_state_bo); + if (result != VK_SUCCESS) + return result; + + result = pvr_pds_pt_store_program_create_and_upload( + device, + pt_programs->store_resume_state_bo, + ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, + &pt_programs->pds_store_program); + if (result != VK_SUCCESS) + goto err_free_store_resume_state_bo; + + result = pvr_pds_pt_resume_program_create_and_upload( + device, + pt_programs->store_resume_state_bo, + ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, + &pt_programs->pds_resume_program); + if (result != VK_SUCCESS) + goto err_free_pds_store_program; + + return VK_SUCCESS; + +err_free_pds_store_program: + pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo); + +err_free_store_resume_state_bo: + pvr_bo_free(device, pt_programs->store_resume_state_bo); + + return result; +} + +static void +pvr_render_job_pt_programs_cleanup(struct pvr_device *device, + struct rogue_pt_programs *pt_programs) +{ + pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo); + pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo); + pvr_bo_free(device, pt_programs->store_resume_state_bo); +} + +static void pvr_pds_ctx_sr_program_setup( + bool cc_enable, + uint64_t usc_program_upload_offset, + uint8_t usc_temps, + pvr_dev_addr_t sr_addr, + struct pvr_pds_shared_storing_program *const program_out) +{ + /* The PDS task is the same for stores and loads. 
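+    *
+    * For illustration (editor's note), the render and compute wrappers below
+    * differ only in their cc_enable argument, e.g.:
+    *
+    *    pvr_pds_ctx_sr_program_setup(false, ..., &program);
+    *    pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
+    *                                 ..., &program);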
*/ + *program_out = (struct pvr_pds_shared_storing_program){ + .cc_enable = cc_enable, + .doutw_control = { + .dest_store = PDS_UNIFIED_STORE, + .num_const64 = 2, + .doutw_data = { + [0] = sr_addr.addr, + [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE, + }, + .last_instruction = false, + }, + }; + + pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control, + usc_program_upload_offset, + usc_temps, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); +} + +/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to + * this. If there is a problem here it's likely that the same problem exists + * there so don't forget to update the compute function. + */ +static VkResult pvr_pds_render_ctx_sr_program_create_and_upload( + struct pvr_device *device, + uint64_t usc_program_upload_offset, + uint8_t usc_temps, + pvr_dev_addr_t sr_addr, + struct pvr_pds_upload *const pds_upload_out) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + const uint32_t pds_data_alignment = + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; + + /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data + * and code size when using the PDS_GENERATE_SIZES mode. + */ + STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); + uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; + struct pvr_pds_shared_storing_program program; + ASSERTED uint32_t *buffer_end; + uint32_t code_offset; + + pvr_pds_ctx_sr_program_setup(false, + usc_program_upload_offset, + usc_temps, + sr_addr, + &program); + + pvr_pds_generate_shared_storing_program(&program, + &staging_buffer[0], + PDS_GENERATE_DATA_SEGMENT, + dev_info); + + code_offset = ALIGN_POT(program.data_size, pds_data_alignment); + + buffer_end = + pvr_pds_generate_shared_storing_program(&program, + &staging_buffer[code_offset], + PDS_GENERATE_CODE_SEGMENT, + dev_info); + + assert((uint32_t)(buffer_end - staging_buffer) * 4 < + ROGUE_PDS_TASK_PROGRAM_SIZE); + + return pvr_gpu_upload_pds(device, + &staging_buffer[0], + program.data_size, + PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT), + &staging_buffer[code_offset], + program.code_size, + PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT), + cache_line_size, + pds_upload_out); +} + +/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to + * this. If there is a problem here it's likely that the same problem exists + * there so don't forget to update the render_ctx function. + */ +static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload( + struct pvr_device *device, + bool is_loading_program, + uint64_t usc_program_upload_offset, + uint8_t usc_temps, + pvr_dev_addr_t sr_addr, + struct pvr_pds_upload *const pds_upload_out) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + const uint32_t pds_data_alignment = + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; + + /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data + * and code size when using the PDS_GENERATE_SIZES mode. 
+ */ + STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); + uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; + struct pvr_pds_shared_storing_program program; + uint32_t *buffer_ptr; + uint32_t code_offset; + + pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421), + usc_program_upload_offset, + usc_temps, + sr_addr, + &program); + + if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { + pvr_pds_generate_compute_shared_loading_program(&program, + &staging_buffer[0], + PDS_GENERATE_DATA_SEGMENT, + dev_info); + } else { + pvr_pds_generate_shared_storing_program(&program, + &staging_buffer[0], + PDS_GENERATE_DATA_SEGMENT, + dev_info); + } + + code_offset = ALIGN_POT(program.data_size, pds_data_alignment); + + buffer_ptr = + pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset], + PDS_GENERATE_CODE_SEGMENT); + + if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { + buffer_ptr = pvr_pds_generate_compute_shared_loading_program( + &program, + buffer_ptr, + PDS_GENERATE_CODE_SEGMENT, + dev_info); + } else { + buffer_ptr = + pvr_pds_generate_shared_storing_program(&program, + buffer_ptr, + PDS_GENERATE_CODE_SEGMENT, + dev_info); + } + + assert((uint32_t)(buffer_ptr - staging_buffer) * 4 < + ROGUE_PDS_TASK_PROGRAM_SIZE); + + STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) == + PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT)); + + STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) == + PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT)); + + return pvr_gpu_upload_pds( + device, + &staging_buffer[0], + program.data_size, + PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT), + &staging_buffer[code_offset], + (uint32_t)(buffer_ptr - &staging_buffer[code_offset]), + PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT), + cache_line_size, + pds_upload_out); +} + +enum pvr_ctx_sr_program_target { + PVR_CTX_SR_RENDER_TARGET, + PVR_CTX_SR_COMPUTE_TARGET, +}; + +static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device, + enum pvr_ctx_sr_program_target target, + struct rogue_sr_programs *sr_programs) +{ + const uint64_t store_load_state_bo_size = + PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) + + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); + uint64_t usc_store_program_upload_offset; + uint64_t usc_load_program_upload_offset; + const uint8_t *usc_load_sr_code; + uint32_t usc_load_sr_code_size; + VkResult result; + + /* Note that this is being used for both compute and render ctx. There is no + * compute equivalent define for the VDMCTRL unit size. + */ + /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */ + sr_programs->usc.unified_size = + DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE)); + + result = pvr_bo_alloc(device, + device->heaps.pds_heap, + store_load_state_bo_size, + cache_line_size, + PVR_WINSYS_BO_FLAG_CPU_ACCESS, + &sr_programs->store_load_state_bo); + if (result != VK_SUCCESS) + return result; + + /* USC state update: SR state store. 
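+    * This uploads the precompiled USC program (pvr_vdm_store_sr_code below)
+    * that writes the shared register state out to memory.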
*/ + + assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); + + result = pvr_gpu_upload_usc(device, + pvr_vdm_store_sr_code, + sizeof(pvr_vdm_store_sr_code), + cache_line_size, + &sr_programs->usc.store_program_bo); + if (result != VK_SUCCESS) + goto err_free_store_load_state_bo; + + usc_store_program_upload_offset = + sr_programs->usc.store_program_bo->vma->dev_addr.addr - + device->heaps.usc_heap->base_addr.addr; + + /* USC state update: SR state load. */ + + if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) { + STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); + + usc_load_sr_code = pvr_cdm_load_sr_code; + usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code); + } else { + STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); + + usc_load_sr_code = pvr_vdm_load_sr_code; + usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code); + } + + result = pvr_gpu_upload_usc(device, + usc_load_sr_code, + usc_load_sr_code_size, + cache_line_size, + &sr_programs->usc.load_program_bo); + if (result != VK_SUCCESS) + goto err_free_usc_store_program_bo; + + usc_load_program_upload_offset = + sr_programs->usc.load_program_bo->vma->dev_addr.addr - + device->heaps.usc_heap->base_addr.addr; + + /* FIXME: The number of USC temps should be output alongside + * pvr_vdm_store_sr_code rather than hard coded. + */ + /* Create and upload the PDS load and store programs. Point them to the + * appropriate USC load and store programs. + */ + switch (target) { + case PVR_CTX_SR_RENDER_TARGET: + /* PDS state update: SR state store. */ + result = pvr_pds_render_ctx_sr_program_create_and_upload( + device, + usc_store_program_upload_offset, + 8, + sr_programs->store_load_state_bo->vma->dev_addr, + &sr_programs->pds.store_program); + if (result != VK_SUCCESS) + goto err_free_usc_load_program_bo; + + /* PDS state update: SR state load. */ + result = pvr_pds_render_ctx_sr_program_create_and_upload( + device, + usc_load_program_upload_offset, + 20, + sr_programs->store_load_state_bo->vma->dev_addr, + &sr_programs->pds.load_program); + if (result != VK_SUCCESS) + goto err_free_pds_store_program_bo; + + break; + + case PVR_CTX_SR_COMPUTE_TARGET: + /* PDS state update: SR state store. */ + result = pvr_pds_compute_ctx_sr_program_create_and_upload( + device, + false, + usc_store_program_upload_offset, + 8, + sr_programs->store_load_state_bo->vma->dev_addr, + &sr_programs->pds.store_program); + if (result != VK_SUCCESS) + goto err_free_usc_load_program_bo; + + /* PDS state update: SR state load. 
*/
+      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
+         device,
+         true,
+         usc_load_program_upload_offset,
+         20,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.load_program);
+      if (result != VK_SUCCESS)
+         goto err_free_pds_store_program_bo;
+
+      break;
+
+   default:
+      unreachable("Invalid target.");
+      break;
+   }
+
+   return VK_SUCCESS;
+
+err_free_pds_store_program_bo:
+   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
+
+err_free_usc_load_program_bo:
+   pvr_bo_free(device, sr_programs->usc.load_program_bo);
+
+err_free_usc_store_program_bo:
+   pvr_bo_free(device, sr_programs->usc.store_program_bo);
+
+err_free_store_load_state_bo:
+   pvr_bo_free(device, sr_programs->store_load_state_bo);
+
+   return result;
+}
+
+static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
+                                        struct rogue_sr_programs *sr_programs)
+{
+   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
+   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
+   pvr_bo_free(device, sr_programs->usc.load_program_bo);
+   pvr_bo_free(device, sr_programs->usc.store_program_bo);
+   pvr_bo_free(device, sr_programs->store_load_state_bo);
+}
+
+static VkResult
+pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
+                                     struct pvr_render_ctx_programs *programs)
+{
+   VkResult result;
+
+   result = pvr_render_job_pt_programs_setup(device, &programs->pt);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_ctx_sr_programs_setup(device,
+                                      PVR_CTX_SR_RENDER_TARGET,
+                                      &programs->sr);
+   if (result != VK_SUCCESS)
+      goto err_pt_programs_cleanup;
+
+   return VK_SUCCESS;
+
+err_pt_programs_cleanup:
+   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+
+   return result;
+}
+
+static void
+pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
+                                       struct pvr_render_ctx_programs *programs)
+{
+   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
+   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+}
+
+static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
+                                           struct pvr_render_ctx *ctx)
+{
+   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
+   const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                       PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                        PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   VkResult result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
+                         vdm_state_bo_flags,
+                         &ctx_switch->vdm_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_TA_STATE_BUFFER_SIZE,
+                         ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
+                         geom_state_bo_flags,
+                         &ctx_switch->geom_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_pvr_bo_free_vdm_state_bo;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
+      result =
+         pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
+      if (result != VK_SUCCESS) {
+         /* Only clean up the programs that were fully set up. */
+         for (uint32_t j = 0; j < i; j++) {
+            pvr_render_ctx_switch_programs_cleanup(device,
+                                                   &ctx_switch->programs[j]);
+         }
+
+         goto err_pvr_bo_free_geom_state_bo;
+      }
+   }
+
+   return VK_SUCCESS;
+
+err_pvr_bo_free_geom_state_bo:
+   pvr_bo_free(device, ctx_switch->geom_state_bo);
+
+err_pvr_bo_free_vdm_state_bo:
+   pvr_bo_free(device, ctx_switch->vdm_state_bo);
+
+   return result;
+}
+
+static void pvr_render_ctx_switch_fini(struct pvr_device *device,
+                                       struct pvr_render_ctx *ctx)
+{
+   struct
pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch; + + for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { + pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]); + } + + pvr_bo_free(device, ctx_switch->geom_state_bo); + pvr_bo_free(device, ctx_switch->vdm_state_bo); +} + +static void +pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program, + enum PVRX(VDMCTRL_USC_TARGET) usc_target, + uint8_t usc_unified_size, + uint32_t *const state0_out, + uint32_t *const state1_out) +{ + pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) { + /* Convert the data size from dwords to bytes. */ + const uint32_t pds_data_size = pds_program->data_size * 4; + + state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM); + state.usc_target = usc_target; + state.usc_common_size = 0; + state.usc_unified_size = usc_unified_size; + state.pds_temp_size = 0; + + assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) == + 0); + state.pds_data_size = + pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE); + }; + + pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) { + state.pds_data_addr.addr = pds_program->data_offset; + state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS); + state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS); + } +} + +static void +pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program, + uint32_t *const stream_out1_out, + uint32_t *const stream_out2_out) +{ + pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) { + /* Convert the data size from dwords to bytes. */ + const uint32_t pds_data_size = pds_program->data_size * 4; + + state.sync = true; + + assert(pds_data_size % + PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) == + 0); + state.pds_data_size = + pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE); + + state.pds_temp_size = 0; + } + + pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) { + state.pds_data_addr.addr = pds_program->data_offset; + } +} + +static void pvr_render_ctx_ws_static_state_init( + struct pvr_render_ctx *ctx, + struct pvr_winsys_render_ctx_static_state *static_state) +{ + void *dst; + + dst = &static_state->vdm_ctx_state_base_addr; + pvr_csb_pack (dst, CR_VDM_CONTEXT_STATE_BASE, base) { + base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr; + } + + dst = &static_state->geom_ctx_state_base_addr; + pvr_csb_pack (dst, CR_TA_CONTEXT_STATE_BASE, base) { + base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr; + } + + for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) { + struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt; + struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr; + + /* Context store state. 
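+       * The store task words (task0-2) point the hardware at the shared
+       * register store program and the persistent state store program
+       * uploaded when the context was created.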
*/ + dst = &static_state->geom_state[i].vdm_ctx_store_task0; + pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK0, task0) { + pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program, + PVRX(VDMCTRL_USC_TARGET_ANY), + sr_prog->usc.unified_size, + &task0.pds_state0, + &task0.pds_state1); + } + + dst = &static_state->geom_state[i].vdm_ctx_store_task1; + pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK1, task1) { + pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) { + state.pds_code_addr.addr = sr_prog->pds.store_program.code_offset; + } + } + + dst = &static_state->geom_state[i].vdm_ctx_store_task2; + pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK2, task2) { + pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program, + &task2.stream_out1, + &task2.stream_out2); + } + + /* Context resume state. */ + dst = &static_state->geom_state[i].vdm_ctx_resume_task0; + pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) { + pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program, + PVRX(VDMCTRL_USC_TARGET_ALL), + sr_prog->usc.unified_size, + &task0.pds_state0, + &task0.pds_state1); + } + + dst = &static_state->geom_state[i].vdm_ctx_resume_task1; + pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) { + pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) { + state.pds_code_addr.addr = sr_prog->pds.load_program.code_offset; + } + } + + dst = &static_state->geom_state[i].vdm_ctx_resume_task2; + pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) { + pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program, + &task2.stream_out1, + &task2.stream_out2); + } + } +} + +static void pvr_render_ctx_ws_create_info_init( + struct pvr_render_ctx *ctx, + enum pvr_winsys_ctx_priority priority, + struct pvr_winsys_render_ctx_create_info *create_info) +{ + create_info->priority = priority; + create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr; + + pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state); +} + +VkResult pvr_render_ctx_create(struct pvr_device *device, + enum pvr_winsys_ctx_priority priority, + struct pvr_render_ctx **const ctx_out) +{ + const uint64_t vdm_callstack_size = + sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH; + struct pvr_winsys_render_ctx_create_info create_info; + struct pvr_render_ctx *ctx; + VkResult result; + + ctx = vk_alloc(&device->vk.alloc, + sizeof(*ctx), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!ctx) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + ctx->device = device; + + result = pvr_bo_alloc(device, + device->heaps.general_heap, + vdm_callstack_size, + PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT), + 0, + &ctx->vdm_callstack_bo); + if (result != VK_SUCCESS) + goto err_vk_free_ctx; + + result = pvr_render_ctx_switch_init(device, ctx); + if (result != VK_SUCCESS) + goto err_free_vdm_callstack_bo; + + result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd); + if (result != VK_SUCCESS) + goto err_render_ctx_switch_fini; + + /* ctx must be fully initialized by this point since + * pvr_render_ctx_ws_create_info_init() depends on this. 
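+    * (it reads the context switch programs and buffers set up above to
+    * pack the static register state).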
+ */ + pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info); + + result = device->ws->ops->render_ctx_create(device->ws, + &create_info, + &ctx->ws_ctx); + if (result != VK_SUCCESS) + goto err_render_ctx_reset_cmd_fini; + + *ctx_out = ctx; + + return VK_SUCCESS; + +err_render_ctx_reset_cmd_fini: + pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); + +err_render_ctx_switch_fini: + pvr_render_ctx_switch_fini(device, ctx); + +err_free_vdm_callstack_bo: + pvr_bo_free(device, ctx->vdm_callstack_bo); + +err_vk_free_ctx: + vk_free(&device->vk.alloc, ctx); + + return result; +} + +void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx) +{ + struct pvr_device *device = ctx->device; + + device->ws->ops->render_ctx_destroy(ctx->ws_ctx); + + pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); + pvr_render_ctx_switch_fini(device, ctx); + pvr_bo_free(device, ctx->vdm_callstack_bo); + vk_free(&device->vk.alloc, ctx); +} + +static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload( + struct pvr_device *device, + struct pvr_pds_upload *const pds_upload_out) +{ + const uint32_t pds_data_alignment = + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; + ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U]; + struct pvr_pds_fence_program program = { 0 }; + ASSERTED uint32_t *buffer_end; + uint32_t code_offset; + uint32_t data_size; + + /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */ + assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) && + rogue_get_num_phantoms(dev_info) >= 2)); + + pvr_pds_generate_fence_terminate_program(&program, + staging_buffer, + PDS_GENERATE_DATA_SEGMENT, + &device->pdevice->dev_info); + + /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size + * when we generate the code segment. Implement + * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change? + * This behavior doesn't seem consistent with the rest of the api. For now + * we store the size in a variable. + */ + data_size = program.data_size; + code_offset = ALIGN_POT(program.data_size, pds_data_alignment); + + buffer_end = + pvr_pds_generate_fence_terminate_program(&program, + &staging_buffer[code_offset], + PDS_GENERATE_CODE_SEGMENT, + &device->pdevice->dev_info); + + assert((uint64_t)(buffer_end - staging_buffer) * 4U < + ROGUE_PDS_TASK_PROGRAM_SIZE); + + return pvr_gpu_upload_pds(device, + staging_buffer, + data_size, + PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT), + &staging_buffer[code_offset], + program.code_size, + PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT), + 0, + pds_upload_out); +} + +static void pvr_compute_ctx_ws_static_state_init( + const struct pvr_device_info *const dev_info, + const struct pvr_compute_ctx *const ctx, + struct pvr_winsys_compute_ctx_static_state *const static_state) +{ + const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch; + + pvr_csb_pack (&static_state->cdm_ctx_state_base_addr, + CR_CDM_CONTEXT_STATE_BASE, + state) { + state.addr = ctx_switch->compute_state_bo->vma->dev_addr; + } + + /* CR_CDM_CONTEXT_... use state store program info. 
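+    * i.e. the PDS0/PDS1 words below point at the SR store programs uploaded
+    * by pvr_ctx_sr_programs_setup() for each of the two state buffers.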
*/ + + pvr_csb_pack (&static_state->cdm_ctx_store_pds0, + CR_CDM_CONTEXT_PDS0, + state) { + state.data_addr.addr = ctx_switch->sr[0].pds.store_program.data_offset; + state.code_addr.addr = ctx_switch->sr[0].pds.store_program.code_offset; + } + + pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b, + CR_CDM_CONTEXT_PDS0, + state) { + state.data_addr.addr = ctx_switch->sr[1].pds.store_program.data_offset; + state.code_addr.addr = ctx_switch->sr[1].pds.store_program.code_offset; + } + + pvr_csb_pack (&static_state->cdm_ctx_store_pds1, + CR_CDM_CONTEXT_PDS1, + state) { + /* Convert the data size from dwords to bytes. */ + const uint32_t store_program_data_size = + ctx_switch->sr[0].pds.store_program.data_size * 4U; + + state.pds_seq_dep = true; + state.usc_seq_dep = false; + state.target = true; + state.unified_size = ctx_switch->sr[0].usc.unified_size; + state.common_shared = false; + state.common_size = 0; + state.temp_size = 0; + + assert(store_program_data_size % + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) == + 0); + state.data_size = store_program_data_size / + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE); + + state.fence = true; + } + + /* CR_CDM_TERMINATE_... use fence terminate info. */ + + pvr_csb_pack (&static_state->cdm_ctx_terminate_pds, + CR_CDM_TERMINATE_PDS, + state) { + state.data_addr.addr = ctx_switch->sr_fence_terminate_program.data_offset; + state.code_addr.addr = ctx_switch->sr_fence_terminate_program.code_offset; + } + + pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1, + CR_CDM_TERMINATE_PDS1, + state) { + /* Convert the data size from dwords to bytes. */ + const uint32_t fence_terminate_program_data_size = + ctx_switch->sr_fence_terminate_program.data_size * 4U; + + state.pds_seq_dep = true; + state.usc_seq_dep = false; + state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable); + state.unified_size = 0; + /* Common store is for shareds -- this will free the partitions. */ + state.common_shared = true; + state.common_size = 0; + state.temp_size = 0; + + assert(fence_terminate_program_data_size % + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) == + 0); + state.data_size = fence_terminate_program_data_size / + PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE); + state.fence = true; + } + + /* CR_CDM_RESUME_... use state load program info. 
*/
+
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[0].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[0].pds.load_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[1].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[1].pds.load_program.code_offset;
+   }
+}
+
+static void pvr_compute_ctx_ws_create_info_init(
+   const struct pvr_compute_ctx *const ctx,
+   enum pvr_winsys_ctx_priority priority,
+   struct pvr_winsys_compute_ctx_create_info *const create_info)
+{
+   create_info->priority = priority;
+
+   pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
+                                        ctx,
+                                        &create_info->static_state);
+}
+
+VkResult pvr_compute_ctx_create(struct pvr_device *const device,
+                                enum pvr_winsys_ctx_priority priority,
+                                struct pvr_compute_ctx **const ctx_out)
+{
+   struct pvr_winsys_compute_ctx_create_info create_info;
+   struct pvr_compute_ctx *ctx;
+   VkResult result;
+
+   ctx = vk_alloc(&device->vk.alloc,
+                  sizeof(*ctx),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!ctx)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   ctx->device = device;
+
+   result = pvr_bo_alloc(
+      device,
+      device->heaps.general_heap,
+      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
+      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
+      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
+      &ctx->ctx_switch.compute_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_ctx;
+
+   /* TODO: Change this so that enabling storage to B doesn't change the array
+    * size. Instead of looping we could unroll this and make the second
+    * program's setup depend on the B enable. Doing it that way would make
+    * things more obvious.
+ */ + for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) { + result = pvr_ctx_sr_programs_setup(device, + PVR_CTX_SR_COMPUTE_TARGET, + &ctx->ctx_switch.sr[i]); + if (result != VK_SUCCESS) { + for (uint32_t j = 0; j < i; j++) + pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]); + + goto err_free_state_buffer; + } + } + + result = pvr_pds_sr_fence_terminate_program_create_and_upload( + device, + &ctx->ctx_switch.sr_fence_terminate_program); + if (result != VK_SUCCESS) + goto err_free_sr_programs; + + pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info); + + result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd); + if (result != VK_SUCCESS) + goto err_free_pds_fence_terminate_program; + + result = device->ws->ops->compute_ctx_create(device->ws, + &create_info, + &ctx->ws_ctx); + if (result != VK_SUCCESS) + goto err_fini_reset_cmd; + + *ctx_out = ctx; + + return VK_SUCCESS; + +err_fini_reset_cmd: + pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); + +err_free_pds_fence_terminate_program: + pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo); + +err_free_sr_programs: + for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i) + pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]); + +err_free_state_buffer: + pvr_bo_free(device, ctx->ctx_switch.compute_state_bo); + +err_free_ctx: + vk_free(&device->vk.alloc, ctx); + + return result; +} + +void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx) +{ + struct pvr_device *device = ctx->device; + + device->ws->ops->compute_ctx_destroy(ctx->ws_ctx); + + pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); + + pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo); + for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i) + pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]); + + pvr_bo_free(device, ctx->ctx_switch.compute_state_bo); + + vk_free(&device->vk.alloc, ctx); +} diff --git a/src/imagination/vulkan/pvr_job_context.h b/src/imagination/vulkan/pvr_job_context.h new file mode 100644 index 00000000000..0e3f81b0cd5 --- /dev/null +++ b/src/imagination/vulkan/pvr_job_context.h @@ -0,0 +1,126 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_JOB_CONTEXT_H +#define PVR_JOB_CONTEXT_H + +#include "pvr_winsys.h" +#include "pvr_private.h" + +/* Support PDS code/data loading/storing to the 'B' shared register state + * buffers. 
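+ * Each context keeps two copies of its shared register state; the second
+ * ('B') copy is programmed through the *_B variants of the context state
+ * registers (see e.g. cdm_ctx_store_pds0_b in pvr_job_context.c).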
+ */ +#define ROGUE_NUM_SHADER_STATE_BUFFERS 2U + +/* TODO: Add reset framework support. */ +struct pvr_reset_cmd { +}; + +struct pvr_compute_ctx; + +struct rogue_sr_programs { + struct pvr_bo *store_load_state_bo; + + struct { + uint8_t unified_size; + + struct pvr_bo *store_program_bo; + + struct pvr_bo *load_program_bo; + } usc; + + struct { + struct pvr_pds_upload store_program; + struct pvr_pds_upload load_program; + } pds; +}; + +struct pvr_render_ctx { + struct pvr_device *device; + + struct pvr_winsys_render_ctx *ws_ctx; + + /* Buffer to hold the VDM call stack */ + struct pvr_bo *vdm_callstack_bo; + + struct pvr_render_ctx_switch { + /* Buffer to hold the VDM context resume control stream. */ + struct pvr_bo *vdm_state_bo; + + struct pvr_bo *geom_state_bo; + + struct pvr_render_ctx_programs { + /* Context switch persistent state programs. */ + struct rogue_pt_programs { + /* Buffer used to hold the persistent state. */ + struct pvr_bo *store_resume_state_bo; + + /* PDS program to store out the persistent state in + * 'store_resume_state_bo'. + */ + struct pvr_pds_upload pds_store_program; + + /* PDS program to load in the persistent state in + * 'store_resume_state_bo'. + */ + struct pvr_pds_upload pds_resume_program; + } pt; + + /* Context switch shared register programs. */ + struct rogue_sr_programs sr; + + } programs[ROGUE_NUM_SHADER_STATE_BUFFERS]; + } ctx_switch; + + /* Reset framework. */ + struct pvr_reset_cmd reset_cmd; +}; + +struct pvr_compute_ctx { + struct pvr_device *device; + + struct pvr_winsys_compute_ctx *ws_ctx; + + struct pvr_compute_ctx_switch { + struct pvr_bo *compute_state_bo; + + struct rogue_sr_programs sr[ROGUE_NUM_SHADER_STATE_BUFFERS]; + + struct pvr_pds_upload sr_fence_terminate_program; + } ctx_switch; + + /* Reset framework. */ + struct pvr_reset_cmd reset_cmd; +}; + +VkResult pvr_render_ctx_create(struct pvr_device *device, + enum pvr_winsys_ctx_priority priority, + struct pvr_render_ctx **const ctx_out); +void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx); + +VkResult pvr_compute_ctx_create(struct pvr_device *const device, + enum pvr_winsys_ctx_priority priority, + struct pvr_compute_ctx **const ctx_out); +void pvr_compute_ctx_destroy(struct pvr_compute_ctx *ctx); + +#endif /* PVR_JOB_CONTEXT_H */ diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c new file mode 100644 index 00000000000..8c6b15fa43b --- /dev/null +++ b/src/imagination/vulkan/pvr_job_render.c @@ -0,0 +1,1595 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_job_common.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE 16U
+
+/* FIXME: Is there a hardware define we can use instead? */
+/* 1 DWord per PM physical page stored in the free list */
+#define ROGUE_FREE_LIST_ENTRY_SIZE ((uint32_t)sizeof(uint32_t))
+
+/* FIXME: The three defines below, for the number of PC, PD and PT entries in a
+ * 4KB page, come from rgxmmudefs_km.h (meaning they're part of the
+ * auto-generated hwdefs). Should these be defined in rogue_mmu.xml? Keeping in
+ * mind that we probably only need these three values.
+ */
+#define ROGUE_NUM_PC_ENTRIES_PER_PAGE 0x400U
+
+#define ROGUE_NUM_PD_ENTRIES_PER_PAGE 0x200U
+
+#define ROGUE_NUM_PT_ENTRIES_PER_PAGE 0x200U
+
+struct pvr_free_list {
+   struct pvr_device *device;
+
+   uint64_t size;
+
+   struct pvr_bo *bo;
+
+   struct pvr_winsys_free_list *ws_free_list;
+};
+
+/* Macrotile information. */
+struct pvr_rt_mtile_info {
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+
+   uint32_t tiles_per_mtile_x;
+   uint32_t tiles_per_mtile_y;
+
+   uint32_t x_tile_max;
+   uint32_t y_tile_max;
+
+   uint32_t mtiles_x;
+   uint32_t mtiles_y;
+
+   uint32_t mtile_x1;
+   uint32_t mtile_y1;
+   uint32_t mtile_x2;
+   uint32_t mtile_y2;
+   uint32_t mtile_x3;
+   uint32_t mtile_y3;
+
+   uint32_t mtile_stride;
+};
+
+struct pvr_rt_dataset {
+   struct pvr_device *device;
+
+   /* RT dataset information */
+   uint32_t width;
+   uint32_t height;
+   uint32_t samples;
+   uint32_t layers;
+
+   struct pvr_free_list *global_free_list;
+   struct pvr_free_list *local_free_list;
+
+   struct pvr_bo *vheap_rtc_bo;
+   pvr_dev_addr_t vheap_dev_addr;
+   pvr_dev_addr_t rtc_dev_addr;
+
+   struct pvr_bo *tpc_bo;
+   uint64_t tpc_stride;
+   uint64_t tpc_size;
+
+   struct pvr_winsys_rt_dataset *ws_rt_dataset;
+
+   /* RT data information */
+   struct pvr_bo *mta_mlist_bo;
+
+   struct pvr_bo *rgn_headers_bo;
+   uint64_t rgn_headers_stride;
+
+   bool need_frag;
+
+   uint8_t rt_data_idx;
+
+   struct {
+      pvr_dev_addr_t mta_dev_addr;
+      pvr_dev_addr_t mlist_dev_addr;
+      pvr_dev_addr_t rgn_headers_dev_addr;
+   } rt_datas[ROGUE_NUM_RTDATAS];
+};
+
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out)
+{
+   struct pvr_winsys_free_list *parent_ws_free_list =
+      parent_free_list ? parent_free_list->ws_free_list : NULL;
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_PM_FW_PROTECT;
+   struct pvr_free_list *free_list;
+   uint32_t cache_line_size;
+   uint32_t initial_num_pages;
+   uint32_t grow_num_pages;
+   uint32_t max_num_pages;
+   uint64_t addr_alignment;
+   uint64_t size_alignment;
+   uint64_t size;
+   VkResult result;
+
+   assert((initial_size + grow_size) <= max_size);
+   assert(max_size != 0);
+   assert(grow_threshold <= 100);
+
+   /* Make sure the free list is created with at least a single page. */
+   if (initial_size == 0)
+      initial_size = ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   /* The freelist sizes must respect the PM freelist base address alignment
+    * requirement. As the freelist entries are cached by the SLC, it's also
+    * necessary to ensure the sizes respect the SLC cache line size to avoid
+    * invalid entries appearing in the cache, which would be problematic after
+    * a grow operation, as the SLC entries aren't invalidated. We do this by
+    * making sure the freelist values are appropriately aligned.
+    *
+    * To calculate the alignment, we first take the largest of the freelist
+    * base address alignment and the SLC cache line size. We then divide this
+    * by the freelist entry size to determine the number of freelist entries
+    * required by the PM. Finally, as each entry holds a single PM physical
+    * page, we multiply the number of entries by the page size.
+    *
+    * As an example, if the base address alignment is 16 bytes, the SLC cache
+    * line size is 64 bytes and the freelist entry size is 4 bytes then 16
+    * entries are required, as we take the SLC cache line size (being the
+    * larger of the two values) and divide this by 4. If the PM page size is
+    * 4096 bytes then we end up with an alignment of 65536 bytes.
+    */
+   cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+
+   addr_alignment =
+      MAX2(ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE, cache_line_size);
+   size_alignment = (addr_alignment / ROGUE_FREE_LIST_ENTRY_SIZE) *
+                    ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   assert(util_is_power_of_two_nonzero(size_alignment));
+
+   initial_size = align64(initial_size, size_alignment);
+   max_size = align64(max_size, size_alignment);
+   grow_size = align64(grow_size, size_alignment);
+
+   /* Make sure the 'max' size doesn't exceed what the firmware supports and
+    * adjust the other sizes accordingly.
+    */
+   if (max_size > ROGUE_FREE_LIST_MAX_SIZE) {
+      max_size = ROGUE_FREE_LIST_MAX_SIZE;
+      assert(align64(max_size, size_alignment) == max_size);
+   }
+
+   if (initial_size > max_size)
+      initial_size = max_size;
+
+   if (initial_size == max_size)
+      grow_size = 0;
+
+   initial_num_pages = initial_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   max_num_pages = max_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   grow_num_pages = grow_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+
+   /* Calculate the size of the buffer needed to store the free list entries
+    * based on the maximum number of pages we can have.
+    */
+   size = max_num_pages * ROGUE_FREE_LIST_ENTRY_SIZE;
+   assert(align64(size, addr_alignment) == size);
+
+   free_list = vk_alloc(&device->vk.alloc,
+                        sizeof(*free_list),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!free_list)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: The memory is mapped GPU uncached, but this seems to contradict
+    * the comment above about aligning to the SLC cache line size.
+ */ + result = pvr_bo_alloc(device, + device->heaps.general_heap, + size, + addr_alignment, + bo_flags, + &free_list->bo); + if (result != VK_SUCCESS) + goto err_vk_free_free_list; + + result = device->ws->ops->free_list_create(device->ws, + free_list->bo->vma, + initial_num_pages, + max_num_pages, + grow_num_pages, + grow_threshold, + parent_ws_free_list, + &free_list->ws_free_list); + if (result != VK_SUCCESS) + goto err_pvr_bo_free_bo; + + free_list->device = device; + free_list->size = size; + + *free_list_out = free_list; + + return VK_SUCCESS; + +err_pvr_bo_free_bo: + pvr_bo_free(device, free_list->bo); + +err_vk_free_free_list: + vk_free(&device->vk.alloc, free_list); + + return result; +} + +void pvr_free_list_destroy(struct pvr_free_list *free_list) +{ + struct pvr_device *device = free_list->device; + + device->ws->ops->free_list_destroy(free_list->ws_free_list); + pvr_bo_free(device, free_list->bo); + vk_free(&device->vk.alloc, free_list); +} + +static inline void pvr_get_samples_in_xy(uint32_t samples, + uint32_t *const x_out, + uint32_t *const y_out) +{ + switch (samples) { + case 1: + *x_out = 1; + *y_out = 1; + break; + case 2: + *x_out = 1; + *y_out = 2; + break; + case 4: + *x_out = 2; + *y_out = 2; + break; + case 8: + *x_out = 2; + *y_out = 4; + break; + default: + unreachable("Unsupported number of samples"); + } +} + +static void pvr_rt_mtile_info_init(struct pvr_device *device, + struct pvr_rt_mtile_info *info, + uint32_t width, + uint32_t height, + uint32_t samples) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + uint32_t samples_in_x; + uint32_t samples_in_y; + + pvr_get_samples_in_xy(samples, &samples_in_x, &samples_in_y); + + info->tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 1); + info->tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 1); + + info->num_tiles_x = DIV_ROUND_UP(width, info->tile_size_x); + info->num_tiles_y = DIV_ROUND_UP(height, info->tile_size_y); + + rogue_get_num_macrotiles_xy(dev_info, &info->mtiles_x, &info->mtiles_y); + + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { + assert(PVR_GET_FEATURE_VALUE(dev_info, + simple_parameter_format_version, + 0) == 2); + /* Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile, + * which is aligned to a tile group. + */ + info->mtile_x1 = DIV_ROUND_UP(info->num_tiles_x, 8) * 2; + info->mtile_y1 = DIV_ROUND_UP(info->num_tiles_y, 8) * 2; + info->mtile_x2 = 0; + info->mtile_y2 = 0; + info->mtile_x3 = 0; + info->mtile_y3 = 0; + info->x_tile_max = ALIGN_POT(info->num_tiles_x, 2) - 1; + info->y_tile_max = ALIGN_POT(info->num_tiles_y, 2) - 1; + } else { + /* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. */ + info->mtile_x1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_x, 4), 4); + info->mtile_y1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_y, 4), 4); + info->mtile_x2 = info->mtile_x1 * 2; + info->mtile_y2 = info->mtile_y1 * 2; + info->mtile_x3 = info->mtile_x1 * 3; + info->mtile_y3 = info->mtile_y1 * 3; + info->x_tile_max = info->num_tiles_x - 1; + info->y_tile_max = info->num_tiles_y - 1; + } + + info->tiles_per_mtile_x = info->mtile_x1 * samples_in_x; + info->tiles_per_mtile_y = info->mtile_y1 * samples_in_y; + + info->mtile_stride = info->mtile_x1 * info->mtile_y1; +} + +/* Note that the unit of the return value depends on the GPU. For cores with the + * simple_internal_parameter_format feature the returned size is interpreted as + * the number of region headers. 
For cores without this feature it's interpreted
+ * as the size in dwords.
+ */
+static uint64_t
+pvr_rt_get_isp_region_size(struct pvr_device *device,
+                           const struct pvr_rt_mtile_info *mtile_info)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint64_t rgn_size =
+      mtile_info->tiles_per_mtile_x * mtile_info->tiles_per_mtile_y;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      uint32_t version;
+
+      rgn_size *= mtile_info->mtiles_x * mtile_info->mtiles_y;
+
+      if (PVR_FEATURE_VALUE(dev_info,
+                            simple_parameter_format_version,
+                            &version)) {
+         version = 0;
+      }
+
+      if (version == 2) {
+         /* One region header per 2x2 tile group. */
+         rgn_size /= (2U * 2U);
+      }
+   } else {
+      const uint64_t rgn_header_size = rogue_get_region_header_size(dev_info);
+
+      /* Convert the number of region headers to bytes and round up to the
+       * next dword to prevent IPF overrun.
+       */
+      rgn_size = DIV_ROUND_UP(rgn_size * rgn_header_size, 4);
+   }
+
+   return rgn_size;
+}
+
+static VkResult pvr_rt_vheap_rtc_data_init(struct pvr_device *device,
+                                           struct pvr_rt_dataset *rt_dataset,
+                                           uint32_t layers)
+{
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   uint64_t vheap_size;
+   uint32_t alignment;
+   uint64_t rtc_size;
+   VkResult result;
+
+   vheap_size = ROGUE_CR_PM_VHEAP_TABLE_SIZE * ROGUE_PM_VHEAP_ENTRY_SIZE;
+
+   if (layers > 1) {
+      uint64_t rtc_entries;
+
+      vheap_size = ALIGN_POT(vheap_size, PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+      rtc_entries = ROGUE_NUM_TEAC + ROGUE_NUM_TE + ROGUE_NUM_VCE;
+      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 48545))
+         rtc_entries += ROGUE_NUM_TE;
+
+      rtc_size = rtc_entries * ROGUE_RTC_SIZE_IN_BYTES;
+   } else {
+      rtc_size = 0;
+   }
+
+   alignment = MAX2(PVRX(CR_PM_VHEAP_TABLE_BASE_ADDR_ALIGNMENT),
+                    PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         vheap_size + rtc_size,
+                         alignment,
+                         bo_flags,
+                         &rt_dataset->vheap_rtc_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   rt_dataset->vheap_dev_addr = rt_dataset->vheap_rtc_bo->vma->dev_addr;
+
+   if (rtc_size > 0) {
+      rt_dataset->rtc_dev_addr.addr =
+         rt_dataset->vheap_dev_addr.addr + vheap_size;
+   } else {
+      rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+   }
+
+   return VK_SUCCESS;
+}
+
+static void pvr_rt_vheap_rtc_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->vheap_rtc_bo);
+   rt_dataset->vheap_rtc_bo = NULL;
+}
+
+static void
+pvr_rt_get_tail_ptr_stride_size(const struct pvr_device *device,
+                                const struct pvr_rt_mtile_info *mtile_info,
+                                uint32_t layers,
+                                uint64_t *const stride_out,
+                                uint64_t *const size_out)
+{
+   uint32_t max_num_mtiles;
+   uint32_t num_mtiles_x;
+   uint32_t num_mtiles_y;
+   uint32_t version;
+   uint64_t size;
+
+   num_mtiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
+   num_mtiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
+
+   max_num_mtiles = MAX2(util_next_power_of_two64(num_mtiles_x),
+                         util_next_power_of_two64(num_mtiles_y));
+
+   size = max_num_mtiles * max_num_mtiles;
+
+   if (PVR_FEATURE_VALUE(&device->pdevice->dev_info,
+                         simple_parameter_format_version,
+                         &version)) {
+      version = 0;
+   }
+
+   if (version == 2) {
+      /* One tail pointer cache entry per 2x2 tile group.
*/ + size /= (2U * 2U); + } + + size *= ROGUE_TAIL_POINTER_SIZE; + + if (layers > 1) { + size = ALIGN_POT(size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE); + + *stride_out = size / ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE; + *size_out = size * layers; + } else { + *stride_out = 0; + *size_out = size; + } +} + +static VkResult pvr_rt_tpc_data_init(struct pvr_device *device, + struct pvr_rt_dataset *rt_dataset, + const struct pvr_rt_mtile_info *mtile_info, + uint32_t layers) +{ + const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED | + PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC; + uint64_t tpc_size; + + pvr_rt_get_tail_ptr_stride_size(device, + mtile_info, + layers, + &rt_dataset->tpc_stride, + &rt_dataset->tpc_size); + tpc_size = ALIGN_POT(rt_dataset->tpc_size, ROGUE_TE_TPC_CACHE_LINE_SIZE); + + return pvr_bo_alloc(device, + device->heaps.general_heap, + tpc_size, + PVRX(CR_TE_TPC_ADDR_BASE_ALIGNMENT), + bo_flags, + &rt_dataset->tpc_bo); +} + +static void pvr_rt_tpc_data_fini(struct pvr_rt_dataset *rt_dataset) +{ + pvr_bo_free(rt_dataset->device, rt_dataset->tpc_bo); + rt_dataset->tpc_bo = NULL; +} + +static uint32_t +pvr_rt_get_mlist_size(const struct pvr_free_list *global_free_list, + const struct pvr_free_list *local_free_list) +{ + uint32_t num_pte_pages; + uint32_t num_pde_pages; + uint32_t num_pce_pages; + uint64_t total_pages; + uint32_t mlist_size; + + assert(global_free_list->size + local_free_list->size <= + ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE); + + total_pages = (global_free_list->size + local_free_list->size) >> + ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT; + + /* Calculate the total number of physical pages required to hold the page + * table, directory and catalog entries for the freelist pages. + */ + num_pte_pages = DIV_ROUND_UP(total_pages, ROGUE_NUM_PT_ENTRIES_PER_PAGE); + num_pde_pages = DIV_ROUND_UP(num_pte_pages, ROGUE_NUM_PD_ENTRIES_PER_PAGE); + num_pce_pages = DIV_ROUND_UP(num_pde_pages, ROGUE_NUM_PC_ENTRIES_PER_PAGE); + + /* Calculate the MList size considering the total number of pages in the PB + * are shared among all the PM address spaces. + */ + mlist_size = (num_pce_pages + num_pde_pages + num_pte_pages) * + ROGUE_NUM_PM_ADDRESS_SPACES * ROGUE_MLIST_ENTRY_STRIDE; + + return ALIGN_POT(mlist_size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE); +} + +static void pvr_rt_get_region_headers_stride_size( + const struct pvr_device *device, + const struct pvr_rt_mtile_info *mtile_info, + uint32_t layers, + uint64_t *const stride_out, + uint64_t *const size_out) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t rgn_header_size = rogue_get_region_header_size(dev_info); + uint32_t rgn_headers_size; + uint32_t num_tiles_x; + uint32_t num_tiles_y; + uint32_t group_size; + uint32_t version; + + if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version)) + version = 0; + + group_size = version == 2 ? 
2 : 1; + + num_tiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x; + num_tiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y; + + rgn_headers_size = + (num_tiles_x / group_size) * (num_tiles_y / group_size) * rgn_header_size; + + if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { + rgn_headers_size = + ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT)); + } + + if (layers > 1) { + rgn_headers_size = + ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE)); + } + + *stride_out = rgn_header_size; + *size_out = rgn_headers_size * layers; +} + +static VkResult +pvr_rt_mta_mlist_data_init(struct pvr_device *device, + struct pvr_rt_dataset *rt_dataset, + const struct pvr_free_list *global_free_list, + const struct pvr_free_list *local_free_list, + const struct pvr_rt_mtile_info *mtile_info) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const uint32_t mlist_size = + pvr_rt_get_mlist_size(global_free_list, local_free_list); + uint32_t mta_size = rogue_get_macrotile_array_size(dev_info); + const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas); + uint32_t rt_datas_mlist_size; + uint32_t rt_datas_mta_size; + pvr_dev_addr_t dev_addr; + VkResult result; + + /* Allocate memory for macrotile array and Mlist for all RT datas. + * + * Allocation layout: MTA[0..N] + Mlist alignment padding + Mlist[0..N]. + * + * N is number of RT datas. + */ + rt_datas_mta_size = ALIGN_POT(mta_size * num_rt_datas, + PVRX(CR_PM_MLIST0_BASE_ADDR_ALIGNMENT)); + rt_datas_mlist_size = mlist_size * num_rt_datas; + + result = pvr_bo_alloc(device, + device->heaps.general_heap, + rt_datas_mta_size + rt_datas_mlist_size, + PVRX(CR_PM_MTILE_ARRAY_BASE_ADDR_ALIGNMENT), + PVR_BO_ALLOC_FLAG_GPU_UNCACHED, + &rt_dataset->mta_mlist_bo); + if (result != VK_SUCCESS) + return result; + + dev_addr = rt_dataset->mta_mlist_bo->vma->dev_addr; + + for (uint32_t i = 0; i < num_rt_datas; i++) { + if (mta_size != 0) { + rt_dataset->rt_datas[i].mta_dev_addr = dev_addr; + dev_addr.addr += mta_size; + } else { + rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID; + } + } + + dev_addr.addr = + rt_dataset->mta_mlist_bo->vma->dev_addr.addr + rt_datas_mta_size; + + for (uint32_t i = 0; i < num_rt_datas; i++) { + if (mlist_size != 0) { + rt_dataset->rt_datas[i].mlist_dev_addr = dev_addr; + dev_addr.addr += mlist_size; + } else { + rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID; + } + } + + return VK_SUCCESS; +} + +static void pvr_rt_mta_mlist_data_fini(struct pvr_rt_dataset *rt_dataset) +{ + for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++) { + rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID; + rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID; + } + + pvr_bo_free(rt_dataset->device, rt_dataset->mta_mlist_bo); + rt_dataset->mta_mlist_bo = NULL; +} + +static VkResult +pvr_rt_rgn_headers_data_init(struct pvr_device *device, + struct pvr_rt_dataset *rt_dataset, + const struct pvr_rt_mtile_info *mtile_info, + uint32_t layers) +{ + const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas); + uint64_t rgn_headers_size; + pvr_dev_addr_t dev_addr; + VkResult result; + + pvr_rt_get_region_headers_stride_size(device, + mtile_info, + layers, + &rt_dataset->rgn_headers_stride, + &rgn_headers_size); + + result = pvr_bo_alloc(device, + device->heaps.rgn_hdr_heap, + rgn_headers_size * num_rt_datas, + PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT), + PVR_BO_ALLOC_FLAG_GPU_UNCACHED, + 
+                         &rt_dataset->rgn_headers_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dev_addr = rt_dataset->rgn_headers_bo->vma->dev_addr;
+
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      rt_dataset->rt_datas[i].rgn_headers_dev_addr = dev_addr;
+      dev_addr.addr += rgn_headers_size;
+   }
+
+   return VK_SUCCESS;
+}
+
+static void pvr_rt_rgn_headers_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++)
+      rt_dataset->rt_datas[i].rgn_headers_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->rgn_headers_bo);
+   rt_dataset->rgn_headers_bo = NULL;
+}
+
+static VkResult pvr_rt_datas_init(struct pvr_device *device,
+                                  struct pvr_rt_dataset *rt_dataset,
+                                  const struct pvr_free_list *global_free_list,
+                                  const struct pvr_free_list *local_free_list,
+                                  const struct pvr_rt_mtile_info *mtile_info,
+                                  uint32_t layers)
+{
+   VkResult result;
+
+   result = pvr_rt_mta_mlist_data_init(device,
+                                       rt_dataset,
+                                       global_free_list,
+                                       local_free_list,
+                                       mtile_info);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result =
+      pvr_rt_rgn_headers_data_init(device, rt_dataset, mtile_info, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_mta_mlist_data_fini;
+
+   return VK_SUCCESS;
+
+err_pvr_rt_mta_mlist_data_fini:
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+
+   return result;
+}
+
+static void pvr_rt_datas_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   pvr_rt_rgn_headers_data_fini(rt_dataset);
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+}
+
+static uint32_t
+pvr_rogue_get_cr_isp_mtile_size_val(const struct pvr_device_info *dev_info,
+                                    uint32_t samples,
+                                    const struct pvr_rt_mtile_info *mtile_info)
+{
+   uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
+   uint32_t isp_mtile_size;
+
+   pvr_csb_pack (&isp_mtile_size, CR_ISP_MTILE_SIZE, value) {
+      value.x = mtile_info->mtile_x1;
+      value.y = mtile_info->mtile_y1;
+
+      if (samples_per_pixel == 1) {
+         if (samples >= 4)
+            value.x <<= 1;
+
+         if (samples >= 2)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 2) {
+         if (samples >= 8)
+            value.x <<= 1;
+
+         if (samples >= 4)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 4) {
+         if (samples >= 8)
+            value.y <<= 1;
+      } else {
+         assert(!"Unsupported ISP samples per pixel value");
+      }
+   }
+
+   return isp_mtile_size;
+}
+
+static uint64_t pvr_rogue_get_cr_multisamplectl_val(uint32_t samples,
+                                                    bool y_flip)
+{
+   static const struct {
+      uint8_t x[8];
+      uint8_t y[8];
+   } sample_positions[4] = {
+      /* 1 sample */
+      {
+         .x = { 8 },
+         .y = { 8 },
+      },
+      /* 2 samples */
+      {
+         .x = { 12, 4 },
+         .y = { 12, 4 },
+      },
+      /* 4 samples */
+      {
+         .x = { 6, 14, 2, 10 },
+         .y = { 2, 6, 10, 14 },
+      },
+      /* 8 samples */
+      {
+         .x = { 9, 7, 13, 5, 3, 1, 11, 15 },
+         .y = { 5, 11, 9, 3, 13, 7, 15, 1 },
+      },
+   };
+   uint64_t multisamplectl;
+   uint8_t idx;
+
+   idx = util_fast_log2(samples);
+   assert(idx < ARRAY_SIZE(sample_positions));
+
+   pvr_csb_pack (&multisamplectl, CR_PPP_MULTISAMPLECTL, value) {
+      switch (samples) {
+      case 8:
+         value.msaa_x7 = sample_positions[idx].x[7];
+         value.msaa_x6 = sample_positions[idx].x[6];
+         value.msaa_x5 = sample_positions[idx].x[5];
+         value.msaa_x4 = sample_positions[idx].x[4];
+
+         if (y_flip) {
+            value.msaa_y7 = 16U - sample_positions[idx].y[7];
+            value.msaa_y6 = 16U - sample_positions[idx].y[6];
+            value.msaa_y5 = 16U - sample_positions[idx].y[5];
+            value.msaa_y4 = 16U - sample_positions[idx].y[4];
+         } else {
+            value.msaa_y7 = sample_positions[idx].y[7];
+            value.msaa_y6 = sample_positions[idx].y[6];
value.msaa_y5 = sample_positions[idx].y[5]; + value.msaa_y4 = sample_positions[idx].y[4]; + } + + FALLTHROUGH; + case 4: + value.msaa_x3 = sample_positions[idx].x[3]; + value.msaa_x2 = sample_positions[idx].x[2]; + + if (y_flip) { + value.msaa_y3 = 16U - sample_positions[idx].y[3]; + value.msaa_y2 = 16U - sample_positions[idx].y[2]; + } else { + value.msaa_y3 = sample_positions[idx].y[3]; + value.msaa_y2 = sample_positions[idx].y[2]; + } + + FALLTHROUGH; + case 2: + value.msaa_x1 = sample_positions[idx].x[1]; + + if (y_flip) { + value.msaa_y1 = 16U - sample_positions[idx].y[1]; + } else { + value.msaa_y1 = sample_positions[idx].y[1]; + } + + FALLTHROUGH; + case 1: + value.msaa_x0 = sample_positions[idx].x[0]; + + if (y_flip) { + value.msaa_y0 = 16U - sample_positions[idx].y[0]; + } else { + value.msaa_y0 = sample_positions[idx].y[0]; + } + + break; + default: + unreachable("Unsupported number of samples"); + } + } + + return multisamplectl; +} + +static uint32_t +pvr_rogue_get_cr_te_aa_val(const struct pvr_device_info *dev_info, + uint32_t samples) +{ + uint32_t samples_per_pixel = + PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0); + uint32_t te_aa; + + pvr_csb_pack (&te_aa, CR_TE_AA, value) { + if (samples_per_pixel == 1) { + if (samples >= 2) + value.y = true; + if (samples >= 4) + value.x = true; + } else if (samples_per_pixel == 2) { + if (samples >= 2) + value.x2 = true; + if (samples >= 4) + value.y = true; + if (samples >= 8) + value.x = true; + } else if (samples_per_pixel == 4) { + if (samples >= 2) + value.x2 = true; + if (samples >= 4) + value.y2 = true; + if (samples >= 8) + value.y = true; + } else { + assert(!"Unsupported ISP samples per pixel value"); + } + } + + return te_aa; +} + +static void pvr_rt_dataset_ws_create_info_init( + struct pvr_rt_dataset *rt_dataset, + const struct pvr_rt_mtile_info *mtile_info, + struct pvr_winsys_rt_dataset_create_info *create_info) +{ + struct pvr_device *device = rt_dataset->device; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + + memset(create_info, 0, sizeof(*create_info)); + + /* Local freelist. */ + create_info->local_free_list = rt_dataset->local_free_list->ws_free_list; + + /* ISP register values. */ + if (PVR_HAS_ERN(dev_info, 42307) && + !(PVR_HAS_FEATURE(dev_info, roguexe) && mtile_info->tile_size_x == 16)) { + float value; + + if (rt_dataset->width != 0) { + value = + ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->width; + create_info->isp_merge_lower_x = fui(value); + + value = + ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->width; + create_info->isp_merge_upper_x = fui(value); + } + + if (rt_dataset->height != 0) { + value = + ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->height; + create_info->isp_merge_lower_y = fui(value); + + value = + ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->height; + create_info->isp_merge_upper_y = fui(value); + } + + value = ((float)rt_dataset->width * ROGUE_ISP_MERGE_SCALE_FACTOR) / + (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR - + ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR); + create_info->isp_merge_scale_x = fui(value); + + value = ((float)rt_dataset->height * ROGUE_ISP_MERGE_SCALE_FACTOR) / + (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR - + ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR); + create_info->isp_merge_scale_y = fui(value); + } + + create_info->isp_mtile_size = + pvr_rogue_get_cr_isp_mtile_size_val(dev_info, + rt_dataset->samples, + mtile_info); + + /* PPP register values. 
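+    * Both the normal and the y-flipped CR_PPP_MULTISAMPLECTL sample position
+    * words are packed here so either orientation can be selected later
+    * without re-deriving them.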
*/ + create_info->ppp_multi_sample_ctl = + pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, false); + create_info->ppp_multi_sample_ctl_y_flipped = + pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, true); + + pvr_csb_pack (&create_info->ppp_screen, CR_PPP_SCREEN, value) { + value.pixxmax = rt_dataset->width - 1; + value.pixymax = rt_dataset->height - 1; + } + + /* TE register values. */ + create_info->te_aa = + pvr_rogue_get_cr_te_aa_val(dev_info, rt_dataset->samples); + + pvr_csb_pack (&create_info->te_mtile1, CR_TE_MTILE1, value) { + value.x1 = mtile_info->mtile_x1; + if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { + value.x2 = mtile_info->mtile_x2; + value.x3 = mtile_info->mtile_x3; + } + } + + pvr_csb_pack (&create_info->te_mtile2, CR_TE_MTILE2, value) { + value.y1 = mtile_info->mtile_y1; + if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { + value.y2 = mtile_info->mtile_y2; + value.y3 = mtile_info->mtile_y3; + } + } + + pvr_csb_pack (&create_info->te_screen, CR_TE_SCREEN, value) { + value.xmax = mtile_info->x_tile_max; + value.ymax = mtile_info->y_tile_max; + } + + /* Allocations and associated information. */ + create_info->vheap_table_dev_addr = rt_dataset->vheap_dev_addr; + create_info->rtc_dev_addr = rt_dataset->rtc_dev_addr; + + create_info->tpc_dev_addr = rt_dataset->tpc_bo->vma->dev_addr; + create_info->tpc_stride = rt_dataset->tpc_stride; + create_info->tpc_size = rt_dataset->tpc_size; + + STATIC_ASSERT(ARRAY_SIZE(create_info->rt_datas) == + ARRAY_SIZE(rt_dataset->rt_datas)); + for (uint32_t i = 0; i < ARRAY_SIZE(create_info->rt_datas); i++) { + create_info->rt_datas[i].pm_mlist_dev_addr = + rt_dataset->rt_datas[i].mlist_dev_addr; + create_info->rt_datas[i].macrotile_array_dev_addr = + rt_dataset->rt_datas[i].mta_dev_addr; + create_info->rt_datas[i].rgn_header_dev_addr = + rt_dataset->rt_datas[i].rgn_headers_dev_addr; + } + + create_info->rgn_header_size = + pvr_rt_get_isp_region_size(device, mtile_info); + + /* Miscellaneous. */ + create_info->mtile_stride = mtile_info->mtile_stride; + create_info->max_rts = rt_dataset->layers; +} + +VkResult +pvr_render_target_dataset_create(struct pvr_device *device, + uint32_t width, + uint32_t height, + uint32_t samples, + uint32_t layers, + struct pvr_rt_dataset **const rt_dataset_out) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + struct pvr_winsys_rt_dataset_create_info rt_dataset_create_info; + struct pvr_rt_mtile_info mtile_info; + struct pvr_rt_dataset *rt_dataset; + VkResult result; + + assert(device->global_free_list); + assert(width <= rogue_get_render_size_max_x(dev_info)); + assert(height <= rogue_get_render_size_max_y(dev_info)); + assert(layers > 0 && layers <= PVR_MAX_FRAMEBUFFER_LAYERS); + + pvr_rt_mtile_info_init(device, &mtile_info, width, height, samples); + + rt_dataset = vk_zalloc(&device->vk.alloc, + sizeof(*rt_dataset), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!rt_dataset) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + rt_dataset->device = device; + rt_dataset->width = width; + rt_dataset->height = height; + rt_dataset->samples = samples; + rt_dataset->layers = layers; + rt_dataset->global_free_list = device->global_free_list; + + /* The maximum supported free list size is based on the assumption that this + * freelist (the "local" freelist) is always the minimum size required by + * the hardware. See the documentation of ROGUE_FREE_LIST_MAX_SIZE for more + * details. 
+ */ + result = pvr_free_list_create(device, + rogue_get_min_free_list_size(dev_info), + rogue_get_min_free_list_size(dev_info), + 0 /* grow_size */, + 0 /* grow_threshold */, + rt_dataset->global_free_list, + &rt_dataset->local_free_list); + if (result != VK_SUCCESS) + goto err_vk_free_rt_dataset; + + result = pvr_rt_vheap_rtc_data_init(device, rt_dataset, layers); + if (result != VK_SUCCESS) + goto err_pvr_free_list_destroy; + + result = pvr_rt_tpc_data_init(device, rt_dataset, &mtile_info, layers); + if (result != VK_SUCCESS) + goto err_pvr_rt_vheap_rtc_data_fini; + + result = pvr_rt_datas_init(device, + rt_dataset, + rt_dataset->global_free_list, + rt_dataset->local_free_list, + &mtile_info, + layers); + if (result != VK_SUCCESS) + goto err_pvr_rt_tpc_data_fini; + + /* rt_dataset must be fully initialized by this point since + * pvr_rt_dataset_ws_create_info_init() depends on this. + */ + pvr_rt_dataset_ws_create_info_init(rt_dataset, + &mtile_info, + &rt_dataset_create_info); + + result = + device->ws->ops->render_target_dataset_create(device->ws, + &rt_dataset_create_info, + &rt_dataset->ws_rt_dataset); + if (result != VK_SUCCESS) + goto err_pvr_rt_datas_fini; + + *rt_dataset_out = rt_dataset; + + return VK_SUCCESS; + +err_pvr_rt_datas_fini: + pvr_rt_datas_fini(rt_dataset); + +err_pvr_rt_tpc_data_fini: + pvr_rt_tpc_data_fini(rt_dataset); + +err_pvr_rt_vheap_rtc_data_fini: + pvr_rt_vheap_rtc_data_fini(rt_dataset); + +err_pvr_free_list_destroy: + pvr_free_list_destroy(rt_dataset->local_free_list); + +err_vk_free_rt_dataset: + vk_free(&device->vk.alloc, rt_dataset); + + return result; +} + +void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset) +{ + struct pvr_device *device = rt_dataset->device; + + device->ws->ops->render_target_dataset_destroy(rt_dataset->ws_rt_dataset); + + pvr_rt_datas_fini(rt_dataset); + pvr_rt_tpc_data_fini(rt_dataset); + pvr_rt_vheap_rtc_data_fini(rt_dataset); + + pvr_free_list_destroy(rt_dataset->local_free_list); + + vk_free(&device->vk.alloc, rt_dataset); +} + +static void +pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_geometry_state *state) +{ + const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; + + /* FIXME: Should this just be done unconditionally? The firmware will just + * ignore the value anyway. + */ + if (PVR_HAS_QUIRK(dev_info, 56279)) { + pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) { + value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info); + } + } else { + state->regs.pds_ctrl = 0; + } + + pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) { + value.wclampen = true; + value.fixed_point_format = 1; + } + + pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) { + value.completeonterminate = job->geometry_terminate; + + value.region_stride = job->rt_dataset->rgn_headers_stride / + PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE); + + value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942); + } + + /* The set up of CR_TPU must be identical to + * pvr_render_job_ws_fragment_state_init(). 
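+    * A mismatch between the two would trip the assert in
+    * pvr_render_job_ws_submit_info_init(), which compares the packed
+    * geometry and fragment CR_TPU words.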
+ */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_VDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
+                 CR_VDM_CTRL_STREAM_BASE,
+                 value) {
+      value.addr = job->ctrl_stream_addr;
+   }
+
+   /* Set up the USC common size for the context switch resume/load program
+    * (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
+    * as part of the render context.
+    */
+   pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
+                 VDMCTRL_PDS_STATE0,
+                 value) {
+      /* Calculate the size in bytes. */
+      const uint16_t shared_registers_size = job->max_shared_registers * 4;
+
+      value.usc_common_size =
+         DIV_ROUND_UP(shared_registers_size,
+                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
+   };
+
+   state->flags = 0;
+
+   if (!job->rt_dataset->need_frag)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY;
+
+   if (job->geometry_terminate)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY;
+
+   if (job->frag_uses_atomic_ops)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_SINGLE_CORE;
+}
+
+static inline void
+pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
+                         uint32_t samples,
+                         uint32_t width,
+                         uint32_t height,
+                         uint32_t *const x_out,
+                         uint32_t *const y_out)
+{
+   uint32_t tile_samples_x;
+   uint32_t tile_samples_y;
+   uint32_t scale_x;
+   uint32_t scale_y;
+
+   rogue_get_isp_samples_per_tile_xy(dev_info,
+                                     samples,
+                                     &tile_samples_x,
+                                     &tile_samples_y);
+
+   switch (samples) {
+   case 1:
+      scale_x = 1;
+      scale_y = 1;
+      break;
+   case 2:
+      scale_x = 1;
+      scale_y = 2;
+      break;
+   case 4:
+      scale_x = 2;
+      scale_y = 2;
+      break;
+   case 8:
+      scale_x = 2;
+      scale_y = 4;
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   *x_out = DIV_ROUND_UP(width * scale_x, tile_samples_x);
+   *y_out = DIV_ROUND_UP(height * scale_y, tile_samples_y);
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      assert(PVR_GET_FEATURE_VALUE(dev_info,
+                                   simple_parameter_format_version,
+                                   0U) == 2U);
+      /* Align to a 2x2 tile block. */
+      *x_out = ALIGN_POT(*x_out, 2);
+      *y_out = ALIGN_POT(*y_out, 2);
+   }
+}
+
+static void
+pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_fragment_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+   enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode;
+   uint32_t isp_ctl;
+
+   /* FIXME: what to do when job->run_frag is false? */
+
+   switch (job->samples) {
+   case 1:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE);
+      break;
+   case 2:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_2X);
+      break;
+   case 4:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_4X);
+      break;
+   case 8:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_8X);
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   /* FIXME: pass in the number of samples rather than isp_aa_mode? */
+   pvr_setup_tiles_in_flight(dev_info,
+                             isp_aa_mode,
+                             job->pixel_output_width,
+                             false,
+                             job->max_tiles_in_flight,
+                             &isp_ctl,
+                             &state->regs.usc_pixel_output_ctrl);
+
+   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
+      value.sample_pos = true;
+
+      /* FIXME: There are a number of things that cause this to be set, this
+       * is just one of them.
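+       * The remaining triggers will need to be accounted for here as they
+       * are implemented.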
+ */
+      value.process_empty_tiles = job->process_empty_tiles;
+   }
+
+   /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
+    * possible to fully pack CR_ISP_CTL above rather than having to OR in part
+    * of the value.
+    */
+   state->regs.isp_ctl |= isp_ctl;
+
+   pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
+      value.mode = isp_aa_mode;
+   }
+
+   /* The set up of CR_TPU must be identical to
+    * pvr_render_job_ws_geometry_state_init().
+    */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
+       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+       rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) {
+      /* Each phantom has its own MCU, so atomicity can only be guaranteed
+       * when all work items are processed on the same phantom. This means we
+       * need to disable all USCs other than those of the first phantom, which
+       * has 4 clusters. Note that we only need to do this for atomic
+       * operations in fragment shaders, since hardware prevents the TA from
+       * running on more than one phantom anyway.
+       */
+      state->regs.pixel_phantom = 0xF;
+   } else {
+      state->regs.pixel_phantom = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
+      value.enablebgtag = job->enable_bg_tag;
+
+      value.mask = true;
+
+      /* FIXME: Hard code this for now as we don't currently support any
+       * stencil image formats.
+       */
+      value.stencil = 0xFF;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      value.value = fui(job->depth_clear_value);
+   }
+
+   /* FIXME: Some additional set up needed to support depth and stencil
+    * load/store operations.
+    */
+   pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
+      uint32_t aligned_width =
+         ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
+      uint32_t aligned_height =
+         ALIGN_POT(job->depth_physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
+
+      pvr_get_isp_num_tiles_xy(dev_info,
+                               job->samples,
+                               aligned_width,
+                               aligned_height,
+                               &value.zlsextent_x_z,
+                               &value.zlsextent_y_z);
+      value.zlsextent_x_z -= 1;
+      value.zlsextent_y_z -= 1;
+
+      if (job->depth_memlayout == PVR_MEMLAYOUT_TWIDDLED) {
+         value.loadtwiddled = true;
+         value.storetwiddled = true;
+      }
+
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      assert(job->depth_vk_format == VK_FORMAT_D32_SFLOAT);
+      value.zloadformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+      value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
+         value.x = job->depth_stride - 1;
+         value.y = job->depth_height - 1;
+      }
+   } else {
+      state->regs.isp_zls_pixels = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
+      value.addr = job->depth_addr;
+   }
+
+   pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
+                 CR_ISP_STENCIL_LOAD_BASE,
+                 value) {
+      value.addr = job->stencil_addr;
+
+      /* FIXME: May need to set value.enable to true.
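+       * Presumably this becomes relevant once stencil load/store support is
+       * implemented (see the stencil format FIXMEs above).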
*/ + } + + pvr_csb_pack (&state->regs.tpu_border_colour_table, + CR_TPU_BORDER_COLOUR_TABLE_PDM, + value) { + value.border_colour_table_address = job->border_colour_table_addr; + } + + state->regs.isp_oclqry_base = 0; + + pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) { + value.addr = job->depth_bias_table_addr; + } + + pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) { + value.addr = job->scissor_table_addr; + } + + pvr_csb_pack (&state->regs.event_pixel_pds_info, + CR_EVENT_PIXEL_PDS_INFO, + value) { + value.const_size = + DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords, + PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE)); + value.temp_stride = 0; + value.usc_sr_size = + DIV_ROUND_UP(PVR_STATE_PBE_DWORDS, + PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE)); + } + + pvr_csb_pack (&state->regs.event_pixel_pds_data, + CR_EVENT_PIXEL_PDS_DATA, + value) { + value.addr.addr = job->pds_pixel_event_data_offset; + } + + STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) == + ARRAY_SIZE(job->pbe_reg_words)); + STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) == + ARRAY_SIZE(job->pbe_reg_words[0])); + + for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) { + state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0]; + state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1]; + state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2]; + } + + STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values)); + typed_memcpy(state->regs.pds_bgnd, + job->pds_bgnd_reg_values, + ARRAY_SIZE(state->regs.pds_bgnd)); + + memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd)); + + /* FIXME: Merge geometry and fragment flags into a single flags member? */ + /* FIXME: move to its own function? */ + state->flags = 0; + + if (job->depth_addr.addr) + state->flags |= PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT; + + if (job->stencil_addr.addr) + state->flags |= PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT; + + if (job->disable_compute_overlap) + state->flags |= PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP; + + if (job->frag_uses_atomic_ops) + state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE; + + state->zls_stride = job->depth_layer_size; + state->sls_stride = job->depth_layer_size; +} + +static void pvr_render_job_ws_submit_info_init( + struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + const struct pvr_winsys_job_bo *bos, + uint32_t bo_count, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_render_submit_info *submit_info) +{ + memset(submit_info, 0, sizeof(*submit_info)); + + submit_info->rt_dataset = job->rt_dataset->ws_rt_dataset; + submit_info->rt_data_idx = job->rt_dataset->rt_data_idx; + + submit_info->frame_num = ctx->device->global_queue_present_count; + submit_info->job_num = ctx->device->global_queue_job_count; + + submit_info->run_frag = job->run_frag; + + submit_info->bos = bos; + submit_info->bo_count = bo_count; + + submit_info->semaphores = semaphores; + submit_info->semaphore_count = semaphore_count; + submit_info->stage_flags = stage_flags; + + /* FIXME: add WSI image bos. */ + + pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry); + pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment); + + /* These values are expected to match. 
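+    * The geometry and fragment states are packed independently, so the
+    * assert below checks that both ended up with the same CR_TPU word.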
+    */
+   assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
+}
+
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out)
+{
+   struct pvr_rt_dataset *rt_dataset = job->rt_dataset;
+   struct pvr_winsys_render_submit_info submit_info;
+   struct pvr_device *device = ctx->device;
+   VkResult result;
+
+   pvr_render_job_ws_submit_info_init(ctx,
+                                      job,
+                                      bos,
+                                      bo_count,
+                                      semaphores,
+                                      semaphore_count,
+                                      stage_flags,
+                                      &submit_info);
+
+   result = device->ws->ops->render_submit(ctx->ws_ctx,
+                                           &submit_info,
+                                           syncobj_geom_out,
+                                           syncobj_frag_out);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (job->run_frag) {
+      /* Move to the next render target data now that a fragment job has been
+       * successfully submitted. This will allow the next geometry job to be
+       * submitted and run in parallel with it.
+       */
+      rt_dataset->rt_data_idx =
+         (rt_dataset->rt_data_idx + 1) % ARRAY_SIZE(rt_dataset->rt_datas);
+
+      rt_dataset->need_frag = false;
+   } else {
+      rt_dataset->need_frag = true;
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h
new file mode 100644
index 00000000000..58fae423f7b
--- /dev/null
+++ b/src/imagination/vulkan/pvr_job_render.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_RENDER_H
+#define PVR_JOB_RENDER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_limits.h"
+#include "pvr_winsys.h"
+
+struct pvr_device;
+struct pvr_free_list;
+struct pvr_render_ctx;
+struct pvr_rt_dataset;
+
+/* FIXME: Turn 'struct pvr_sub_cmd' into 'struct pvr_job' and change 'struct
+ * pvr_render_job' to subclass it? This is approximately what v3dv does
+ * (although it doesn't subclass).
+ */
+struct pvr_render_job {
+   struct pvr_rt_dataset *rt_dataset;
+
+   bool run_frag;
+   bool geometry_terminate;
+   bool frag_uses_atomic_ops;
+   bool disable_compute_overlap;
+   bool enable_bg_tag;
+   bool process_empty_tiles;
+
+   uint32_t pds_pixel_event_data_offset;
+
+   pvr_dev_addr_t ctrl_stream_addr;
+
+   pvr_dev_addr_t border_colour_table_addr;
+   pvr_dev_addr_t depth_bias_table_addr;
+   pvr_dev_addr_t scissor_table_addr;
+
+   pvr_dev_addr_t depth_addr;
+   uint32_t depth_stride;
+   uint32_t depth_height;
+   uint32_t depth_physical_width;
+   uint32_t depth_physical_height;
+   uint32_t depth_layer_size;
+   float depth_clear_value;
+   VkFormat depth_vk_format;
+   /* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
+    * in pvr_private.h, which causes a circular include dependency. For now,
+    * treat it as a uint32_t. A couple of ways to possibly fix this:
+    *
+    * 1. Merge the contents of this header file into pvr_private.h.
+    * 2. Move 'enum pvr_memlayout' into a new header that can be included
+    *    by both this header and pvr_private.h.
+    */
+   uint32_t depth_memlayout;
+
+   pvr_dev_addr_t stencil_addr;
+
+   uint32_t samples;
+
+   uint32_t pixel_output_width;
+
+   uint8_t max_shared_registers;
+
+   /* Upper limit for tiles in flight, '0' means use default limit based
+    * on partition store.
+    */
+   uint32_t max_tiles_in_flight;
+
+   uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
+                         [ROGUE_NUM_PBESTATE_REG_WORDS];
+
+   uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+};
+
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out);
+void pvr_free_list_destroy(struct pvr_free_list *free_list);
+
+VkResult
+pvr_render_target_dataset_create(struct pvr_device *device,
+                                 uint32_t width,
+                                 uint32_t height,
+                                 uint32_t samples,
+                                 uint32_t layers,
+                                 struct pvr_rt_dataset **const rt_dataset_out);
+void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *dataset);
+
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_JOB_RENDER_H */
diff --git a/src/imagination/vulkan/pvr_limits.h b/src/imagination/vulkan/pvr_limits.h
new file mode 100644
index 00000000000..cda91e5cac9
--- /dev/null
+++ b/src/imagination/vulkan/pvr_limits.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Constants for VkPhysicalDeviceLimits.
+ */
+
+#ifndef PVR_LIMITS_H
+#define PVR_LIMITS_H
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/u_math.h"
+
+#define PVR_MAX_COLOR_ATTACHMENTS 8U
+#define PVR_MAX_QUEUES 2U
+#define PVR_MAX_VIEWPORTS 1U
+#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
+
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
+
+#define PVR_MAX_DESCRIPTOR_SETS 4U
+#define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS
+
+/* The limit is somewhat arbitrary; it just translates into more pds code and
+ * larger arrays. 32 appears to be the popular (and highest) choice across
+ * other implementations.
+ */
+#define PVR_MAX_VERTEX_INPUT_BINDINGS 16U
+
+/* We need one RenderTarget per supported MSAA mode as each render target
+ * contains state that is dependent on the sample count of the render that is
+ * rendering to it.
+ *
+ * As we do not know the sample count until we know the renderpass framebuffer
+ * combination being used, we create one per supported sample mode.
+ *
+ * E.g. with max_multisample = 4 this evaluates to util_logbase2(4) + 1 = 3
+ * render targets, covering 1, 2 and 4 samples.
+ */
+#define PVR_RENDER_TARGETS_PER_FRAMEBUFFER(dev_info)                         \
+   ({                                                                        \
+      uint32_t __ret = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4U); \
+      util_logbase2(__ret) + 1;                                              \
+   })
+
+#endif
diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c
new file mode 100644
index 00000000000..3b188555625
--- /dev/null
+++ b/src/imagination/vulkan/pvr_pass.c
@@ -0,0 +1,587 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_usc_fragment_shader.h"
+#include "rogue/rogue.h"
+#include "vk_alloc.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+/*****************************************************************************
+  PDS pre-baked program generation parameters and variables.
+*****************************************************************************/
+/* These would normally be produced by the compiler or other code. We're using
+ * them for now just to speed up things. All of these should eventually be
+ * removed.
+ */
+
+static const struct {
+   /* Indicates the amount of temporaries for the shader. */
+   uint32_t temp_count;
+   enum rogue_msaa_mode msaa_mode;
+   /* Indicates the presence of PHAS instruction. */
+   bool has_phase_rate_change;
+} pvr_pds_fragment_program_params = {
+   .temp_count = 0,
+   .msaa_mode = ROGUE_MSAA_MODE_PIXEL,
+   .has_phase_rate_change = false,
+};
+
+static inline bool pvr_subpass_has_msaa_input_attachment(
+   struct pvr_render_subpass *subpass,
+   const VkRenderPassCreateInfo2KHR *pCreateInfo)
+{
+   for (uint32_t i = 0; i < subpass->input_count; i++) {
+      const uint32_t attachment = subpass->input_attachments[i];
+
+      if (pCreateInfo->pAttachments[attachment].samples > 1)
+         return true;
+   }
+
+   return false;
+}
+
+static inline size_t
+pvr_num_subpass_attachments(const VkSubpassDescription2 *desc)
+{
+   return desc->inputAttachmentCount + desc->colorAttachmentCount +
+          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+          (desc->pDepthStencilAttachment != NULL);
+}
+
+static bool pvr_is_subpass_initops_flush_needed(
+   const struct pvr_render_pass *pass,
+   const struct pvr_renderpass_hwsetup_render *hw_render)
+{
+   struct pvr_render_subpass *subpass = &pass->subpasses[0];
+   uint32_t render_loadop_mask = 0;
+   uint32_t color_attachment_mask;
+
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      if (hw_render->color_init[i].op != RENDERPASS_SURFACE_INITOP_NOP)
+         render_loadop_mask |= (1 << hw_render->color_init[i].driver_id);
+   }
+
+   /* If there are no load ops then there's nothing to flush. */
+   if (render_loadop_mask == 0)
+      return false;
+
+   /* If the first subpass has any input attachments, they need to be
+    * initialized with the result of the load op. Since the input attachment
+    * may be read from fragments with an opaque pass type, the load ops must be
+    * flushed or else they would be obscured and eliminated by HSR.
+    */
+   if (subpass->input_count != 0)
+      return true;
+
+   color_attachment_mask = 0;
+
+   for (uint32_t i = 0; i < subpass->color_count; i++) {
+      const int32_t color_idx = subpass->color_attachments[i];
+
+      if (color_idx != -1)
+         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
+   }
+
+   /* If the first subpass does not write to all attachments which have a load
+    * op then the load ops need to be flushed to ensure they don't get obscured
+    * and removed by HSR.
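+    *
+    * As a worked example (hypothetical attachment setup): with load ops on
+    * attachments 0 and 1 but the first subpass only writing attachment 0,
+    * render_loadop_mask is 0x3 and color_attachment_mask is 0x1, so the
+    * expression below evaluates to true and the init ops get flushed.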
+ */ + return (render_loadop_mask & color_attachment_mask) != render_loadop_mask; +} + +static void +pvr_init_subpass_userpass_spawn(struct pvr_renderpass_hwsetup *hw_setup, + struct pvr_render_pass *pass, + struct pvr_render_subpass *subpasses) +{ + uint32_t subpass_idx = 0; + + for (uint32_t i = 0; i < hw_setup->render_count; i++) { + struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i]; + uint32_t initial_userpass_spawn = + (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render); + + for (uint32_t j = 0; j < hw_render->subpass_count; j++) { + subpasses[subpass_idx].userpass_spawn = (j + initial_userpass_spawn); + subpass_idx++; + } + } + + assert(subpass_idx == pass->subpass_count); +} + +static inline bool pvr_has_output_register_writes( + const struct pvr_renderpass_hwsetup_render *hw_render) +{ + for (uint32_t i = 0; i < hw_render->init_setup.render_targets_count; i++) { + struct usc_mrt_resource *mrt_resource = + &hw_render->init_setup.mrt_resources[i]; + + if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER) + return true; + } + + return false; +} + +static VkResult pvr_pds_texture_state_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + struct pvr_pds_upload *const pds_upload_out) +{ + struct pvr_pds_pixel_shader_sa_program program = { + .num_texture_dma_kicks = 1, + }; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program); + + staging_buffer_size = program.code_size * sizeof(*staging_buffer); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer); + + /* FIXME: Figure out the define for alignment of 16. 
+    */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+pvr_load_op_create(struct pvr_device *device,
+                   const VkAllocationCallbacks *allocator,
+                   struct pvr_renderpass_hwsetup_render *hw_render,
+                   struct pvr_load_op **const load_op_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   struct pvr_load_op *load_op;
+   VkResult result;
+
+   load_op = vk_zalloc2(&device->vk.alloc,
+                        allocator,
+                        sizeof(*load_op),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!load_op)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
+
+      if (color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR)
+         load_op->clear_mask |= 1U << i;
+      else if (color_init->op == RENDERPASS_SURFACE_INITOP_LOAD)
+         pvr_finishme("Missing 'load' load op");
+   }
+
+   result = pvr_gpu_upload_usc(device,
+                               pvr_usc_fragment_shader,
+                               sizeof(pvr_usc_fragment_shader),
+                               cache_line_size,
+                               &load_op->usc_frag_prog_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_load_op;
+
+   result = pvr_pds_fragment_program_create_and_upload(
+      device,
+      allocator,
+      load_op->usc_frag_prog_bo,
+      pvr_pds_fragment_program_params.temp_count,
+      pvr_pds_fragment_program_params.msaa_mode,
+      pvr_pds_fragment_program_params.has_phase_rate_change,
+      &load_op->pds_frag_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_usc_frag_prog_bo;
+
+   result = pvr_pds_texture_state_program_create_and_upload(
+      device,
+      allocator,
+      &load_op->pds_tex_state_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_frag_prog;
+
+   load_op->is_hw_object = true;
+   /* FIXME: These should be based on the USC and PDS programs, but are hard
+    * coded for now.
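+    * The counts below are placeholders sized for the hard-coded load op
+    * fragment shader; once the compiler is hooked up they should come from
+    * the program binaries instead.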
+ */
+   load_op->const_shareds_count = 1;
+   load_op->shareds_dest_offset = 0;
+   load_op->shareds_count = 1;
+   load_op->temps_count = 1;
+
+   *load_op_out = load_op;
+
+   return VK_SUCCESS;
+
+err_free_pds_frag_prog:
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+
+err_free_usc_frag_prog_bo:
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+
+err_free_load_op:
+   vk_free2(&device->vk.alloc, allocator, load_op);
+
+   return result;
+}
+
+static void pvr_load_op_destroy(struct pvr_device *device,
+                                const VkAllocationCallbacks *allocator,
+                                struct pvr_load_op *load_op)
+{
+   pvr_bo_free(device, load_op->pds_tex_state_prog.pvr_bo);
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+   vk_free2(&device->vk.alloc, allocator, load_op);
+}
+
+#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)              \
+   ({                                                        \
+      int __ret = 7U;                                        \
+      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
+         __ret = 3U;                                         \
+      __ret;                                                 \
+   })
+
+VkResult pvr_CreateRenderPass2(VkDevice _device,
+                               const VkRenderPassCreateInfo2KHR *pCreateInfo,
+                               const VkAllocationCallbacks *pAllocator,
+                               VkRenderPass *pRenderPass)
+{
+   struct pvr_render_pass_attachment *attachments;
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_render_subpass *subpasses;
+   size_t subpass_attachment_count;
+   uint32_t *subpass_attachments;
+   struct pvr_render_pass *pass;
+   uint32_t *dep_list;
+   bool *flush_on_dep;
+   VkResult result;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
+   vk_multialloc_add(&ma,
+                     &attachments,
+                     __typeof__(*attachments),
+                     pCreateInfo->attachmentCount);
+   vk_multialloc_add(&ma,
+                     &subpasses,
+                     __typeof__(*subpasses),
+                     pCreateInfo->subpassCount);
+
+   subpass_attachment_count = 0;
+   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+      subpass_attachment_count +=
+         pvr_num_subpass_attachments(&pCreateInfo->pSubpasses[i]);
+   }
+
+   vk_multialloc_add(&ma,
+                     &subpass_attachments,
+                     __typeof__(*subpass_attachments),
+                     subpass_attachment_count);
+   vk_multialloc_add(&ma,
+                     &dep_list,
+                     __typeof__(*dep_list),
+                     pCreateInfo->dependencyCount);
+   vk_multialloc_add(&ma,
+                     &flush_on_dep,
+                     __typeof__(*flush_on_dep),
+                     pCreateInfo->dependencyCount);
+
+   /* vk_multialloc_zalloc2() takes a VkSystemAllocationScope, not an object
+    * type.
+    */
+   if (!vk_multialloc_zalloc2(&ma,
+                              &device->vk.alloc,
+                              pAllocator,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) {
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+   pass->attachment_count = pCreateInfo->attachmentCount;
+   pass->attachments = attachments;
+   pass->subpass_count = pCreateInfo->subpassCount;
+   pass->subpasses = subpasses;
+   pass->max_sample_count = 1;
+
+   /* Copy attachment descriptions.
+    */
+   for (uint32_t i = 0; i < pass->attachment_count; i++) {
+      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
+      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
+
+      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));
+
+      attachment->load_op = desc->loadOp;
+      attachment->store_op = desc->storeOp;
+
+      /* vk_format must be set before it is used to determine has_stencil. */
+      attachment->vk_format = desc->format;
+
+      attachment->has_stencil = vk_format_has_stencil(attachment->vk_format);
+      if (attachment->has_stencil) {
+         attachment->stencil_load_op = desc->stencilLoadOp;
+         attachment->stencil_store_op = desc->stencilStoreOp;
+      }
+
+      attachment->sample_count = desc->samples;
+      attachment->initial_layout = desc->initialLayout;
+      attachment->is_pbe_downscalable =
+         pvr_format_is_pbe_downscalable(attachment->vk_format);
+      attachment->index = i;
+
+      if (attachment->sample_count > pass->max_sample_count)
+         pass->max_sample_count = attachment->sample_count;
+   }
+
+   /* Count how many dependencies each subpass has. */
+   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
+      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->srcSubpass != dep->dstSubpass) {
+         pass->subpasses[dep->dstSubpass].dep_count++;
+      }
+   }
+
+   /* Assign reference pointers to lists, and fill in the attachments list.
+    * We need to re-walk the dependencies array later to fill in the
+    * per-subpass dependency lists.
+    */
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
+      struct pvr_render_subpass *subpass = &pass->subpasses[i];
+
+      subpass->pipeline_bind_point = desc->pipelineBindPoint;
+      subpass->sample_count = 1;
+
+      subpass->color_count = desc->colorAttachmentCount;
+      if (subpass->color_count > 0) {
+         bool has_used_color_attachment = false;
+         uint32_t index;
+
+         subpass->color_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->color_attachments[j] =
+               desc->pColorAttachments[j].attachment;
+
+            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
+               continue;
+
+            index = subpass->color_attachments[j];
+            subpass->sample_count = pass->attachments[index].sample_count;
+            has_used_color_attachment = true;
+         }
+
+         if (!has_used_color_attachment && desc->pDepthStencilAttachment &&
+             desc->pDepthStencilAttachment->attachment !=
+                VK_ATTACHMENT_UNUSED) {
+            index = desc->pDepthStencilAttachment->attachment;
+            subpass->sample_count = pass->attachments[index].sample_count;
+         }
+      }
+
+      if (desc->pResolveAttachments) {
+         subpass->resolve_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->resolve_attachments[j] =
+               desc->pResolveAttachments[j].attachment;
+         }
+      }
+
+      subpass->input_count = desc->inputAttachmentCount;
+      if (subpass->input_count > 0) {
+         subpass->input_attachments = subpass_attachments;
+         subpass_attachments += subpass->input_count;
+
+         for (uint32_t j = 0; j < subpass->input_count; j++) {
+            subpass->input_attachments[j] =
+               desc->pInputAttachments[j].attachment;
+         }
+      }
+
+      if (desc->pDepthStencilAttachment) {
+         subpass->depth_stencil_attachment = subpass_attachments++;
+         *subpass->depth_stencil_attachment =
+            desc->pDepthStencilAttachment->attachment;
+      }
+
+      /* Give the dependencies a slice of the dep_list and flush_on_dep
+       * arrays.
*/ + subpass->dep_list = dep_list; + dep_list += subpass->dep_count; + subpass->flush_on_dep = flush_on_dep; + flush_on_dep += subpass->dep_count; + + /* Reset the dependencies count so we can start from 0 and index into + * the dependencies array. + */ + subpass->dep_count = 0; + subpass->index = i; + } + + /* Compute dependencies and populate dep_list and flush_on_dep. */ + for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { + const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i]; + + if (dep->srcSubpass != VK_SUBPASS_EXTERNAL && + dep->dstSubpass != VK_SUBPASS_EXTERNAL && + dep->srcSubpass != dep->dstSubpass) { + struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass]; + + subpass->dep_list[subpass->dep_count] = dep->srcSubpass; + if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo)) + subpass->flush_on_dep[subpass->dep_count] = true; + + subpass->dep_count++; + } + } + + pass->max_tilebuffer_count = + PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info); + + pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false); + if (!pass->hw_setup) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_pass; + } + + pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses); + + for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { + struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[i]; + struct pvr_load_op *load_op = NULL; + + if (hw_render->tile_buffers_count) + pvr_finishme("Set up tile buffer table"); + + if (!hw_render->color_init_count) { + assert(!hw_render->client_data); + continue; + } + + if (!pvr_has_output_register_writes(hw_render)) + pvr_finishme("Add output register write"); + + result = pvr_load_op_create(device, pAllocator, hw_render, &load_op); + if (result != VK_SUCCESS) + goto err_load_op_destroy; + + hw_render->client_data = load_op; + } + + *pRenderPass = pvr_render_pass_to_handle(pass); + + return VK_SUCCESS; + +err_load_op_destroy: + for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { + struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[i]; + + if (hw_render->client_data) + pvr_load_op_destroy(device, pAllocator, hw_render->client_data); + } + + pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); + +err_free_pass: + vk_object_base_finish(&pass->base); + vk_free2(&device->vk.alloc, pAllocator, pass); + + return result; +} + +void pvr_DestroyRenderPass(VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_render_pass, pass, _pass); + + if (!pass) + return; + + for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { + struct pvr_renderpass_hwsetup_render *hw_render = + &pass->hw_setup->renders[i]; + + pvr_load_op_destroy(device, pAllocator, hw_render->client_data); + } + + pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); + vk_object_base_finish(&pass->base); + vk_free2(&device->vk.alloc, pAllocator, pass); +} + +void pvr_GetRenderAreaGranularity(VkDevice _device, + VkRenderPass renderPass, + VkExtent2D *pGranularity) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + + /* Granularity does not depend on any settings in the render pass, so return + * the tile granularity. + * + * The default value is based on the minimum value found in all existing + * cores. 
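+    * In other words, cores that do not report the tile_size_x/y features are
+    * assumed to use 16x16 tiles, which is what the fallback value of 16 in
+    * the feature queries below encodes.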
+ */
+   pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16);
+   pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16);
+}
diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c
new file mode 100644
index 00000000000..4584ddd0489
--- /dev/null
+++ b/src/imagination/vulkan/pvr_pipeline.c
@@ -0,0 +1,1859 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on v3dv driver which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "nir/nir.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "pvr_usc_compute_shader.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_build_data.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/ralloc.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+#define WORKGROUP_DIMENSIONS 3U
+
+/* FIXME: Remove this when the compiler is hooked up. */
+/******************************************************************************
+   Hard coding
+ ******************************************************************************/
+/* This section contains hard coding related structs. */
+
+struct pvr_explicit_constant_usage {
+   /* Hardware register number assigned to the explicit constant with the
+    * lowest pre_assigned offset.
+    */
+   uint32_t start_offset;
+};
+
+static const struct {
+   uint32_t local_invocation_regs[2];
+
+   uint32_t work_group_regs[WORKGROUP_DIMENSIONS];
+
+   uint32_t barrier_reg;
+
+   uint32_t usc_temps;
+} pvr_pds_compute_program_params = {
+   .local_invocation_regs = { 0, 1 },
+
+   .work_group_regs = { 0, 1, 2 },
+
+   .barrier_reg = ROGUE_REG_UNUSED,
+
+   .usc_temps = 0,
+};
+
+/*****************************************************************************
+   PDS functions
+*****************************************************************************/
+
+/* If allocator == NULL, the internal one will be used.
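+ * (vk_alloc2()/vk_free2() fall back to device->vk.alloc when the explicit
+ * allocator is NULL, so the call sites need no NULL checks of their own.)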
+ */
+static VkResult pvr_pds_coeff_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const uint32_t *fpu_iterators,
+   uint32_t fpu_iterators_count,
+   const uint32_t *destinations,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_coeff_loading_program program = {
+      .num_fpu_iterators = fpu_iterators_count,
+   };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   assert(fpu_iterators_count < PVR_MAXIMUM_ITERATIONS);
+
+   /* Get the size of the program and then allocate that much memory. */
+   pvr_pds_coefficient_loading(&program, NULL, PDS_GENERATE_SIZES);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: Should we save pointers when we redesign the pds gen api? */
+   typed_memcpy(program.FPU_iterators,
+                fpu_iterators,
+                program.num_fpu_iterators);
+
+   typed_memcpy(program.destination, destinations, program.num_fpu_iterators);
+
+   /* Generate the program into the staging_buffer. */
+   pvr_pds_coefficient_loading(&program,
+                               staging_buffer,
+                               PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               program.data_size,
+                               16,
+                               &staging_buffer[program.data_size],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* FIXME: move this elsewhere since it's also called in pvr_pass.c? */
+/* If allocator == NULL, the internal one will be used. */
+VkResult pvr_pds_fragment_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const struct pvr_bo *fragment_shader_bo,
+   uint32_t fragment_temp_count,
+   enum rogue_msaa_mode msaa_mode,
+   bool has_phase_rate_change,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const enum PVRX(PDSINST_DOUTU_SAMPLE_RATE)
+      sample_rate = pvr_sample_rate_from_usc_msaa_mode(msaa_mode);
+   struct pvr_pds_kickusc_program program = { 0 };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   /* FIXME: Should it be passing in the USC offset rather than address here?
+    */
+   /* Note this is not strictly required to be done before calculating the
+    * staging_buffer_size in this particular case. It can also be done after
+    * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
+    */
+   pvr_pds_setup_doutu(&program.usc_task_control,
+                       fragment_shader_bo->vma->dev_addr.addr,
+                       fragment_temp_count,
+                       sample_rate,
+                       has_phase_rate_change);
+
+   pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pvr_pds_kick_usc(&program,
+                    staging_buffer,
+                    0,
+                    false,
+                    PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16.
+    */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               program.data_size,
+                               16,
+                               &staging_buffer[program.data_size],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+static inline size_t pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
+   const struct pvr_device_info *dev_info,
+   bool robust_buffer_access)
+{
+   /* FIXME: Use more local variables to improve formatting. */
+
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_vertex_primary_program().
+    * When robustBufferAccess is disabled, it must be >= 410.
+    * When robustBufferAccess is enabled, it must be >= 570.
+    *
+    *  1. Size of entry for base instance
+    *         (pvr_const_map_entry_base_instance)
+    *
+    *  2. Max. number of vertex inputs (PVR_MAX_VERTEX_INPUT_BINDINGS) * (
+    *      if (!robustBufferAccess)
+    *          size of vertex attribute entry
+    *              (pvr_const_map_entry_vertex_attribute_address) +
+    *      else
+    *          size of robust vertex attribute entry
+    *              (pvr_const_map_entry_robust_vertex_attribute_address) +
+    *          size of entry for max attribute index
+    *              (pvr_const_map_entry_vertex_attribute_max_index) +
+    *      fi
+    *      size of Unified Store burst entry
+    *          (pvr_const_map_entry_literal32) +
+    *      size of entry for vertex stride
+    *          (pvr_const_map_entry_literal32) +
+    *      size of entries for DDMAD control word
+    *          (num_ddmad_literals * pvr_const_map_entry_literal32))
+    *
+    *  3. Size of entry for DOUTW vertex/instance control word
+    *      (pvr_const_map_entry_literal32)
+    *
+    *  4. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   const size_t attribute_size =
+      (!robust_buffer_access)
+         ? sizeof(struct pvr_const_map_entry_vertex_attribute_address)
+         : sizeof(struct pvr_const_map_entry_robust_vertex_attribute_address) +
+              sizeof(struct pvr_const_map_entry_vertex_attribute_max_index);
+
+   /* If has_pds_ddmadt the DDMAD control word is now a DDMADT control word
+    * and is increased by one DWORD to contain the data for the DDMADT's
+    * out-of-bounds check.
+    */
+   const size_t pvr_pds_const_map_vertex_entry_num_ddmad_literals =
+      1U + (size_t)PVR_HAS_FEATURE(dev_info, pds_ddmadt);
+
+   return (sizeof(struct pvr_const_map_entry_base_instance) +
+           PVR_MAX_VERTEX_INPUT_BINDINGS *
+              (attribute_size +
+               (2 + pvr_pds_const_map_vertex_entry_num_ddmad_literals) *
+                  sizeof(struct pvr_const_map_entry_literal32)) +
+           sizeof(struct pvr_const_map_entry_literal32) +
+           sizeof(struct pvr_const_map_entry_doutu_address));
+}
+
+/* This is a const pointer to an array of pvr_pds_vertex_dma structs.
+ * The array being pointed to is of PVR_MAX_VERTEX_ATTRIB_DMAS size.
+ */
+typedef struct pvr_pds_vertex_dma (
+   *const
+      pvr_pds_attrib_dma_descriptions_array_ptr)[PVR_MAX_VERTEX_ATTRIB_DMAS];
+
+/* dma_descriptions_out_ptr is a pointer to the array used as output.
+ * The whole array might not be filled so dma_count_out indicates how many
+ * elements were used.
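+ * For example, two vertex attributes sharing a single binding still produce
+ * two DMA descriptions (one per attribute, not per binding), so
+ * *dma_count_out would be 2 in that case.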
+ */ +static void pvr_pds_vertex_attrib_init_dma_descriptions( + const VkPipelineVertexInputStateCreateInfo *const vertex_input_state, + const struct rogue_vs_build_data *vs_data, + pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr, + uint32_t *const dma_count_out) +{ + struct pvr_pds_vertex_dma *const dma_descriptions = + *dma_descriptions_out_ptr; + uint32_t dma_count = 0; + + if (!vertex_input_state) { + *dma_count_out = 0; + return; + } + + for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount; + i++) { + const VkVertexInputAttributeDescription *const attrib_desc = + &vertex_input_state->pVertexAttributeDescriptions[i]; + const VkVertexInputBindingDescription *binding_desc = NULL; + + /* Finding the matching binding description. */ + for (uint32_t j = 0; + j < vertex_input_state->vertexBindingDescriptionCount; + j++) { + const VkVertexInputBindingDescription *const current_binding_desc = + &vertex_input_state->pVertexBindingDescriptions[j]; + + if (current_binding_desc->binding == attrib_desc->binding) { + binding_desc = current_binding_desc; + break; + } + } + + /* From the Vulkan 1.2.195 spec for + * VkPipelineVertexInputStateCreateInfo: + * + * "For every binding specified by each element of + * pVertexAttributeDescriptions, a + * VkVertexInputBindingDescription must exist in + * pVertexBindingDescriptions with the same value of binding" + * + * So we don't check if we found the matching binding description + * or not. + */ + + struct pvr_pds_vertex_dma *const dma_desc = &dma_descriptions[dma_count]; + + size_t location = attrib_desc->location; + assert(location < vs_data->inputs.num_input_vars); + + dma_desc->offset = attrib_desc->offset; + dma_desc->stride = binding_desc->stride; + + dma_desc->flags = 0; + + if (binding_desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) + dma_desc->flags |= PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE; + + dma_desc->size_in_dwords = vs_data->inputs.components[location]; + /* TODO: This will be different when other types are supported. + * Store in vs_data with base and components? + */ + /* TODO: Use attrib_desc->format. 
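+       * For now every component is assumed to be one 32-bit register wide,
+       * which is what the ROGUE_REG_SIZE_BYTES value below encodes.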
*/ + dma_desc->component_size_in_bytes = ROGUE_REG_SIZE_BYTES; + dma_desc->destination = vs_data->inputs.base[location]; + dma_desc->binding_index = attrib_desc->binding; + dma_desc->divisor = 1; + dma_desc->robustness_buffer_offset = 0; + + ++dma_count; + } + + *dma_count_out = dma_count; +} + +static VkResult pvr_pds_vertex_attrib_program_create_and_upload( + struct pvr_device *const device, + const VkAllocationCallbacks *const allocator, + struct pvr_pds_vertex_primary_program_input *const input, + struct pvr_pds_attrib_program *const program_out) +{ + const size_t const_entries_size_in_bytes = + pvr_pds_get_max_vertex_program_const_map_size_in_bytes( + &device->pdevice->dev_info, + device->features.robustBufferAccess); + struct pvr_pds_upload *const program = &program_out->program; + struct pvr_pds_info *const info = &program_out->info; + struct pvr_const_map_entry *entries_buffer; + ASSERTED uint32_t code_size_in_dwords; + size_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + memset(info, 0, sizeof(*info)); + + entries_buffer = vk_alloc2(&device->vk.alloc, + allocator, + const_entries_size_in_bytes, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!entries_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + info->entries = entries_buffer; + info->entries_size_in_bytes = const_entries_size_in_bytes; + + pvr_pds_generate_vertex_primary_program(input, + NULL, + info, + device->features.robustBufferAccess, + &device->pdevice->dev_info); + + code_size_in_dwords = info->code_size_in_dwords; + staging_buffer_size = info->code_size_in_dwords * sizeof(*staging_buffer); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) { + vk_free2(&device->vk.alloc, allocator, entries_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* This also fills in info->entries. */ + pvr_pds_generate_vertex_primary_program(input, + staging_buffer, + info, + device->features.robustBufferAccess, + &device->pdevice->dev_info); + + assert(info->code_size_in_dwords <= code_size_in_dwords); + + /* FIXME: Add a vk_realloc2() ? */ + entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator, + entries_buffer, + info->entries_written_size_in_bytes, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!entries_buffer) { + vk_free2(&device->vk.alloc, allocator, staging_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + info->entries = entries_buffer; + info->entries_size_in_bytes = info->entries_written_size_in_bytes; + + /* FIXME: Figure out the define for alignment of 16. */ + result = pvr_gpu_upload_pds(device, + NULL, + 0, + 0, + staging_buffer, + info->code_size_in_dwords, + 16, + 16, + program); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, entries_buffer); + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return VK_SUCCESS; +} + +static inline void pvr_pds_vertex_attrib_program_destroy( + struct pvr_device *const device, + const struct VkAllocationCallbacks *const allocator, + struct pvr_pds_attrib_program *const program) +{ + pvr_bo_free(device, program->program.pvr_bo); + vk_free2(&device->vk.alloc, allocator, program->info.entries); +} + +/* This is a const pointer to an array of pvr_pds_attrib_program structs. 
+ * The array being pointed to is of PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT size.
+ */
+typedef struct pvr_pds_attrib_program (*const pvr_pds_attrib_programs_array_ptr)
+   [PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];
+
+/* Generates and uploads a PDS program for DMAing vertex attribs into USC
+ * vertex inputs. This will bake the code segment and create a template of the
+ * data segment for the command buffer to fill in.
+ */
+/* If allocator == NULL, the internal one will be used.
+ *
+ * programs_out_ptr is a pointer to the array where the outputs will be placed.
+ */
+static VkResult pvr_pds_vertex_attrib_programs_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *const allocator,
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
+   uint32_t usc_temp_count,
+   const struct rogue_vs_build_data *vs_data,
+   pvr_pds_attrib_programs_array_ptr programs_out_ptr)
+{
+   struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
+   struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr;
+   struct pvr_pds_vertex_primary_program_input input = {
+      .dma_list = dma_descriptions,
+   };
+   VkResult result;
+
+   pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
+                                               vs_data,
+                                               &dma_descriptions,
+                                               &input.dma_count);
+
+   pvr_pds_setup_doutu(&input.usc_task_control,
+                       0,
+                       usc_temp_count,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* TODO: If statements for all the "bRequired"s + ui32ExtraFlags. */
+
+   /* Note: programs_out_ptr is a pointer to an array so this is fine. See the
+    * typedef.
+    */
+   for (uint32_t i = 0; i < ARRAY_SIZE(*programs_out_ptr); i++) {
+      switch (i) {
+      case PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC:
+         input.flags = 0;
+         break;
+
+      case PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE:
+         input.flags = PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT;
+         break;
+
+      case PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT:
+         /* We unset INSTANCE and set INDIRECT. */
+         input.flags = PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT;
+         break;
+
+      default:
+         unreachable("Invalid vertex attrib program type.");
+      }
+
+      result =
+         pvr_pds_vertex_attrib_program_create_and_upload(device,
+                                                         allocator,
+                                                         &input,
+                                                         &programs_out[i]);
+      if (result != VK_SUCCESS) {
+         for (uint32_t j = 0; j < i; j++) {
+            pvr_pds_vertex_attrib_program_destroy(device,
+                                                  allocator,
+                                                  &programs_out[j]);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+static size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void)
+{
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_descriptor_upload_program().
+    * It must be >= 688 bytes. This size is calculated as the sum of:
+    *
+    *  1. Max. number of descriptor sets (8) * (
+    *         size of descriptor entry
+    *             (pvr_const_map_entry_descriptor_set) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32))
+    *
+    *  2. Max. number of PDS program buffers (24) * (
+    *         size of the largest buffer structure
+    *             (pvr_const_map_entry_constant_buffer) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32)
+    *
+    *  3. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   /* FIXME: PVR_MAX_DESCRIPTOR_SETS is 4 and not 8. The comment above seems to
+    * say that it should be 8.
+    * Figure out a define for this, or is the comment wrong?
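+    * (If 4 is the real limit, the first term below merely over-allocates by
+    * a factor of two, which is safe, just wasteful.)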
+ */ + return (8 * (sizeof(struct pvr_const_map_entry_descriptor_set) + + sizeof(struct pvr_const_map_entry_literal32)) + + PVR_PDS_MAX_BUFFERS * + (sizeof(struct pvr_const_map_entry_constant_buffer) + + sizeof(struct pvr_const_map_entry_literal32)) + + sizeof(struct pvr_const_map_entry_doutu_address)); +} + +/* This is a const pointer to an array of PVR_PDS_MAX_BUFFERS pvr_pds_buffer + * structs. + */ +typedef struct pvr_pds_buffer ( + *const pvr_pds_uniform_program_buffer_array_ptr)[PVR_PDS_MAX_BUFFERS]; + +static void pvr_pds_uniform_program_setup_buffers( + bool robust_buffer_access, + const struct rogue_ubo_data *ubo_data, + pvr_pds_uniform_program_buffer_array_ptr buffers_out_ptr, + uint32_t *const buffer_count_out) +{ + struct pvr_pds_buffer *const buffers = *buffers_out_ptr; + uint32_t buffer_count = 0; + + for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) { + struct pvr_pds_buffer *current_buffer = &buffers[buffer_count++]; + + /* This is fine since buffers_out_ptr is a pointer to an array. */ + assert(buffer_count <= ARRAY_SIZE(*buffers_out_ptr)); + + current_buffer->type = PVR_BUFFER_TYPE_UBO; + current_buffer->size_in_dwords = ubo_data->size[u]; + current_buffer->destination = ubo_data->dest[u]; + + current_buffer->buffer_id = buffer_count; + current_buffer->desc_set = ubo_data->desc_set[u]; + current_buffer->binding = ubo_data->binding[u]; + /* TODO: Is this always the case? + * E.g. can multiple UBOs have the same base buffer? + */ + current_buffer->source_offset = 0; + } + + *buffer_count_out = buffer_count; +} + +static VkResult pvr_pds_uniform_program_create_and_upload( + struct pvr_device *const device, + const VkAllocationCallbacks *const allocator, + const struct rogue_ubo_data *const ubo_data, + const struct pvr_explicit_constant_usage *const explicit_const_usage, + const struct pvr_pipeline_layout *const layout, + enum pvr_stage_allocation stage, + struct pvr_pds_upload *const pds_code_upload_out, + struct pvr_pds_info *const pds_info_out) +{ + const size_t const_entries_size_in_bytes = + pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(); + struct pvr_descriptor_program_input program = { 0 }; + struct pvr_const_map_entry *entries_buffer; + ASSERTED uint32_t code_size_in_dwords; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + assert(stage != PVR_STAGE_ALLOCATION_COUNT); + + memset(pds_info_out, 0, sizeof(*pds_info_out)); + + pvr_pds_uniform_program_setup_buffers(device->features.robustBufferAccess, + ubo_data, + &program.buffers, + &program.buffer_count); + + for (uint32_t dma = 0; dma < program.buffer_count; dma++) { + if (program.buffers[dma].type != PVR_BUFFER_TYPES_COMPILE_TIME) + continue; + + assert(!"Unimplemented"); + } + + if (layout->per_stage_reg_info[stage].primary_dynamic_size_in_dwords) + assert(!"Unimplemented"); + + for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) { + const struct pvr_descriptor_set_layout_mem_layout *const reg_layout = + &layout->register_layout_in_dwords_per_stage[stage][set_num]; + const uint32_t start_offset = explicit_const_usage->start_offset; + + /* TODO: Use compiler usage info to optimize this? */ + + /* Only dma primaries if they are actually required. 
*/ + if (reg_layout->primary_size) { + program.descriptor_sets[program.descriptor_set_count++] = + (struct pvr_pds_descriptor_set){ + .descriptor_set = set_num, + .size_in_dwords = reg_layout->primary_size, + .destination = reg_layout->primary_offset + start_offset, + .primary = true, + }; + } + + /* Only dma secondaries if they are actually required. */ + if (!reg_layout->secondary_size) + continue; + + program.descriptor_sets[program.descriptor_set_count++] = + (struct pvr_pds_descriptor_set){ + .descriptor_set = set_num, + .size_in_dwords = reg_layout->secondary_size, + .destination = reg_layout->secondary_offset + start_offset, + }; + } + + entries_buffer = vk_alloc2(&device->vk.alloc, + allocator, + const_entries_size_in_bytes, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!entries_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pds_info_out->entries = entries_buffer; + pds_info_out->entries_size_in_bytes = const_entries_size_in_bytes; + + pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info_out); + + code_size_in_dwords = pds_info_out->code_size_in_dwords; + staging_buffer_size = + pds_info_out->code_size_in_dwords * sizeof(*staging_buffer); + + if (!staging_buffer_size) { + vk_free2(&device->vk.alloc, allocator, entries_buffer); + + memset(pds_info_out, 0, sizeof(*pds_info_out)); + memset(pds_code_upload_out, 0, sizeof(*pds_code_upload_out)); + return VK_SUCCESS; + } + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) { + vk_free2(&device->vk.alloc, allocator, entries_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pvr_pds_generate_descriptor_upload_program(&program, + staging_buffer, + pds_info_out); + + assert(pds_info_out->code_size_in_dwords <= code_size_in_dwords); + + /* FIXME: use vk_realloc2() ? */ + entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator, + entries_buffer, + pds_info_out->entries_written_size_in_bytes, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!entries_buffer) { + vk_free2(&device->vk.alloc, allocator, staging_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pds_info_out->entries = entries_buffer; + pds_info_out->entries_size_in_bytes = + pds_info_out->entries_written_size_in_bytes; + + /* FIXME: Figure out the define for alignment of 16. */ + result = pvr_gpu_upload_pds(device, + NULL, + 0, + 0, + staging_buffer, + pds_info_out->code_size_in_dwords, + 16, + 16, + pds_code_upload_out); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, entries_buffer); + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return VK_SUCCESS; +} + +static void pvr_pds_uniform_program_destroy( + struct pvr_device *const device, + const struct VkAllocationCallbacks *const allocator, + struct pvr_pds_upload *const pds_code, + struct pvr_pds_info *const pds_info) +{ + pvr_bo_free(device, pds_code->pvr_bo); + vk_free2(&device->vk.alloc, allocator, pds_info->entries); +} + +/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged. 
+ */ +static VkResult pvr_pds_compute_program_create_and_upload( + struct pvr_device *const device, + const VkAllocationCallbacks *const allocator, + const uint32_t local_input_regs[static const WORKGROUP_DIMENSIONS], + const uint32_t work_group_input_regs[static const WORKGROUP_DIMENSIONS], + uint32_t barrier_coefficient, + bool add_base_workgroup, + uint32_t usc_temps, + pvr_dev_addr_t usc_shader_dev_addr, + struct pvr_pds_upload *const pds_upload_out, + struct pvr_pds_info *const pds_info_out, + uint32_t *const base_workgroup_data_patching_offset_out) +{ + struct pvr_pds_compute_shader_program program = { + /* clang-format off */ + .local_input_regs = { + local_input_regs[0], + local_input_regs[1], + local_input_regs[2] + }, + .work_group_input_regs = { + work_group_input_regs[0], + work_group_input_regs[1], + work_group_input_regs[2] + }, + .global_input_regs = { + [0 ... (WORKGROUP_DIMENSIONS - 1)] = + PVR_PDS_COMPUTE_INPUT_REG_UNUSED + }, + /* clang-format on */ + .barrier_coefficient = barrier_coefficient, + .flattened_work_groups = true, + .clear_pds_barrier = false, + .add_base_workgroup = add_base_workgroup, + .kick_usc = true, + }; + struct pvr_device_info *dev_info = &device->pdevice->dev_info; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == WORKGROUP_DIMENSIONS); + STATIC_ASSERT(ARRAY_SIZE(program.work_group_input_regs) == + WORKGROUP_DIMENSIONS); + STATIC_ASSERT(ARRAY_SIZE(program.global_input_regs) == WORKGROUP_DIMENSIONS); + + assert(!add_base_workgroup || base_workgroup_data_patching_offset_out); + + pvr_pds_setup_doutu(&program.usc_task_control, + usc_shader_dev_addr.addr, + usc_temps, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); + + pvr_pds_compute_shader(&program, NULL, PDS_GENERATE_SIZES, dev_info); + + /* FIXME: According to pvr_device_init_compute_pds_program() the code size + * is in bytes. Investigate this. + */ + staging_buffer_size = + (program.code_size + program.data_size) * sizeof(*staging_buffer); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* FIXME: pvr_pds_compute_shader doesn't implement + * PDS_GENERATE_CODEDATA_SEGMENTS. + */ + pvr_pds_compute_shader(&program, + &staging_buffer[0], + PDS_GENERATE_CODE_SEGMENT, + dev_info); + + pvr_pds_compute_shader(&program, + &staging_buffer[program.code_size], + PDS_GENERATE_DATA_SEGMENT, + dev_info); + + /* We'll need to patch the base workgroup in the PDS data section before + * dispatch so we give back the offsets at which to patch. We only need to + * save the offset for the first workgroup id since the workgroup ids are + * stored contiguously in the data segment. + */ + if (add_base_workgroup) { + *base_workgroup_data_patching_offset_out = + program.base_workgroup_constant_offset_in_dwords[0]; + } + + /* FIXME: Figure out the define for alignment of 16. 
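+ * Both the data and code sections are uploaded with an alignment of 16
+ * bytes here.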
*/
+ result = pvr_gpu_upload_pds(device,
+ &staging_buffer[program.code_size],
+ program.data_size,
+ 16,
+ &staging_buffer[0],
+ program.code_size,
+ 16,
+ 16,
+ pds_upload_out);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, allocator, staging_buffer);
+ return result;
+ }
+
+ *pds_info_out = (struct pvr_pds_info){
+ .temps_required = program.highest_temp,
+ .code_size_in_dwords = program.code_size,
+ .data_size_in_dwords = program.data_size,
+ };
+
+ vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+ return VK_SUCCESS;
+}
+
+static void pvr_pds_compute_program_destroy(
+ struct pvr_device *const device,
+ const struct VkAllocationCallbacks *const allocator,
+ struct pvr_pds_upload *const pds_program,
+ struct pvr_pds_info *const pds_info)
+{
+ /* We don't allocate an entries buffer so we don't need to free it. */
+ pvr_bo_free(device, pds_program->pvr_bo);
+}
+
+/******************************************************************************
+ Generic pipeline functions
+ ******************************************************************************/
+
+static void pvr_pipeline_init(struct pvr_device *device,
+ enum pvr_pipeline_type type,
+ struct pvr_pipeline *const pipeline)
+{
+ assert(!pipeline->layout);
+
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+ pipeline->type = type;
+}
+
+static void pvr_pipeline_finish(struct pvr_pipeline *pipeline)
+{
+ vk_object_base_finish(&pipeline->base);
+}
+
+/******************************************************************************
+ Compute pipeline functions
+ ******************************************************************************/
+
+/* Compiles and uploads shaders and PDS programs. */
+static VkResult pvr_compute_pipeline_compile(
+ struct pvr_device *const device,
+ struct pvr_pipeline_cache *pipeline_cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *const allocator,
+ struct pvr_compute_pipeline *const compute_pipeline)
+{
+ /* FIXME: Remove this hard coding. */
+ const struct pvr_explicit_constant_usage explicit_const_usage = {
+ .start_offset = 0,
+ };
+ const struct rogue_ubo_data uniform_program_ubo_data = { 0 };
+
+ const uint32_t cache_line_size =
+ rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+ uint32_t work_group_input_regs[WORKGROUP_DIMENSIONS];
+ uint32_t local_input_regs[WORKGROUP_DIMENSIONS];
+ uint32_t barrier_coefficient;
+ VkResult result;
+
+ /* FIXME: Compile the shader. */
+
+ result = pvr_gpu_upload_usc(device,
+ pvr_usc_compute_shader,
+ sizeof(pvr_usc_compute_shader),
+ cache_line_size,
+ &compute_pipeline->state.bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = pvr_pds_uniform_program_create_and_upload(
+ device,
+ allocator,
+ &uniform_program_ubo_data,
+ &explicit_const_usage,
+ compute_pipeline->base.layout,
+ PVR_STAGE_ALLOCATION_COMPUTE,
+ &compute_pipeline->state.uniform.pds_code,
+ &compute_pipeline->state.uniform.pds_info);
+ if (result != VK_SUCCESS)
+ goto err_free_shader;
+
+ /* We make sure that the compiler's unused reg value is compatible with the
+ * PDS API.
+ */
+ STATIC_ASSERT(ROGUE_REG_UNUSED == PVR_PDS_COMPUTE_INPUT_REG_UNUSED);
+
+ barrier_coefficient = pvr_pds_compute_program_params.barrier_reg;
+
+ /* TODO: Maybe change the PDS API to use pointers so we avoid the copy. */
+ local_input_regs[0] =
+ pvr_pds_compute_program_params.local_invocation_regs[0];
+ local_input_regs[1] =
+ pvr_pds_compute_program_params.local_invocation_regs[1];
+ /* This is not a mistake. 
We want to assign element 1 to 2. */ + local_input_regs[2] = + pvr_pds_compute_program_params.local_invocation_regs[1]; + + STATIC_ASSERT(__same_type(work_group_input_regs, + pvr_pds_compute_program_params.work_group_regs)); + typed_memcpy(work_group_input_regs, + pvr_pds_compute_program_params.work_group_regs, + WORKGROUP_DIMENSIONS); + + result = pvr_pds_compute_program_create_and_upload( + device, + allocator, + local_input_regs, + work_group_input_regs, + barrier_coefficient, + false, + pvr_pds_compute_program_params.usc_temps, + compute_pipeline->state.bo->vma->dev_addr, + &compute_pipeline->state.primary_program, + &compute_pipeline->state.primary_program_info, + NULL); + if (result != VK_SUCCESS) + goto err_free_uniform_program; + + /* If the workgroup ID is required, then we require the base workgroup + * variant of the PDS compute program as well. + */ + compute_pipeline->state.flags.base_workgroup = + work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED || + work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED || + work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED; + + if (compute_pipeline->state.flags.base_workgroup) { + result = pvr_pds_compute_program_create_and_upload( + device, + allocator, + local_input_regs, + work_group_input_regs, + barrier_coefficient, + true, + pvr_pds_compute_program_params.usc_temps, + compute_pipeline->state.bo->vma->dev_addr, + &compute_pipeline->state.primary_program_base_workgroup_variant, + &compute_pipeline->state.primary_program_base_workgroup_variant_info, + &compute_pipeline->state.base_workgroup_ids_dword_offset); + if (result != VK_SUCCESS) + goto err_free_compute_program; + } + + return VK_SUCCESS; + +err_free_compute_program: + if (compute_pipeline->state.flags.base_workgroup) + pvr_bo_free(device, compute_pipeline->state.primary_program.pvr_bo); + +err_free_uniform_program: + pvr_bo_free(device, compute_pipeline->state.uniform.pds_code.pvr_bo); + +err_free_shader: + pvr_bo_free(device, compute_pipeline->state.bo); + + return result; +} + +static VkResult +pvr_compute_pipeline_init(struct pvr_device *device, + struct pvr_pipeline_cache *pipeline_cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *allocator, + struct pvr_compute_pipeline *compute_pipeline) +{ + VkResult result; + + pvr_pipeline_init(device, + PVR_PIPELINE_TYPE_COMPUTE, + &compute_pipeline->base); + + compute_pipeline->base.layout = + pvr_pipeline_layout_from_handle(pCreateInfo->layout); + + result = pvr_compute_pipeline_compile(device, + pipeline_cache, + pCreateInfo, + allocator, + compute_pipeline); + if (result != VK_SUCCESS) { + pvr_pipeline_finish(&compute_pipeline->base); + return result; + } + + return VK_SUCCESS; +} + +static VkResult +pvr_compute_pipeline_create(struct pvr_device *device, + struct pvr_pipeline_cache *pipeline_cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *allocator, + VkPipeline *const pipeline_out) +{ + struct pvr_compute_pipeline *compute_pipeline; + VkResult result; + + compute_pipeline = vk_zalloc2(&device->vk.alloc, + allocator, + sizeof(*compute_pipeline), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!compute_pipeline) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Compiles and uploads shaders and PDS programs. 
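+ * pvr_compute_pipeline_init() also initializes the base pipeline object and
+ * binds the pipeline layout before compiling.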
*/ + result = pvr_compute_pipeline_init(device, + pipeline_cache, + pCreateInfo, + allocator, + compute_pipeline); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, compute_pipeline); + return result; + } + + *pipeline_out = pvr_pipeline_to_handle(&compute_pipeline->base); + + return VK_SUCCESS; +} + +static void pvr_compute_pipeline_destroy( + struct pvr_device *const device, + const VkAllocationCallbacks *const allocator, + struct pvr_compute_pipeline *const compute_pipeline) +{ + if (compute_pipeline->state.flags.base_workgroup) { + pvr_pds_compute_program_destroy( + device, + allocator, + &compute_pipeline->state.primary_program_base_workgroup_variant, + &compute_pipeline->state.primary_program_base_workgroup_variant_info); + } + + pvr_pds_compute_program_destroy( + device, + allocator, + &compute_pipeline->state.primary_program, + &compute_pipeline->state.primary_program_info); + pvr_pds_uniform_program_destroy(device, + allocator, + &compute_pipeline->state.uniform.pds_code, + &compute_pipeline->state.uniform.pds_info); + pvr_bo_free(device, compute_pipeline->state.bo); + + pvr_pipeline_finish(&compute_pipeline->base); + + vk_free2(&device->vk.alloc, allocator, compute_pipeline); +} + +VkResult +pvr_CreateComputePipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache); + PVR_FROM_HANDLE(pvr_device, device, _device); + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; i < createInfoCount; i++) { + const VkResult local_result = + pvr_compute_pipeline_create(device, + pipeline_cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (local_result != VK_SUCCESS) { + result = local_result; + pPipelines[i] = VK_NULL_HANDLE; + } + } + + return result; +} + +/****************************************************************************** + Graphics pipeline functions + ******************************************************************************/ + +static inline uint32_t pvr_dynamic_state_bit_from_vk(VkDynamicState state) +{ + switch (state) { + case VK_DYNAMIC_STATE_VIEWPORT: + return PVR_DYNAMIC_STATE_BIT_VIEWPORT; + case VK_DYNAMIC_STATE_SCISSOR: + return PVR_DYNAMIC_STATE_BIT_SCISSOR; + case VK_DYNAMIC_STATE_LINE_WIDTH: + return PVR_DYNAMIC_STATE_BIT_LINE_WIDTH; + case VK_DYNAMIC_STATE_DEPTH_BIAS: + return PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS; + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + return PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + return PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + return PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + return PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE; + default: + unreachable("Unsupported state."); + } +} + +static void +pvr_graphics_pipeline_destroy(struct pvr_device *const device, + const VkAllocationCallbacks *const allocator, + struct pvr_graphics_pipeline *const gfx_pipeline) +{ + const uint32_t num_vertex_attrib_programs = + ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs); + + pvr_pds_uniform_program_destroy( + device, + allocator, + &gfx_pipeline->fragment_shader_state.uniform_state.pds_code, + &gfx_pipeline->fragment_shader_state.uniform_state.pds_info); + + pvr_pds_uniform_program_destroy( + device, + allocator, + 
&gfx_pipeline->vertex_shader_state.uniform_state.pds_code, + &gfx_pipeline->vertex_shader_state.uniform_state.pds_info); + + for (uint32_t i = 0; i < num_vertex_attrib_programs; i++) { + struct pvr_pds_attrib_program *const attrib_program = + &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i]; + + pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program); + } + + pvr_bo_free(device, + gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo); + pvr_bo_free(device, + gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo); + + pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo); + pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo); + + pvr_pipeline_finish(&gfx_pipeline->base); + + vk_free2(&device->vk.alloc, allocator, gfx_pipeline); +} + +static void +pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline, + const struct rogue_common_build_data *common_data, + const struct rogue_vs_build_data *vs_data) +{ + struct pvr_vertex_shader_state *vertex_state = + &gfx_pipeline->vertex_shader_state; + + /* TODO: Hard coding these for now. These should be populated based on the + * information returned by the compiler. + */ + vertex_state->stage_state.const_shared_reg_count = common_data->shareds; + vertex_state->stage_state.const_shared_reg_offset = 0; + vertex_state->stage_state.temps_count = common_data->temps; + vertex_state->stage_state.coefficient_size = common_data->coeffs; + vertex_state->stage_state.uses_atomic_ops = false; + vertex_state->stage_state.uses_texture_rw = false; + vertex_state->stage_state.uses_barrier = false; + vertex_state->stage_state.has_side_effects = false; + vertex_state->stage_state.empty_program = false; + + vertex_state->vertex_input_size = vs_data->num_vertex_input_regs; + vertex_state->vertex_output_size = + vs_data->num_vertex_outputs * ROGUE_REG_SIZE_BYTES; + vertex_state->output_selects = 0; + vertex_state->user_clip_planes_mask = 0; + vertex_state->entry_offset = 0; + + /* TODO: The number of varyings should be checked against the fragment + * shader inputs and assigned in the place where that happens. + * There will also be an opportunity to cull unused fs inputs/vs outputs. + */ + pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[0], + TA_STATE_VARYING0, + varying0) { + varying0.f32_linear = vs_data->num_varyings; + varying0.f32_flat = 0; + varying0.f32_npc = 0; + } + + pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[1], + TA_STATE_VARYING1, + varying1) { + varying1.f16_linear = 0; + varying1.f16_flat = 0; + varying1.f16_npc = 0; + } +} + +static void +pvr_fragment_state_init(struct pvr_graphics_pipeline *gfx_pipeline, + const struct rogue_common_build_data *common_data) +{ + struct pvr_fragment_shader_state *fragment_state = + &gfx_pipeline->fragment_shader_state; + + /* TODO: Hard coding these for now. These should be populated based on the + * information returned by the compiler. 
+ */ + fragment_state->stage_state.const_shared_reg_count = 0; + fragment_state->stage_state.const_shared_reg_offset = 0; + fragment_state->stage_state.temps_count = common_data->temps; + fragment_state->stage_state.coefficient_size = common_data->coeffs; + fragment_state->stage_state.uses_atomic_ops = false; + fragment_state->stage_state.uses_texture_rw = false; + fragment_state->stage_state.uses_barrier = false; + fragment_state->stage_state.has_side_effects = false; + fragment_state->stage_state.empty_program = false; + + fragment_state->pass_type = 0; + fragment_state->entry_offset = 0; +} + +/* Compiles and uploads shaders and PDS programs. */ +static VkResult +pvr_graphics_pipeline_compile(struct pvr_device *const device, + struct pvr_pipeline_cache *pipeline_cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *const allocator, + struct pvr_graphics_pipeline *const gfx_pipeline) +{ + /* FIXME: Remove this hard coding. */ + const struct pvr_explicit_constant_usage explicit_const_usage = { + .start_offset = 16, + }; + + const VkPipelineVertexInputStateCreateInfo *const vertex_input_state = + pCreateInfo->pVertexInputState; + + const uint32_t cache_line_size = + rogue_get_slc_cache_line_size(&device->pdevice->dev_info); + struct rogue_compiler *compiler = device->pdevice->compiler; + struct rogue_build_ctx *ctx; + VkResult result; + + /* Compile the USC shaders. */ + + /* Setup shared build context. */ + ctx = rogue_create_build_context(compiler); + if (!ctx) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* NIR middle-end translation. */ + for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE; + stage--) { + const VkPipelineShaderStageCreateInfo *create_info; + size_t stage_index = gfx_pipeline->stage_indices[stage]; + + /* Skip unused/inactive stages. */ + if (stage_index == ~0) + continue; + + create_info = &pCreateInfo->pStages[stage_index]; + + /* SPIR-V to NIR. */ + ctx->nir[stage] = pvr_spirv_to_nir(ctx, stage, create_info); + if (!ctx->nir[stage]) { + ralloc_free(ctx); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + + /* Pre-back-end analysis and optimization, driver data extraction. */ + /* TODO: Analyze and cull unused I/O between stages. */ + /* TODO: Allocate UBOs between stages; + * pipeline->layout->set_{count,layout}. + */ + + /* Back-end translation. 
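+ * Lower each remaining NIR stage to Rogue IR, then encode it into a USC
+ * binary ready for upload.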
*/ + for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE; + stage--) { + if (!ctx->nir[stage]) + continue; + + ctx->rogue[stage] = pvr_nir_to_rogue(ctx, ctx->nir[stage]); + if (!ctx->rogue[stage]) { + ralloc_free(ctx); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + ctx->binary[stage] = pvr_rogue_to_binary(ctx, ctx->rogue[stage]); + if (!ctx->binary[stage]) { + ralloc_free(ctx); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + + pvr_vertex_state_init(gfx_pipeline, + &ctx->common_data[MESA_SHADER_VERTEX], + &ctx->stage_data.vs); + + result = pvr_gpu_upload_usc(device, + ctx->binary[MESA_SHADER_VERTEX]->data, + ctx->binary[MESA_SHADER_VERTEX]->size, + cache_line_size, + &gfx_pipeline->vertex_shader_state.bo); + if (result != VK_SUCCESS) + goto err_free_build_context; + + pvr_fragment_state_init(gfx_pipeline, + &ctx->common_data[MESA_SHADER_FRAGMENT]); + + result = pvr_gpu_upload_usc(device, + ctx->binary[MESA_SHADER_FRAGMENT]->data, + ctx->binary[MESA_SHADER_FRAGMENT]->size, + cache_line_size, + &gfx_pipeline->fragment_shader_state.bo); + if (result != VK_SUCCESS) + goto err_free_vertex_bo; + + /* TODO: powervr has an optimization where it attempts to recompile shaders. + * See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented since in our + * case the optimization doesn't happen. + */ + + /* TODO: The programs we use are hard coded for now, but these should be + * selected dynamically. + */ + + result = pvr_pds_coeff_program_create_and_upload( + device, + allocator, + ctx->stage_data.fs.iterator_args.fpu_iterators, + ctx->stage_data.fs.iterator_args.num_fpu_iterators, + ctx->stage_data.fs.iterator_args.destination, + &gfx_pipeline->fragment_shader_state.pds_coeff_program); + if (result != VK_SUCCESS) + goto err_free_fragment_bo; + + result = pvr_pds_fragment_program_create_and_upload( + device, + allocator, + gfx_pipeline->fragment_shader_state.bo, + ctx->common_data[MESA_SHADER_FRAGMENT].temps, + ctx->stage_data.fs.msaa_mode, + ctx->stage_data.fs.phas, + &gfx_pipeline->fragment_shader_state.pds_fragment_program); + if (result != VK_SUCCESS) + goto err_free_coeff_program; + + result = pvr_pds_vertex_attrib_programs_create_and_upload( + device, + allocator, + vertex_input_state, + ctx->common_data[MESA_SHADER_VERTEX].temps, + &ctx->stage_data.vs, + &gfx_pipeline->vertex_shader_state.pds_attrib_programs); + if (result != VK_SUCCESS) + goto err_free_frag_program; + + result = pvr_pds_uniform_program_create_and_upload( + device, + allocator, + &ctx->common_data[MESA_SHADER_VERTEX].ubo_data, + &explicit_const_usage, + gfx_pipeline->base.layout, + PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY, + &gfx_pipeline->vertex_shader_state.uniform_state.pds_code, + &gfx_pipeline->vertex_shader_state.uniform_state.pds_info); + if (result != VK_SUCCESS) + goto err_free_vertex_attrib_program; + + /* FIXME: When the temp_buffer_total_size is non-zero we need to allocate a + * scratch buffer for both vertex and fragment stage. + * Figure out the best place to do this. + */ + /* assert(pvr_pds_uniform_program_variables.temp_buff_total_size == 0); */ + /* TODO: Implement spilling with the above. */ + + /* TODO: Call pvr_pds_uniform_program_create_and_upload in a loop. */ + /* FIXME: For now we pass in the same explicit_const_usage since it contains + * all invalid entries. Fix this by hooking it up to the compiler. 
+ */ + result = pvr_pds_uniform_program_create_and_upload( + device, + allocator, + &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data, + &explicit_const_usage, + gfx_pipeline->base.layout, + PVR_STAGE_ALLOCATION_FRAGMENT, + &gfx_pipeline->fragment_shader_state.uniform_state.pds_code, + &gfx_pipeline->fragment_shader_state.uniform_state.pds_info); + if (result != VK_SUCCESS) + goto err_free_vertex_uniform_program; + + ralloc_free(ctx); + + return VK_SUCCESS; + +err_free_vertex_uniform_program: + pvr_pds_uniform_program_destroy( + device, + allocator, + &gfx_pipeline->vertex_shader_state.uniform_state.pds_code, + &gfx_pipeline->vertex_shader_state.uniform_state.pds_info); +err_free_vertex_attrib_program: + for (uint32_t i = 0; + i < ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs); + i++) { + struct pvr_pds_attrib_program *const attrib_program = + &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i]; + + pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program); + } +err_free_frag_program: + pvr_bo_free(device, + gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo); +err_free_coeff_program: + pvr_bo_free(device, + gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo); +err_free_fragment_bo: + pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo); +err_free_vertex_bo: + pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo); +err_free_build_context: + ralloc_free(ctx); + return result; +} + +static void pvr_graphics_pipeline_init_depth_and_stencil_state( + struct pvr_graphics_pipeline *gfx_pipeline, + const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state) +{ + const VkStencilOpState *front; + const VkStencilOpState *back; + + if (!depth_stencil_state) + return; + + front = &depth_stencil_state->front; + back = &depth_stencil_state->back; + + if (depth_stencil_state->depthTestEnable) { + gfx_pipeline->depth_compare_op = depth_stencil_state->depthCompareOp; + gfx_pipeline->depth_write_disable = + !depth_stencil_state->depthWriteEnable; + } else { + gfx_pipeline->depth_compare_op = VK_COMPARE_OP_ALWAYS; + gfx_pipeline->depth_write_disable = true; + } + + if (depth_stencil_state->stencilTestEnable) { + gfx_pipeline->stencil_front.compare_op = front->compareOp; + gfx_pipeline->stencil_front.fail_op = front->failOp; + gfx_pipeline->stencil_front.depth_fail_op = front->depthFailOp; + gfx_pipeline->stencil_front.pass_op = front->passOp; + + gfx_pipeline->stencil_back.compare_op = back->compareOp; + gfx_pipeline->stencil_back.fail_op = back->failOp; + gfx_pipeline->stencil_back.depth_fail_op = back->depthFailOp; + gfx_pipeline->stencil_back.pass_op = back->passOp; + } else { + gfx_pipeline->stencil_front.compare_op = VK_COMPARE_OP_ALWAYS; + gfx_pipeline->stencil_front.fail_op = VK_STENCIL_OP_KEEP; + gfx_pipeline->stencil_front.depth_fail_op = VK_STENCIL_OP_KEEP; + gfx_pipeline->stencil_front.pass_op = VK_STENCIL_OP_KEEP; + + gfx_pipeline->stencil_back = gfx_pipeline->stencil_front; + } +} + +static void pvr_graphics_pipeline_init_dynamic_state( + struct pvr_graphics_pipeline *gfx_pipeline, + const VkPipelineDynamicStateCreateInfo *dynamic_state, + const VkPipelineViewportStateCreateInfo *viewport_state, + const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state, + const VkPipelineColorBlendStateCreateInfo *color_blend_state, + const VkPipelineRasterizationStateCreateInfo *rasterization_state) +{ + struct pvr_dynamic_state *const internal_dynamic_state = + &gfx_pipeline->dynamic_state; + uint32_t dynamic_states = 0; + + 
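/* Collect the mask of states declared dynamic first; the static state
+ * below is then only initialized for state that remains static.
+ */
+ 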
if (dynamic_state) {
+ for (uint32_t i = 0; i < dynamic_state->dynamicStateCount; i++) {
+ dynamic_states |=
+ pvr_dynamic_state_bit_from_vk(dynamic_state->pDynamicStates[i]);
+ }
+ }
+
+ /* TODO: Verify this.
+ * We don't zero out pipeline state that is dynamic, since it should be
+ * set later on in the command buffer.
+ */
+
+ /* TODO: Handle rasterizerDiscardEnable. */
+
+ if (rasterization_state) {
+ if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH))
+ internal_dynamic_state->line_width = rasterization_state->lineWidth;
+
+ /* TODO: Do we need the depthBiasEnable check? */
+ if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
+ internal_dynamic_state->depth_bias.constant_factor =
+ rasterization_state->depthBiasConstantFactor;
+ internal_dynamic_state->depth_bias.clamp =
+ rasterization_state->depthBiasClamp;
+ internal_dynamic_state->depth_bias.slope_factor =
+ rasterization_state->depthBiasSlopeFactor;
+ }
+ }
+
+ /* TODO: handle viewport state flags. */
+
+ /* TODO: handle static viewport state. */
+ /* We assume the viewport state to be dynamic for now. */
+
+ /* TODO: handle static scissor state. */
+ /* We assume the scissor state to be dynamic for now. */
+
+ if (depth_stencil_state) {
+ const VkStencilOpState *const front = &depth_stencil_state->front;
+ const VkStencilOpState *const back = &depth_stencil_state->back;
+
+ /* VkPhysicalDeviceFeatures->depthBounds is false. */
+ assert(depth_stencil_state->depthBoundsTestEnable == VK_FALSE);
+
+ if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
+ internal_dynamic_state->compare_mask.front = front->compareMask;
+ internal_dynamic_state->compare_mask.back = back->compareMask;
+ }
+
+ if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
+ internal_dynamic_state->write_mask.front = front->writeMask;
+ internal_dynamic_state->write_mask.back = back->writeMask;
+ }
+
+ if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
+ internal_dynamic_state->reference.front = front->reference;
+ internal_dynamic_state->reference.back = back->reference;
+ }
+ }
+
+ if (color_blend_state &&
+ !(dynamic_states & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
+ STATIC_ASSERT(__same_type(internal_dynamic_state->blend_constants,
+ color_blend_state->blendConstants));
+
+ typed_memcpy(internal_dynamic_state->blend_constants,
+ color_blend_state->blendConstants,
+ ARRAY_SIZE(internal_dynamic_state->blend_constants));
+ }
+
+ /* TODO: handle STATIC_STATE_DEPTH_BOUNDS ? */
+
+ internal_dynamic_state->mask = dynamic_states;
+}
+
+static VkResult
+pvr_graphics_pipeline_init(struct pvr_device *device,
+ struct pvr_pipeline_cache *pipeline_cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *allocator,
+ struct pvr_graphics_pipeline *gfx_pipeline)
+{
+ /* If rasterization is not enabled, various CreateInfo structs must be
+ * ignored.
+ */
+ const bool raster_discard_enabled =
+ pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
+ const VkPipelineViewportStateCreateInfo *vs_info =
+ !raster_discard_enabled ? pCreateInfo->pViewportState : NULL;
+ const VkPipelineDepthStencilStateCreateInfo *dss_info =
+ !raster_discard_enabled ? pCreateInfo->pDepthStencilState : NULL;
+ const VkPipelineRasterizationStateCreateInfo *rs_info =
+ !raster_discard_enabled ? pCreateInfo->pRasterizationState : NULL;
+ const VkPipelineColorBlendStateCreateInfo *cbs_info =
+ !raster_discard_enabled ? 
pCreateInfo->pColorBlendState : NULL; + const VkPipelineMultisampleStateCreateInfo *ms_info = + !raster_discard_enabled ? pCreateInfo->pMultisampleState : NULL; + VkResult result; + + pvr_pipeline_init(device, PVR_PIPELINE_TYPE_GRAPHICS, &gfx_pipeline->base); + + pvr_finishme("ignoring pCreateInfo flags."); + pvr_finishme("ignoring pipeline cache."); + + gfx_pipeline->raster_state.discard_enable = raster_discard_enabled; + gfx_pipeline->raster_state.cull_mode = + pCreateInfo->pRasterizationState->cullMode; + gfx_pipeline->raster_state.front_face = + pCreateInfo->pRasterizationState->frontFace; + gfx_pipeline->raster_state.depth_bias_enable = + pCreateInfo->pRasterizationState->depthBiasEnable; + gfx_pipeline->raster_state.depth_clamp_enable = + pCreateInfo->pRasterizationState->depthClampEnable; + + /* FIXME: Handle depthClampEnable. */ + + pvr_graphics_pipeline_init_depth_and_stencil_state(gfx_pipeline, dss_info); + pvr_graphics_pipeline_init_dynamic_state(gfx_pipeline, + pCreateInfo->pDynamicState, + vs_info, + dss_info, + cbs_info, + rs_info); + + if (pCreateInfo->pInputAssemblyState) { + gfx_pipeline->input_asm_state.topology = + pCreateInfo->pInputAssemblyState->topology; + gfx_pipeline->input_asm_state.primitive_restart = + pCreateInfo->pInputAssemblyState->primitiveRestartEnable; + } + + memset(gfx_pipeline->stage_indices, ~0, sizeof(gfx_pipeline->stage_indices)); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage; + gl_shader_stage gl_stage = vk_to_mesa_shader_stage(vk_stage); + /* From the Vulkan 1.2.192 spec for VkPipelineShaderStageCreateInfo: + * + * "stage must not be VK_SHADER_STAGE_ALL_GRAPHICS, + * or VK_SHADER_STAGE_ALL." + * + * So we don't handle that. + * + * We also don't handle VK_SHADER_STAGE_TESSELLATION_* and + * VK_SHADER_STAGE_GEOMETRY_BIT stages as 'tessellationShader' and + * 'geometryShader' are set to false in the VkPhysicalDeviceFeatures + * structure returned by the driver. + */ + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + case VK_SHADER_STAGE_FRAGMENT_BIT: + gfx_pipeline->stage_indices[gl_stage] = i; + break; + default: + unreachable("Unsupported stage."); + } + } + + gfx_pipeline->base.layout = + pvr_pipeline_layout_from_handle(pCreateInfo->layout); + + if (ms_info) { + gfx_pipeline->rasterization_samples = ms_info->rasterizationSamples; + gfx_pipeline->sample_mask = + (ms_info->pSampleMask) ? ms_info->pSampleMask[0] : 0xFFFFFFFF; + } else { + gfx_pipeline->rasterization_samples = VK_SAMPLE_COUNT_1_BIT; + gfx_pipeline->sample_mask = 0xFFFFFFFF; + } + + /* Compiles and uploads shaders and PDS programs. */ + result = pvr_graphics_pipeline_compile(device, + pipeline_cache, + pCreateInfo, + allocator, + gfx_pipeline); + if (result != VK_SUCCESS) { + pvr_pipeline_finish(&gfx_pipeline->base); + return result; + } + + return VK_SUCCESS; +} + +/* If allocator == NULL, the internal one will be used. 
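+ * (vk_zalloc2()/vk_free2() fall back to the device's allocator when they
+ * are passed a NULL allocator.)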
*/ +static VkResult +pvr_graphics_pipeline_create(struct pvr_device *device, + struct pvr_pipeline_cache *pipeline_cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *allocator, + VkPipeline *const pipeline_out) +{ + struct pvr_graphics_pipeline *gfx_pipeline; + VkResult result; + + gfx_pipeline = vk_zalloc2(&device->vk.alloc, + allocator, + sizeof(*gfx_pipeline), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!gfx_pipeline) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Compiles and uploads shaders and PDS programs too. */ + result = pvr_graphics_pipeline_init(device, + pipeline_cache, + pCreateInfo, + allocator, + gfx_pipeline); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, gfx_pipeline); + return result; + } + + *pipeline_out = pvr_pipeline_to_handle(&gfx_pipeline->base); + + return VK_SUCCESS; +} + +VkResult +pvr_CreateGraphicsPipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache); + PVR_FROM_HANDLE(pvr_device, device, _device); + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; i < createInfoCount; i++) { + const VkResult local_result = + pvr_graphics_pipeline_create(device, + pipeline_cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (local_result != VK_SUCCESS) { + result = local_result; + pPipelines[i] = VK_NULL_HANDLE; + } + } + + return result; +} + +/***************************************************************************** + Other functions +*****************************************************************************/ + +void pvr_DestroyPipeline(VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline); + PVR_FROM_HANDLE(pvr_device, device, _device); + + if (!pipeline) + return; + + switch (pipeline->type) { + case PVR_PIPELINE_TYPE_GRAPHICS: { + struct pvr_graphics_pipeline *const gfx_pipeline = + to_pvr_graphics_pipeline(pipeline); + + pvr_graphics_pipeline_destroy(device, pAllocator, gfx_pipeline); + break; + } + + case PVR_PIPELINE_TYPE_COMPUTE: { + struct pvr_compute_pipeline *const compute_pipeline = + to_pvr_compute_pipeline(pipeline); + + pvr_compute_pipeline_destroy(device, pAllocator, compute_pipeline); + break; + } + + default: + unreachable("Unknown pipeline type."); + } +} diff --git a/src/imagination/vulkan/pvr_pipeline_cache.c b/src/imagination/vulkan/pvr_pipeline_cache.c new file mode 100644 index 00000000000..fb203c42678 --- /dev/null +++ b/src/imagination/vulkan/pvr_pipeline_cache.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "util/blob.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vulkan/util/vk_util.h"
+
+static void pvr_pipeline_cache_load(struct pvr_pipeline_cache *cache,
+ const void *data,
+ size_t size)
+{
+ struct pvr_device *device = cache->device;
+ struct pvr_physical_device *pdevice = device->pdevice;
+ struct vk_pipeline_cache_header header;
+ struct blob_reader blob;
+
+ blob_reader_init(&blob, data, size);
+
+ blob_copy_bytes(&blob, &header, sizeof(header));
+ if (blob.overrun)
+ return;
+
+ if (header.header_size < sizeof(header))
+ return;
+ if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+ return;
+ if (header.vendor_id != VK_VENDOR_ID_IMAGINATION)
+ return;
+ if (header.device_id != pdevice->dev_info.ident.device_id)
+ return;
+ if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
+ return;
+
+ /* TODO: There currently isn't any cached data, so there's nothing to load
+ * at this point. Once there is something to cache, load it here.
+ */ +} + +VkResult pvr_CreatePipelineCache(VkDevice _device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*cache), + VK_OBJECT_TYPE_PIPELINE_CACHE); + if (!cache) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + cache->device = device; + + if (pCreateInfo->initialDataSize > 0) { + pvr_pipeline_cache_load(cache, + pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + } + + *pPipelineCache = pvr_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void pvr_DestroyPipelineCache(VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_pipeline_cache, cache, _cache); + + if (!cache) + return; + + vk_object_free(&device->vk, pAllocator, cache); +} + +VkResult pvr_GetPipelineCacheData(VkDevice _device, + VkPipelineCache _cache, + size_t *pDataSize, + void *pData) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_physical_device *pdevice = device->pdevice; + struct blob blob; + + if (pData) + blob_init_fixed(&blob, pData, *pDataSize); + else + blob_init_fixed(&blob, NULL, SIZE_MAX); + + struct vk_pipeline_cache_header header = { + .header_size = sizeof(struct vk_pipeline_cache_header), + .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + .vendor_id = VK_VENDOR_ID_IMAGINATION, + .device_id = pdevice->dev_info.ident.device_id, + }; + memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE); + blob_write_bytes(&blob, &header, sizeof(header)); + + /* TODO: Once there's some data to cache then this should be written to + * 'blob'. + */ + + *pDataSize = blob.size; + + blob_finish(&blob); + + return VK_SUCCESS; +} + +VkResult pvr_MergePipelineCaches(VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + /* TODO: Once there's some data to cache then this will need to be able to + * merge caches together. + */ + + return VK_SUCCESS; +} diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h new file mode 100644 index 00000000000..8e681d8c8a5 --- /dev/null +++ b/src/imagination/vulkan/pvr_private.h @@ -0,0 +1,1427 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * based in part on radv driver which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PRIVATE_H
+#define PVR_PRIVATE_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_entrypoints.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "util/bitscan.h"
+#include "util/format/u_format.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "vk_command_buffer.h"
+#include "vk_device.h"
+#include "vk_image.h"
+#include "vk_instance.h"
+#include "vk_log.h"
+#include "vk_physical_device.h"
+#include "vk_queue.h"
+#include "wsi_common.h"
+
+#ifdef HAVE_VALGRIND
+# include <valgrind.h>
+# include <memcheck.h>
+# define VG(x) x
+#else
+# define VG(x) ((void)0)
+#endif
+
+#define VK_VENDOR_ID_IMAGINATION 0x1010
+
+#define PVR_STATE_PBE_DWORDS 2U
+
+#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
+ (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
+
+/* TODO: move into a common surface library? */
+enum pvr_memlayout {
+ PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
+ PVR_MEMLAYOUT_LINEAR,
+ PVR_MEMLAYOUT_TWIDDLED,
+ PVR_MEMLAYOUT_3DTWIDDLED,
+};
+
+enum pvr_cmd_buffer_status {
+ PVR_CMD_BUFFER_STATUS_INVALID = 0, /* explicitly treat 0 as invalid */
+ PVR_CMD_BUFFER_STATUS_INITIAL,
+ PVR_CMD_BUFFER_STATUS_RECORDING,
+ PVR_CMD_BUFFER_STATUS_EXECUTABLE,
+};
+
+enum pvr_texture_state {
+ PVR_TEXTURE_STATE_SAMPLE,
+ PVR_TEXTURE_STATE_STORAGE,
+ PVR_TEXTURE_STATE_ATTACHMENT,
+ PVR_TEXTURE_STATE_MAX_ENUM,
+};
+
+enum pvr_sub_cmd_type {
+ PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+ PVR_SUB_CMD_TYPE_GRAPHICS,
+ PVR_SUB_CMD_TYPE_COMPUTE,
+ PVR_SUB_CMD_TYPE_TRANSFER,
+};
+
+enum pvr_depth_stencil_usage {
+ PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
+ PVR_DEPTH_STENCIL_USAGE_NEEDED,
+ PVR_DEPTH_STENCIL_USAGE_NEVER,
+};
+
+enum pvr_job_type {
+ PVR_JOB_TYPE_GEOM,
+ PVR_JOB_TYPE_FRAG,
+ PVR_JOB_TYPE_COMPUTE,
+ PVR_JOB_TYPE_TRANSFER,
+ PVR_JOB_TYPE_MAX
+};
+
+enum pvr_pipeline_type {
+ PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+ PVR_PIPELINE_TYPE_GRAPHICS,
+ PVR_PIPELINE_TYPE_COMPUTE,
+};
+
+enum pvr_pipeline_stage_bits {
+ PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
+ PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
+ PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
+ PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
+};
+
+#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
+ (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)
+
+#define PVR_PIPELINE_STAGE_ALL_BITS \
+ (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_TRANSFER_BIT)
+
+/* TODO: This number must be changed when we add compute support.
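+ * (The three synced stages are currently geometry, fragment and transfer.)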
*/
+#define PVR_NUM_SYNC_PIPELINE_STAGES 3U
+
+/* Warning: Do not define an invalid stage as 0 since other code relies on 0
+ * being the first shader stage. This allows for stages to be split or added
+ * in the future. Defining 0 as invalid will very likely cause problems.
+ */
+enum pvr_stage_allocation {
+ PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+ PVR_STAGE_ALLOCATION_FRAGMENT,
+ PVR_STAGE_ALLOCATION_COMPUTE,
+ PVR_STAGE_ALLOCATION_COUNT
+};
+
+/* Scissor accumulation state defines:
+ * - Disabled means that a clear has been detected, and scissor accumulation
+ * should stop.
+ * - Check for clear is when there are no clear loadops, but another clear
+ * call could still be broken via scissoring.
+ * - Enabled means that a scissor has been set in the pipeline, and
+ * accumulation can continue.
+ */
+enum pvr_scissor_accum_state {
+ PVR_SCISSOR_ACCUM_INVALID = 0, /* Explicitly treat 0 as invalid */
+ PVR_SCISSOR_ACCUM_DISABLED,
+ PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR,
+ PVR_SCISSOR_ACCUM_ENABLED,
+};
+
+struct pvr_bo;
+struct pvr_compute_ctx;
+struct pvr_compute_pipeline;
+struct pvr_free_list;
+struct pvr_graphics_pipeline;
+struct pvr_instance;
+struct pvr_render_ctx;
+struct rogue_compiler;
+
+struct pvr_descriptor_limits {
+ uint32_t max_per_stage_resources;
+ uint32_t max_per_stage_samplers;
+ uint32_t max_per_stage_uniform_buffers;
+ uint32_t max_per_stage_storage_buffers;
+ uint32_t max_per_stage_sampled_images;
+ uint32_t max_per_stage_storage_images;
+ uint32_t max_per_stage_input_attachments;
+};
+
+struct pvr_physical_device {
+ struct vk_physical_device vk;
+
+ /* Back-pointer to instance */
+ struct pvr_instance *instance;
+
+ char *name;
+ int master_fd;
+ int render_fd;
+ char *master_path;
+ char *render_path;
+
+ struct pvr_winsys *ws;
+ struct pvr_device_info dev_info;
+
+ VkPhysicalDeviceMemoryProperties memory;
+
+ uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
+
+ struct wsi_device wsi_device;
+
+ struct rogue_compiler *compiler;
+};
+
+struct pvr_instance {
+ struct vk_instance vk;
+
+ int physical_devices_count;
+ struct pvr_physical_device physical_device;
+};
+
+struct pvr_queue {
+ struct vk_queue vk;
+
+ struct pvr_device *device;
+
+ struct pvr_render_ctx *gfx_ctx;
+ struct pvr_compute_ctx *compute_ctx;
+
+ struct pvr_winsys_syncobj *completion[PVR_JOB_TYPE_MAX];
+};
+
+struct pvr_semaphore {
+ struct vk_object_base base;
+
+ struct pvr_winsys_syncobj *syncobj;
+};
+
+struct pvr_fence {
+ struct vk_object_base base;
+
+ struct pvr_winsys_syncobj *syncobj;
+};
+
+struct pvr_vertex_binding {
+ struct pvr_buffer *buffer;
+ VkDeviceSize offset;
+};
+
+struct pvr_pds_upload {
+ struct pvr_bo *pvr_bo;
+ /* Offset from the pds heap base address. */
+ uint32_t data_offset;
+ /* Offset from the pds heap base address. */
+ uint32_t code_offset;
+
+ /* data_size + code_size = program_size. */
+ uint32_t data_size;
+ uint32_t code_size;
+};
+
+struct pvr_device {
+ struct vk_device vk;
+ struct pvr_instance *instance;
+ struct pvr_physical_device *pdevice;
+
+ int master_fd;
+ int render_fd;
+
+ struct pvr_winsys *ws;
+ struct pvr_winsys_heaps heaps;
+
+ struct pvr_free_list *global_free_list;
+
+ struct pvr_queue *queues;
+ uint32_t queue_count;
+
+ /* Running count of the number of job submissions across all queues. */
+ uint32_t global_queue_job_count;
+
+ /* Running count of the number of presentations across all queues.
*/ + uint32_t global_queue_present_count; + + uint32_t pixel_event_data_size_in_dwords; + + struct pvr_pds_upload pds_compute_fence_program; + + VkPhysicalDeviceFeatures features; +}; + +struct pvr_device_memory { + struct vk_object_base base; + struct pvr_winsys_bo *bo; +}; + +struct pvr_mip_level { + /* Offset of the mip level in bytes */ + uint32_t offset; + + /* Aligned mip level size in bytes */ + uint32_t size; + + /* Aligned row length in bytes */ + uint32_t pitch; + + /* Aligned height in bytes */ + uint32_t height_pitch; +}; + +struct pvr_image { + struct vk_image vk; + + /* vma this image is bound to */ + struct pvr_winsys_vma *vma; + + /* Device address the image is mapped to in device virtual address space */ + pvr_dev_addr_t dev_addr; + + /* Derived and other state */ + VkExtent3D physical_extent; + enum pvr_memlayout memlayout; + VkDeviceSize layer_size; + VkDeviceSize size; + + VkDeviceSize alignment; + + struct pvr_mip_level mip_levels[14]; +}; + +struct pvr_buffer { + struct vk_object_base base; + + /* Saved information from pCreateInfo */ + VkDeviceSize size; + + /* Derived and other state */ + uint32_t alignment; + /* vma this buffer is bound to */ + struct pvr_winsys_vma *vma; + /* Device address the buffer is mapped to in device virtual address space */ + pvr_dev_addr_t dev_addr; +}; + +struct pvr_image_view { + struct vk_image_view vk; + + /* Saved information from pCreateInfo. */ + const struct pvr_image *image; + + /* Prepacked Texture Image dword 0 and 1. It will be copied to the + * descriptor info during pvr_UpdateDescriptorSets. + * + * We create separate texture states for sampling, storage and input + * attachment cases. + */ + uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2]; +}; + +struct pvr_sampler { + struct vk_object_base base; +}; + +struct pvr_descriptor_size_info { + /* Non-spillable size for storage in the common store. */ + uint32_t primary; + + /* Spillable size to accommodate limitation of the common store. */ + uint32_t secondary; + + uint32_t alignment; +}; + +struct pvr_descriptor_set_layout_binding { + VkDescriptorType type; + + /* "M" in layout(set = N, binding = M) + * Can be used to index bindings in the descriptor_set_layout. Not the + * original user specified binding number as those might be non-contiguous. + */ + uint32_t binding_number; + + uint32_t descriptor_count; + + /* Index into the flattened descriptor set */ + uint16_t descriptor_index; + + VkShaderStageFlags shader_stages; + /* Mask composed by shifted PVR_STAGE_ALLOCATION_... + * Makes it easier to check active shader stages by just shifting and + * ANDing instead of using VkShaderStageFlags and match the PVR_STAGE_... + */ + uint32_t shader_stage_mask; + + struct { + uint32_t primary; + uint32_t secondary; + } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT]; + + /* Index at which the samplers can be found in the descriptor_set_layout. + * 0 when the samplers are at index 0 or no samplers are present. + * Check descriptor_count to differentiate. It will be 0 for 0 samplers. + */ + uint32_t immutable_samplers_index; +}; + +/* All sizes are in dwords. */ +struct pvr_descriptor_set_layout_mem_layout { + uint32_t primary_offset; + uint32_t primary_size; + + uint32_t secondary_offset; + uint32_t secondary_size; + + uint32_t primary_dynamic_size; + uint32_t secondary_dynamic_size; +}; + +struct pvr_descriptor_set_layout { + struct vk_object_base base; + + /* Total amount of descriptors contained in this set. 
*/ + uint32_t descriptor_count; + + /* Count of dynamic buffers. */ + uint32_t dynamic_buffer_count; + + uint32_t binding_count; + struct pvr_descriptor_set_layout_binding *bindings; + + uint32_t immutable_sampler_count; + struct pvr_sampler **immutable_samplers; + + /* Shader stages requiring access to descriptors in this set. */ + VkShaderStageFlags shader_stages; + + /* Count of each VkDescriptorType per shader stage. Dynamically allocated + * arrays per stage as to not hard code the max descriptor type here. + * + * Note: when adding a new type, it might not numerically follow the + * previous type so a sparse array will be created. You might want to + * readjust how these arrays are created and accessed. + */ + uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT]; + + uint32_t total_size_in_dwords; + struct pvr_descriptor_set_layout_mem_layout + memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]; +}; + +struct pvr_descriptor_pool { + struct vk_object_base base; + + VkAllocationCallbacks alloc; + + /* Saved information from pCreateInfo. */ + uint32_t max_sets; + + uint32_t total_size_in_dwords; + uint32_t current_size_in_dwords; + + /* Derived and other state. */ + /* List of the descriptor sets created using this pool. */ + struct list_head descriptor_sets; +}; + +struct pvr_descriptor { + VkDescriptorType type; + + /* TODO: Follow anv_descriptor layout when adding support for + * other descriptor types. + */ + pvr_dev_addr_t buffer_dev_addr; + VkDeviceSize buffer_desc_range; + VkDeviceSize buffer_create_info_size; +}; + +struct pvr_descriptor_set { + struct vk_object_base base; + + const struct pvr_descriptor_set_layout *layout; + const struct pvr_descriptor_pool *pool; + + struct pvr_bo *pvr_bo; + + /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list. + */ + struct list_head link; + + /* Array of size layout::descriptor_count. */ + struct pvr_descriptor descriptors[0]; +}; + +struct pvr_descriptor_state { + struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS]; + uint32_t valid_mask; +}; + +struct pvr_transfer_cmd { + /* Node to link this cmd into the transfer_cmds list in + * pvr_sub_cmd::transfer structure. + */ + struct list_head link; + + struct pvr_buffer *src; + struct pvr_buffer *dst; + uint32_t region_count; + VkBufferCopy2 regions[0]; +}; + +struct pvr_sub_cmd { + /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */ + struct list_head link; + + enum pvr_sub_cmd_type type; + + union { + struct { + const struct pvr_framebuffer *framebuffer; + + struct pvr_render_job job; + + struct pvr_bo *depth_bias_bo; + struct pvr_bo *scissor_bo; + + /* Tracking how the loaded depth/stencil values are being used. */ + enum pvr_depth_stencil_usage depth_usage; + enum pvr_depth_stencil_usage stencil_usage; + + /* Tracking whether the subcommand modifies depth/stencil. */ + bool modifies_depth; + bool modifies_stencil; + + /* Control stream builder object */ + struct pvr_csb control_stream; + + uint32_t hw_render_idx; + + uint32_t max_tiles_in_flight; + + bool empty_cmd; + + /* True if any fragment shader used in this sub command uses atomic + * operations. + */ + bool frag_uses_atomic_ops; + + bool disable_compute_overlap; + + /* True if any fragment shader used in this sub command has side + * effects. + */ + bool frag_has_side_effects; + + /* True if any vertex shader used in this sub command contains both + * texture reads and texture writes. 
+          */
+         bool vertex_uses_texture_rw;
+
+         /* True if any fragment shader used in this sub command contains
+          * both texture reads and texture writes.
+          */
+         bool frag_uses_texture_rw;
+      } gfx;
+
+      struct {
+         /* Control stream builder object. */
+         struct pvr_csb control_stream;
+
+         struct pvr_winsys_compute_submit_info submit_info;
+
+         uint32_t num_shared_regs;
+
+         /* True if any shader used in this sub command uses atomic
+          * operations.
+          */
+         bool uses_atomic_ops;
+
+         bool uses_barrier;
+      } compute;
+
+      struct {
+         /* List of pvr_transfer_cmd type structures. */
+         struct list_head transfer_cmds;
+      } transfer;
+   };
+};
+
+struct pvr_render_pass_info {
+   const struct pvr_render_pass *pass;
+   struct pvr_framebuffer *framebuffer;
+
+   struct pvr_image_view **attachments;
+
+   uint32_t subpass_idx;
+   uint32_t current_hw_subpass;
+
+   VkRect2D render_area;
+
+   uint32_t clear_value_count;
+   VkClearValue *clear_values;
+
+   VkPipelineBindPoint pipeline_bind_point;
+
+   bool process_empty_tiles;
+   bool enable_bg_tag;
+   uint32_t userpass_spawn;
+
+   /* Have we had to scissor a depth/stencil clear because render area was not
+    * tile aligned?
+    */
+   bool scissor_ds_clear;
+};
+
+struct pvr_emit_state {
+   bool ppp_control : 1;
+   bool isp : 1;
+   bool isp_fb : 1;
+   bool isp_ba : 1;
+   bool isp_bb : 1;
+   bool isp_dbsc : 1;
+   bool pds_fragment_stateptr0 : 1;
+   bool pds_fragment_stateptr1 : 1;
+   bool pds_fragment_stateptr2 : 1;
+   bool pds_fragment_stateptr3 : 1;
+   bool region_clip : 1;
+   bool viewport : 1;
+   bool wclamp : 1;
+   bool output_selects : 1;
+   bool varying_word0 : 1;
+   bool varying_word1 : 1;
+   bool varying_word2 : 1;
+   bool stream_out : 1;
+};
+
+struct pvr_ppp_state {
+   uint32_t header;
+
+   struct {
+      /* TODO: Can we get rid of the "control" field? */
+      struct pvr_cmd_struct(TA_STATE_ISPCTL) control_struct;
+      uint32_t control;
+
+      uint32_t front_a;
+      uint32_t front_b;
+      uint32_t back_a;
+      uint32_t back_b;
+   } isp;
+
+   struct {
+      uint16_t scissor_index;
+      uint16_t depthbias_index;
+   } depthbias_scissor_indices;
+
+   struct {
+      uint32_t pixel_shader_base;
+      uint32_t texture_uniform_code_base;
+      uint32_t size_info1;
+      uint32_t size_info2;
+      uint32_t varying_base;
+      uint32_t texture_state_data_base;
+      uint32_t uniform_state_data_base;
+   } pds;
+
+   struct {
+      uint32_t word0;
+      uint32_t word1;
+   } region_clipping;
+
+   struct {
+      uint32_t a0;
+      uint32_t m0;
+      uint32_t a1;
+      uint32_t m1;
+      uint32_t a2;
+      uint32_t m2;
+   } viewports[PVR_MAX_VIEWPORTS];
+
+   uint32_t viewport_count;
+
+   uint32_t output_selects;
+
+   uint32_t varying_word[2];
+
+   uint32_t ppp_control;
+};
+
+#define PVR_DYNAMIC_STATE_BIT_VIEWPORT BITFIELD_BIT(0U)
+#define PVR_DYNAMIC_STATE_BIT_SCISSOR BITFIELD_BIT(1U)
+#define PVR_DYNAMIC_STATE_BIT_LINE_WIDTH BITFIELD_BIT(2U)
+#define PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS BITFIELD_BIT(3U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK BITFIELD_BIT(4U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK BITFIELD_BIT(5U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE BITFIELD_BIT(6U)
+#define PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS BITFIELD_BIT(7U)
+
+#define PVR_DYNAMIC_STATE_ALL_BITS \
+   ((PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS << 1U) - 1U)
+
+struct pvr_dynamic_state {
+   /* Identifies which pipeline state is static or dynamic.
+    * To test for dynamic: & PVR_DYNAMIC_STATE_BIT_...
+    */
+   uint32_t mask;
+
+   struct {
+      /* TODO: fixme in the original code - figure out what.
+       */
+      uint32_t count;
+      VkViewport viewports[PVR_MAX_VIEWPORTS];
+   } viewport;
+
+   struct {
+      /* TODO: fixme in the original code - figure out what. */
+      uint32_t count;
+      VkRect2D scissors[PVR_MAX_VIEWPORTS];
+   } scissor;
+
+   /* Saved information from pCreateInfo. */
+   float line_width;
+
+   struct {
+      /* Saved information from pCreateInfo. */
+      float constant_factor;
+      float clamp;
+      float slope_factor;
+   } depth_bias;
+   float blend_constants[4];
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } compare_mask;
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } write_mask;
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } reference;
+};
+
+struct pvr_cmd_buffer_draw_state {
+   uint32_t base_instance;
+   uint32_t base_vertex;
+   bool draw_indirect;
+   bool draw_indexed;
+};
+
+struct pvr_cmd_buffer_state {
+   VkResult status;
+
+   /* Pipeline binding. */
+   const struct pvr_graphics_pipeline *gfx_pipeline;
+
+   const struct pvr_compute_pipeline *compute_pipeline;
+
+   struct pvr_render_pass_info render_pass_info;
+
+   struct pvr_sub_cmd *current_sub_cmd;
+
+   struct pvr_ppp_state ppp_state;
+
+   union {
+      struct pvr_emit_state emit_state;
+      /* This is intended to allow setting and clearing of all bits. It
+       * shouldn't be used to access specific bits of emit_state.
+       */
+      uint32_t emit_state_bits;
+   };
+
+   struct {
+      /* FIXME: Check if we need a dirty state flag for the given scissor
+       * accumulation state.
+       * Check whether these members should be moved in the top level struct
+       * and this struct replaced with just pvr_dynamic_state "dynamic".
+       */
+      enum pvr_scissor_accum_state scissor_accum_state;
+      VkRect2D scissor_accum_bounds;
+
+      struct pvr_dynamic_state common;
+   } dynamic;
+
+   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];
+
+   struct {
+      struct pvr_buffer *buffer;
+      VkDeviceSize offset;
+      VkIndexType type;
+   } index_buffer_binding;
+
+   struct {
+      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
+      VkShaderStageFlags dirty_stages;
+   } push_constants;
+
+   /* Array size of barriers_needed is based on number of sync pipeline
+    * stages.
+    */
+   uint32_t barriers_needed[4];
+
+   struct pvr_descriptor_state gfx_desc_state;
+   struct pvr_descriptor_state compute_desc_state;
+
+   VkFormat depth_format;
+
+   struct {
+      bool viewport : 1;
+      bool scissor : 1;
+
+      bool compute_pipeline_binding : 1;
+      bool compute_desc_dirty : 1;
+
+      bool gfx_pipeline_binding : 1;
+      bool gfx_desc_dirty : 1;
+
+      bool vertex_bindings : 1;
+      bool index_buffer_binding : 1;
+      bool vertex_descriptors : 1;
+      bool fragment_descriptors : 1;
+
+      bool line_width : 1;
+
+      bool depth_bias : 1;
+
+      bool blend_constants : 1;
+
+      bool compare_mask : 1;
+      bool write_mask : 1;
+      bool reference : 1;
+
+      bool userpass_spawn : 1;
+
+      /* Some draw state needs to be tracked for changes between draw calls
+       * i.e. if we get a draw with baseInstance=0, followed by a call with
+       * baseInstance=1 that needs to cause us to select a different PDS
+       * attrib program and update the BASE_INSTANCE PDS const. If only
+       * baseInstance changes then we just have to update the data section.
+       */
+      bool draw_base_instance : 1;
+      bool draw_variant : 1;
+   } dirty;
+
+   struct pvr_cmd_buffer_draw_state draw_state;
+
+   struct {
+      uint32_t code_offset;
+      const struct pvr_pds_info *info;
+   } pds_shader;
+
+   uint32_t max_shared_regs;
+
+   /* Offset of the data segment for the vertex attrib upload program.
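+    * (Presumably relative to the PDS heap base, as with
+    * pvr_pds_upload::data_offset above.)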
+    */
+   uint32_t pds_vertex_attrib_offset;
+
+   uint32_t pds_fragment_uniform_data_offset;
+};
+
+static_assert(
+   sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state) <=
+      sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state_bits),
+   "Size of emit_state_bits must be greater than or equal to emit_state.");
+
+struct pvr_cmd_buffer {
+   struct vk_command_buffer vk;
+
+   struct pvr_device *device;
+
+   /* Buffer status, invalid/initial/recording/executable. */
+   enum pvr_cmd_buffer_status status;
+
+   /* Buffer usage flags. */
+   VkCommandBufferUsageFlags usage_flags;
+
+   struct util_dynarray depth_bias_array;
+
+   struct util_dynarray scissor_array;
+   uint32_t scissor_words[2];
+
+   struct pvr_cmd_buffer_state state;
+
+   /* List of pvr_bo structs associated with this cmd buffer. */
+   struct list_head bo_list;
+
+   struct list_head sub_cmds;
+};
+
+struct pvr_pipeline_layout {
+   struct vk_object_base base;
+
+   uint32_t set_count;
+   /* Contains set_count descriptor set layouts. */
+   struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];
+
+   VkShaderStageFlags push_constants_shader_stages;
+
+   VkShaderStageFlags shader_stages;
+
+   /* Per stage masks indicating which set in the layout contains any
+    * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
+    * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
+    * Shift by the set's number to check the mask (1U << set_num).
+    */
+   uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];
+
+   /* Array of descriptor offsets at which the set's descriptors start, per
+    * stage, within all the sets in the pipeline layout per descriptor type.
+    * Note that we only store offsets for specific descriptor types
+    * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
+    * UNIFORM_BUFFER, STORAGE_BUFFER}; the rest will be 0.
+    */
+   uint32_t
+      descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
+                        [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];
+
+   /* There is no accounting for dynamics in here. They will be garbage values.
+    */
+   struct pvr_descriptor_set_layout_mem_layout
+      register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
+                                         [PVR_MAX_DESCRIPTOR_SETS];
+
+   /* All sizes in dwords. */
+   struct pvr_pipeline_layout_reg_info {
+      uint32_t primary_dynamic_size_in_dwords;
+      uint32_t secondary_dynamic_size_in_dwords;
+   } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
+};
+
+struct pvr_pipeline_cache {
+   struct vk_object_base base;
+
+   struct pvr_device *device;
+};
+
+struct pvr_stage_allocation_uniform_state {
+   struct pvr_pds_upload pds_code;
+   /* Since we upload the code segment separately from the data segment
+    * pds_code.data_size might be 0 whilst
+    * pds_info.data_size_in_dwords might be >0 in the case of this struct
+    * referring to the code upload.
+    */
+   struct pvr_pds_info pds_info;
+};
+
+struct pvr_pds_attrib_program {
+   struct pvr_pds_info info;
+   /* The uploaded PDS program stored here only contains the code segment,
+    * meaning the data size will be 0, unlike the data size stored in the
+    * 'info' member above.
+    */
+   struct pvr_pds_upload program;
+};
+
+struct pvr_pipeline_stage_state {
+   uint32_t const_shared_reg_count;
+   uint32_t const_shared_reg_offset;
+   uint32_t temps_count;
+
+   uint32_t coefficient_size;
+
+   /* True if this shader uses any atomic operations. */
+   bool uses_atomic_ops;
+
+   /* True if this shader uses both texture reads and texture writes. */
+   bool uses_texture_rw;
+
+   /* Only used for compute stage.
*/ + bool uses_barrier; + + /* True if this shader has side effects */ + bool has_side_effects; + + /* True if this shader is simply a nop.end. */ + bool empty_program; +}; + +struct pvr_vertex_shader_state { + /* Pointer to a buffer object that contains the shader binary. */ + struct pvr_bo *bo; + uint32_t entry_offset; + + /* 2 since we only need STATE_VARYING{0,1} state words. */ + uint32_t varying[2]; + + struct pvr_pds_attrib_program + pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT]; + + struct pvr_pipeline_stage_state stage_state; + /* FIXME: Move this into stage_state? */ + struct pvr_stage_allocation_uniform_state uniform_state; + uint32_t vertex_input_size; + uint32_t vertex_output_size; + uint32_t output_selects; + uint32_t user_clip_planes_mask; +}; + +struct pvr_fragment_shader_state { + /* Pointer to a buffer object that contains the shader binary. */ + struct pvr_bo *bo; + uint32_t entry_offset; + + struct pvr_pipeline_stage_state stage_state; + /* FIXME: Move this into stage_state? */ + struct pvr_stage_allocation_uniform_state uniform_state; + uint32_t pass_type; + + struct pvr_pds_upload pds_coeff_program; + struct pvr_pds_upload pds_fragment_program; +}; + +struct pvr_pipeline { + struct vk_object_base base; + + enum pvr_pipeline_type type; + + /* Saved information from pCreateInfo. */ + struct pvr_pipeline_layout *layout; +}; + +struct pvr_compute_pipeline { + struct pvr_pipeline base; + + struct { + /* Pointer to a buffer object that contains the shader binary. */ + struct pvr_bo *bo; + + struct { + uint32_t base_workgroup : 1; + } flags; + + struct pvr_stage_allocation_uniform_state uniform; + + struct pvr_pds_upload primary_program; + struct pvr_pds_info primary_program_info; + + struct pvr_pds_upload primary_program_base_workgroup_variant; + struct pvr_pds_info primary_program_base_workgroup_variant_info; + /* Offset within the PDS data section at which the base workgroup id + * resides. + */ + uint32_t base_workgroup_ids_dword_offset; + } state; +}; + +struct pvr_graphics_pipeline { + struct pvr_pipeline base; + + VkSampleCountFlagBits rasterization_samples; + struct pvr_raster_state { + /* Derived and other state. */ + /* Indicates whether primitives are discarded immediately before the + * rasterization stage. + */ + bool discard_enable; + VkCullModeFlags cull_mode; + VkFrontFace front_face; + bool depth_bias_enable; + bool depth_clamp_enable; + } raster_state; + struct { + VkPrimitiveTopology topology; + bool primitive_restart; + } input_asm_state; + uint32_t sample_mask; + + struct pvr_dynamic_state dynamic_state; + + VkCompareOp depth_compare_op; + bool depth_write_disable; + + struct { + VkCompareOp compare_op; + /* SOP1 */ + VkStencilOp fail_op; + /* SOP2 */ + VkStencilOp depth_fail_op; + /* SOP3 */ + VkStencilOp pass_op; + } stencil_front, stencil_back; + + /* Derived and other state */ + size_t stage_indices[MESA_SHADER_FRAGMENT + 1]; + + struct pvr_vertex_shader_state vertex_shader_state; + struct pvr_fragment_shader_state fragment_shader_state; +}; + +struct pvr_render_target { + struct pvr_rt_dataset *rt_dataset; + + pthread_mutex_t mutex; + + bool valid; +}; + +struct pvr_framebuffer { + struct vk_object_base base; + + /* Saved information from pCreateInfo. */ + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct pvr_image_view **attachments; + + /* Derived and other state. */ + struct pvr_bo *ppp_state_bo; + /* PPP state size in dwords. 
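+    * (The size of the state written to ppp_state_bo above.)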
*/ + size_t ppp_state_size; + + uint32_t render_targets_count; + struct pvr_render_target *render_targets; +}; + +struct pvr_render_pass_attachment { + /* Saved information from pCreateInfo. */ + VkAttachmentLoadOp load_op; + + VkAttachmentStoreOp store_op; + + VkAttachmentLoadOp stencil_load_op; + + VkAttachmentStoreOp stencil_store_op; + + VkFormat vk_format; + uint32_t sample_count; + VkImageLayout initial_layout; + + /* Derived and other state. */ + /* True if the attachment format includes a stencil component. */ + bool has_stencil; + + /* Can this surface be resolved by the PBE. */ + bool is_pbe_downscalable; + + uint32_t index; +}; + +struct pvr_render_subpass { + /* Saved information from pCreateInfo. */ + /* The number of samples per color attachment (or depth attachment if + * z-only). + */ + /* FIXME: rename to 'samples' to match struct pvr_image */ + uint32_t sample_count; + + uint32_t color_count; + uint32_t *color_attachments; + uint32_t *resolve_attachments; + + uint32_t input_count; + uint32_t *input_attachments; + + uint32_t *depth_stencil_attachment; + + /* Derived and other state. */ + uint32_t dep_count; + uint32_t *dep_list; + + /* Array with dep_count elements. flush_on_dep[x] is true if this subpass + * and the subpass dep_list[x] can't be in the same hardware render. + */ + bool *flush_on_dep; + + uint32_t index; + + uint32_t userpass_spawn; + + VkPipelineBindPoint pipeline_bind_point; +}; + +struct pvr_render_pass { + struct vk_object_base base; + + /* Saved information from pCreateInfo. */ + uint32_t attachment_count; + + struct pvr_render_pass_attachment *attachments; + + uint32_t subpass_count; + + struct pvr_render_subpass *subpasses; + + struct pvr_renderpass_hwsetup *hw_setup; + + /* Derived and other state. */ + /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */ + uint32_t max_sample_count; + + /* The maximum number of tile buffers to use in any subpass. 
*/ + uint32_t max_tilebuffer_count; +}; + +struct pvr_load_op { + bool is_hw_object; + + uint32_t clear_mask; + + struct pvr_bo *usc_frag_prog_bo; + uint32_t const_shareds_count; + uint32_t shareds_dest_offset; + uint32_t shareds_count; + + struct pvr_pds_upload pds_frag_prog; + + struct pvr_pds_upload pds_tex_state_prog; + uint32_t temps_count; +}; + +VkResult pvr_wsi_init(struct pvr_physical_device *pdevice); +void pvr_wsi_finish(struct pvr_physical_device *pdevice); + +VkResult pvr_queues_create(struct pvr_device *device, + const VkDeviceCreateInfo *pCreateInfo); +void pvr_queues_destroy(struct pvr_device *device); + +VkResult pvr_bind_memory(struct pvr_device *device, + struct pvr_device_memory *mem, + VkDeviceSize offset, + VkDeviceSize size, + VkDeviceSize alignment, + struct pvr_winsys_vma **const vma_out, + pvr_dev_addr_t *const dev_addr_out); +void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma); + +VkResult pvr_gpu_upload(struct pvr_device *device, + struct pvr_winsys_heap *heap, + const void *data, + size_t size, + uint64_t alignment, + struct pvr_bo **const pvr_bo_out); +VkResult pvr_gpu_upload_pds(struct pvr_device *device, + const uint32_t *data, + uint32_t data_size_dwords, + uint32_t data_alignment, + const uint32_t *code, + uint32_t code_size_dwords, + uint32_t code_alignment, + uint64_t min_alignment, + struct pvr_pds_upload *const pds_upload_out); + +VkResult pvr_gpu_upload_usc(struct pvr_device *device, + const void *code, + size_t code_size, + uint64_t code_alignment, + struct pvr_bo **const pvr_bo_out); + +VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_transfer_cmd *transfer_cmd); + +VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_winsys_heap *heap, + uint64_t size, + uint32_t flags, + struct pvr_bo **const pvr_bo_out); + +static inline struct pvr_compute_pipeline * +to_pvr_compute_pipeline(struct pvr_pipeline *pipeline) +{ + assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE); + return container_of(pipeline, struct pvr_compute_pipeline, base); +} + +static inline struct pvr_graphics_pipeline * +to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline) +{ + assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS); + return container_of(pipeline, struct pvr_graphics_pipeline, base); +} + +/* FIXME: Place this in USC specific header? 
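+ * It maps a rogue/USC MSAA mode onto PDS DOUTU state, so it straddles the
+ * USC and PDS interfaces.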
*/ +/* clang-format off */ +static inline enum PVRX(PDSINST_DOUTU_SAMPLE_RATE) +pvr_sample_rate_from_usc_msaa_mode(enum rogue_msaa_mode msaa_mode) +/* clang-format on */ +{ + switch (msaa_mode) { + case ROGUE_MSAA_MODE_PIXEL: + return PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE); + case ROGUE_MSAA_MODE_SELECTIVE: + return PVRX(PDSINST_DOUTU_SAMPLE_RATE_SELECTIVE); + case ROGUE_MSAA_MODE_FULL: + return PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL); + default: + unreachable("Undefined MSAA mode."); + } +} + +VkResult pvr_pds_fragment_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + const struct pvr_bo *fragment_shader_bo, + uint32_t fragment_temp_count, + enum rogue_msaa_mode msaa_mode, + bool has_phase_rate_change, + struct pvr_pds_upload *const pds_upload_out); + +#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \ + VK_FROM_HANDLE(__pvr_type, __name, __handle) + +VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer, + vk.base, + VkCommandBuffer, + VK_OBJECT_TYPE_COMMAND_BUFFER) +VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) +VK_DEFINE_HANDLE_CASTS(pvr_instance, + vk.base, + VkInstance, + VK_OBJECT_TYPE_INSTANCE) +VK_DEFINE_HANDLE_CASTS(pvr_physical_device, + vk.base, + VkPhysicalDevice, + VK_OBJECT_TYPE_PHYSICAL_DEVICE) +VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE) + +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory, + base, + VkDeviceMemory, + VK_OBJECT_TYPE_DEVICE_MEMORY) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_cache, + base, + VkPipelineCache, + VK_OBJECT_TYPE_PIPELINE_CACHE) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view, + vk.base, + VkImageView, + VK_OBJECT_TYPE_IMAGE_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout, + base, + VkDescriptorSetLayout, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set, + base, + VkDescriptorSet, + VK_OBJECT_TYPE_DESCRIPTOR_SET) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool, + base, + VkDescriptorPool, + VK_OBJECT_TYPE_DESCRIPTOR_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler, + base, + VkSampler, + VK_OBJECT_TYPE_SAMPLER) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_semaphore, + base, + VkSemaphore, + VK_OBJECT_TYPE_SEMAPHORE) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_fence, base, VkFence, VK_OBJECT_TYPE_FENCE) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout, + base, + VkPipelineLayout, + VK_OBJECT_TYPE_PIPELINE_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline, + base, + VkPipeline, + VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer, + base, + VkFramebuffer, + VK_OBJECT_TYPE_FRAMEBUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass, + base, + VkRenderPass, + VK_OBJECT_TYPE_RENDER_PASS) + +/** + * Warn on ignored extension structs. + * + * The Vulkan spec requires us to ignore unsupported or unknown structs in + * a pNext chain. In debug mode, emitting warnings for ignored structs may + * help us discover structs that we should not have ignored. + * + * + * From the Vulkan 1.0.38 spec: + * + * Any component of the implementation (the loader, any enabled layers, + * and drivers) must skip over, without processing (other than reading the + * sType and pNext members) any chained structures with sType values not + * defined by extensions supported by that component. 
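+ *
+ * A sketch of the intended use, in a hypothetical pNext-chain loop (the
+ * surrounding names are illustrative, not part of this driver):
+ *
+ *    vk_foreach_struct_const (ext, create_info->pNext) {
+ *       switch (ext->sType) {
+ *       default:
+ *          pvr_debug_ignored_stype(ext->sType);
+ *          break;
+ *       }
+ *    }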
+ */ +#define pvr_debug_ignored_stype(sType) \ + mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) + +/* Debug helper macros. */ +#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer) \ + do { \ + struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer); \ + if (_cmd_buffer->status != PVR_CMD_BUFFER_STATUS_RECORDING) { \ + vk_errorf(_cmd_buffer, \ + VK_ERROR_OUT_OF_DEVICE_MEMORY, \ + "Command buffer is not in recording state"); \ + return; \ + } else if (_cmd_buffer->state.status < VK_SUCCESS) { \ + vk_errorf(_cmd_buffer, \ + _cmd_buffer->state.status, \ + "Skipping function as command buffer has " \ + "previous build error"); \ + return; \ + } \ + } while (0) + +/** + * Print a FINISHME message, including its source location. + */ +#define pvr_finishme(format, ...) \ + do { \ + static bool reported = false; \ + if (!reported) { \ + mesa_logw("%s:%d: FINISHME: " format, \ + __FILE__, \ + __LINE__, \ + ##__VA_ARGS__); \ + reported = true; \ + } \ + } while (false) + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +# define pvr_assert(x) \ + ({ \ + if (unlikely(!(x))) \ + mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \ + }) +#else +# define pvr_assert(x) +#endif + +#endif /* PVR_PRIVATE_H */ diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c new file mode 100644 index 00000000000..0c6300730f2 --- /dev/null +++ b/src/imagination/vulkan/pvr_query.c @@ -0,0 +1,88 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pvr_private.h" + +VkResult pvr_CreateQueryPool(VkDevice _device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +void pvr_DestroyQueryPool(VkDevice _device, + VkQueryPool queryPool, + const VkAllocationCallbacks *pAllocator) +{ + assert(!"Unimplemented"); +} + +VkResult pvr_GetQueryPoolResults(VkDevice _device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void *pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + assert(!"Unimplemented"); + return VK_SUCCESS; +} + +void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + assert(!"Unimplemented"); +} + +void pvr_CmdEndQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + assert(!"Unimplemented"); +} diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c new file mode 100644 index 00000000000..00b75752f6c --- /dev/null +++ b/src/imagination/vulkan/pvr_queue.c @@ -0,0 +1,773 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * based in part on radv driver which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/**
+ * This file implements VkQueue, VkFence, and VkSemaphore.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_job_compute.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+#include "util/u_atomic.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_queue.h"
+#include "vk_util.h"
+
+static VkResult pvr_queue_init(struct pvr_device *device,
+                               struct pvr_queue *queue,
+                               const VkDeviceQueueCreateInfo *pCreateInfo,
+                               uint32_t index_in_family)
+{
+   struct pvr_compute_ctx *compute_ctx;
+   struct pvr_render_ctx *gfx_ctx;
+   VkResult result;
+
+   result =
+      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_compute_ctx_create(device,
+                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
+                                   &compute_ctx);
+   if (result != VK_SUCCESS)
+      goto err_vk_queue_finish;
+
+   result =
+      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
+   if (result != VK_SUCCESS)
+      goto err_compute_ctx_destroy;
+
+   queue->device = device;
+   queue->gfx_ctx = gfx_ctx;
+   queue->compute_ctx = compute_ctx;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
+      queue->completion[i] = NULL;
+
+   return VK_SUCCESS;
+
+err_compute_ctx_destroy:
+   pvr_compute_ctx_destroy(compute_ctx);
+
+err_vk_queue_finish:
+   vk_queue_finish(&queue->vk);
+
+   return result;
+}
+
+VkResult pvr_queues_create(struct pvr_device *device,
+                           const VkDeviceCreateInfo *pCreateInfo)
+{
+   VkResult result;
+
+   /* Check requested queue families and queues. */
+   assert(pCreateInfo->queueCreateInfoCount == 1);
+   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
+   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
+
+   const VkDeviceQueueCreateInfo *queue_create =
+      &pCreateInfo->pQueueCreateInfos[0];
+
+   device->queues =
+      vk_alloc(&device->vk.alloc,
+               queue_create->queueCount * sizeof(*device->queues),
+               8,
+               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device->queues)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   device->queue_count = 0;
+
+   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
+      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
+      if (result != VK_SUCCESS)
+         goto err_queues_finish;
+
+      device->queue_count++;
+   }
+
+   return VK_SUCCESS;
+
+err_queues_finish:
+   pvr_queues_destroy(device);
+   return result;
+}
+
+static void pvr_queue_finish(struct pvr_queue *queue)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
+      if (queue->completion[i])
+         queue->device->ws->ops->syncobj_destroy(queue->completion[i]);
+   }
+
+   pvr_render_ctx_destroy(queue->gfx_ctx);
+   pvr_compute_ctx_destroy(queue->compute_ctx);
+
+   vk_queue_finish(&queue->vk);
+}
+
+void pvr_queues_destroy(struct pvr_device *device)
+{
+   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
+      pvr_queue_finish(&device->queues[q_idx]);
+
+   vk_free(&device->vk.alloc, device->queues);
+}
+
+VkResult pvr_QueueWaitIdle(VkQueue _queue)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+
+   return queue->device->ws->ops->syncobjs_wait(queue->device->ws,
+                                                queue->completion,
+                                                ARRAY_SIZE(queue->completion),
+                                                true,
+                                                UINT64_MAX);
+}
+
+VkResult pvr_CreateFence(VkDevice _device,
+                         const VkFenceCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkFence *pFence)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_fence
*fence; + VkResult result; + + fence = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*fence), + VK_OBJECT_TYPE_FENCE); + if (!fence) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We don't really need to create a syncobj here unless it's a signaled + * fence. + */ + if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { + result = + device->ws->ops->syncobj_create(device->ws, true, &fence->syncobj); + if (result != VK_SUCCESS) { + vk_object_free(&device->vk, pAllocator, fence); + return result; + } + } else { + fence->syncobj = NULL; + } + + *pFence = pvr_fence_to_handle(fence); + + return VK_SUCCESS; +} + +void pvr_DestroyFence(VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_fence, fence, _fence); + + if (!fence) + return; + + if (fence->syncobj) + device->ws->ops->syncobj_destroy(fence->syncobj); + + vk_object_free(&device->vk, pAllocator, fence); +} + +VkResult +pvr_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) +{ + struct pvr_winsys_syncobj *syncobjs[fenceCount]; + PVR_FROM_HANDLE(pvr_device, device, _device); + + for (uint32_t i = 0; i < fenceCount; i++) { + PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]); + + syncobjs[i] = fence->syncobj; + } + + return device->ws->ops->syncobjs_reset(device->ws, syncobjs, fenceCount); +} + +VkResult pvr_GetFenceStatus(VkDevice _device, VkFence _fence) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_fence, fence, _fence); + VkResult result; + + result = + device->ws->ops->syncobjs_wait(device->ws, &fence->syncobj, 1U, true, 0U); + if (result == VK_TIMEOUT) + return VK_NOT_READY; + + return result; +} + +VkResult pvr_WaitForFences(VkDevice _device, + uint32_t fenceCount, + const VkFence *pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + struct pvr_winsys_syncobj *syncobjs[fenceCount]; + PVR_FROM_HANDLE(pvr_device, device, _device); + + for (uint32_t i = 0; i < fenceCount; i++) { + PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]); + + syncobjs[i] = fence->syncobj; + } + + return device->ws->ops->syncobjs_wait(device->ws, + syncobjs, + fenceCount, + !!waitAll, + timeout); +} + +VkResult pvr_CreateSemaphore(VkDevice _device, + const VkSemaphoreCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSemaphore *pSemaphore) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_semaphore *semaphore; + + semaphore = vk_object_alloc(&device->vk, + pAllocator, + sizeof(*semaphore), + VK_OBJECT_TYPE_SEMAPHORE); + if (!semaphore) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + semaphore->syncobj = NULL; + + *pSemaphore = pvr_semaphore_to_handle(semaphore); + + return VK_SUCCESS; +} + +void pvr_DestroySemaphore(VkDevice _device, + VkSemaphore _semaphore, + const VkAllocationCallbacks *pAllocator) +{ + PVR_FROM_HANDLE(pvr_device, device, _device); + PVR_FROM_HANDLE(pvr_semaphore, semaphore, _semaphore); + + if (semaphore->syncobj) + device->ws->ops->syncobj_destroy(semaphore->syncobj); + + vk_object_free(&device->vk, pAllocator, semaphore); +} + +static enum pvr_pipeline_stage_bits +pvr_convert_stage_mask(VkPipelineStageFlags stage_mask) +{ + enum pvr_pipeline_stage_bits stages = 0; + + if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT || + stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) { + return PVR_PIPELINE_STAGE_ALL_BITS; + } + + if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)) + stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS; + + if 
(stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) { + stages |= PVR_PIPELINE_STAGE_GEOM_BIT; + } + + if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + stages |= PVR_PIPELINE_STAGE_FRAG_BIT; + } + + if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) { + assert(!"Unimplemented"); + } + + if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT)) + stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT; + + return stages; +} + +static VkResult pvr_process_graphics_cmd( + struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd *sub_cmd, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX]) +{ + const struct pvr_framebuffer *framebuffer = sub_cmd->gfx.framebuffer; + struct pvr_winsys_syncobj *syncobj_geom = NULL; + struct pvr_winsys_syncobj *syncobj_frag = NULL; + uint32_t bo_count = 0; + VkResult result; + + STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count); + if (!bos) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* FIXME: DoShadowLoadOrStore() */ + + /* FIXME: If the framebuffer being rendered to has multiple layers then we + * need to split submissions that run a fragment job into two. + */ + if (sub_cmd->gfx.job.run_frag && framebuffer->layers > 1) + pvr_finishme("Split job submission for framebuffers with > 1 layers"); + + /* Get any imported buffers used in framebuffer attachments. */ + for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) { + if (!framebuffer->attachments[i]->image->vma->bo->is_imported) + continue; + + bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo; + bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE; + bo_count++; + } + + /* This passes ownership of the wait fences to pvr_render_job_submit(). */ + result = pvr_render_job_submit(queue->gfx_ctx, + &sub_cmd->gfx.job, + bos, + bo_count, + semaphores, + semaphore_count, + stage_flags, + &syncobj_geom, + &syncobj_frag); + STACK_ARRAY_FINISH(bos); + if (result != VK_SUCCESS) + return result; + + /* Replace the completion fences. */ + if (syncobj_geom) { + if (completions[PVR_JOB_TYPE_GEOM]) + device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_GEOM]); + + completions[PVR_JOB_TYPE_GEOM] = syncobj_geom; + } + + if (syncobj_frag) { + if (completions[PVR_JOB_TYPE_FRAG]) + device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_FRAG]); + + completions[PVR_JOB_TYPE_FRAG] = syncobj_frag; + } + + /* FIXME: DoShadowLoadOrStore() */ + + return result; +} + +static VkResult pvr_process_compute_cmd( + struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd *sub_cmd, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX]) +{ + struct pvr_winsys_syncobj *syncobj = NULL; + VkResult result; + + /* This passes ownership of the wait fences to pvr_compute_job_submit(). 
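+    * i.e. the callee becomes responsible for releasing them; the caller
+    * must not use them afterwards.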
+    */
+   result = pvr_compute_job_submit(queue->compute_ctx,
+                                   sub_cmd,
+                                   semaphores,
+                                   semaphore_count,
+                                   stage_flags,
+                                   &syncobj);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Replace the completion fences. */
+   if (syncobj) {
+      if (completions[PVR_JOB_TYPE_COMPUTE])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_COMPUTE]);
+
+      completions[PVR_JOB_TYPE_COMPUTE] = syncobj;
+   }
+
+   return result;
+}
+
+/* FIXME: Implement gpu based transfer support. */
+static VkResult pvr_process_transfer_cmds(
+   struct pvr_device *device,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   /* Wait for transfer semaphores here before doing any transfers. */
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, semaphores[i]);
+
+      if (sem->syncobj && stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
+         VkResult result = device->ws->ops->syncobjs_wait(device->ws,
+                                                          &sem->syncobj,
+                                                          1,
+                                                          true,
+                                                          UINT64_MAX);
+         if (result != VK_SUCCESS)
+            return result;
+
+         stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
+         if (stage_flags[i] == 0) {
+            device->ws->ops->syncobj_destroy(sem->syncobj);
+            sem->syncobj = NULL;
+         }
+      }
+   }
+
+   list_for_each_entry_safe (struct pvr_transfer_cmd,
+                             transfer_cmd,
+                             &sub_cmd->transfer.transfer_cmds,
+                             link) {
+      bool src_mapped = false;
+      bool dst_mapped = false;
+      void *src_addr;
+      void *dst_addr;
+      void *ret_ptr;
+
+      /* Map if bo is not mapped. */
+      if (!transfer_cmd->src->vma->bo->map) {
+         src_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
+         if (!ret_ptr)
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+      }
+
+      if (!transfer_cmd->dst->vma->bo->map) {
+         dst_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
+         if (!ret_ptr)
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+      }
+
+      src_addr =
+         transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
+      dst_addr =
+         transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;
+
+      for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
+         VkBufferCopy2 *region = &transfer_cmd->regions[i];
+
+         memcpy(dst_addr + region->dstOffset,
+                src_addr + region->srcOffset,
+                region->size);
+      }
+
+      if (src_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+
+      if (dst_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
+   }
+
+   /* Since the copy is done on the CPU, the completion fence should always
+    * be NULL. This should be fixed when GPU based copy is implemented.
+    */
+   assert(!completions[PVR_JOB_TYPE_TRANSFER]);
+
+   return VK_SUCCESS;
+}
+
+static VkResult pvr_set_semaphore_payloads(
+   struct pvr_device *device,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count)
+{
+   struct pvr_winsys_syncobj *syncobj = NULL;
+   VkResult result;
+
+   if (!semaphore_count)
+      return VK_SUCCESS;
+
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (completions[i]) {
+         result =
+            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_syncobj;
+      }
+   }
+
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
+      struct pvr_winsys_syncobj *dup_signal_fence;
+
+      /* Duplicate signal_fence and store it in each signal semaphore.
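+       * (Merging with NULL appears to yield a standalone copy of the first
+       * syncobj, which is how the duplicate is produced here.)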
*/ + result = + device->ws->ops->syncobjs_merge(syncobj, NULL, &dup_signal_fence); + if (result != VK_SUCCESS) + goto err_destroy_syncobj; + + if (semaphore->syncobj) + device->ws->ops->syncobj_destroy(semaphore->syncobj); + semaphore->syncobj = dup_signal_fence; + } + +err_destroy_syncobj: + if (syncobj) + device->ws->ops->syncobj_destroy(syncobj); + + return result; +} + +static VkResult pvr_set_fence_payload( + struct pvr_device *device, + struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX], + VkFence _fence) +{ + PVR_FROM_HANDLE(pvr_fence, fence, _fence); + struct pvr_winsys_syncobj *syncobj = NULL; + + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + if (completions[i]) { + VkResult result = + device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj); + if (result != VK_SUCCESS) { + device->ws->ops->syncobj_destroy(syncobj); + return result; + } + } + } + + if (fence->syncobj) + device->ws->ops->syncobj_destroy(fence->syncobj); + fence->syncobj = syncobj; + + return VK_SUCCESS; +} + +static VkResult pvr_process_cmd_buffer( + struct pvr_device *device, + struct pvr_queue *queue, + VkCommandBuffer commandBuffer, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX]) +{ + PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); + VkResult result; + + assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE); + + list_for_each_entry_safe (struct pvr_sub_cmd, + sub_cmd, + &cmd_buffer->sub_cmds, + link) { + switch (sub_cmd->type) { + case PVR_SUB_CMD_TYPE_GRAPHICS: + result = pvr_process_graphics_cmd(device, + queue, + cmd_buffer, + sub_cmd, + semaphores, + semaphore_count, + stage_flags, + completions); + break; + + case PVR_SUB_CMD_TYPE_COMPUTE: + result = pvr_process_compute_cmd(device, + queue, + sub_cmd, + semaphores, + semaphore_count, + stage_flags, + completions); + break; + + case PVR_SUB_CMD_TYPE_TRANSFER: + result = pvr_process_transfer_cmds(device, + sub_cmd, + semaphores, + semaphore_count, + stage_flags, + completions); + break; + + default: + pvr_finishme("Unsupported sub-command type %d", sub_cmd->type); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (result != VK_SUCCESS) { + cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID; + return result; + } + + p_atomic_inc(&device->global_queue_job_count); + } + + return VK_SUCCESS; +} + +static VkResult pvr_process_empty_job( + struct pvr_device *device, + const VkSemaphore *semaphores, + uint32_t semaphore_count, + uint32_t *stage_flags, + struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX]) +{ + for (uint32_t i = 0; i < semaphore_count; i++) { + PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]); + + if (!semaphore->syncobj) + continue; + + for (uint32_t j = 0; j < PVR_NUM_SYNC_PIPELINE_STAGES; j++) { + if (stage_flags[i] & (1U << j)) { + VkResult result = + device->ws->ops->syncobjs_merge(semaphore->syncobj, + completions[j], + &completions[j]); + if (result != VK_SUCCESS) + return result; + } + } + + device->ws->ops->syncobj_destroy(semaphore->syncobj); + semaphore->syncobj = NULL; + } + + return VK_SUCCESS; +} + +static void +pvr_update_syncobjs(struct pvr_device *device, + struct pvr_winsys_syncobj *src[static PVR_JOB_TYPE_MAX], + struct pvr_winsys_syncobj *dst[static PVR_JOB_TYPE_MAX]) +{ + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + if (src[i]) { + if (dst[i]) + device->ws->ops->syncobj_destroy(dst[i]); + + dst[i] = src[i]; + } + } +} + +VkResult 
pvr_QueueSubmit(VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo *pSubmits, + VkFence fence) +{ + PVR_FROM_HANDLE(pvr_queue, queue, _queue); + struct pvr_winsys_syncobj *completion_syncobjs[PVR_JOB_TYPE_MAX] = {}; + struct pvr_device *device = queue->device; + VkResult result; + + for (uint32_t i = 0; i < submitCount; i++) { + struct pvr_winsys_syncobj + *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {}; + const VkSubmitInfo *desc = &pSubmits[i]; + uint32_t stage_flags[desc->waitSemaphoreCount]; + + for (uint32_t j = 0; j < desc->waitSemaphoreCount; j++) + stage_flags[j] = pvr_convert_stage_mask(desc->pWaitDstStageMask[j]); + + if (desc->commandBufferCount > 0U) { + for (uint32_t j = 0U; j < desc->commandBufferCount; j++) { + result = pvr_process_cmd_buffer(device, + queue, + desc->pCommandBuffers[j], + desc->pWaitSemaphores, + desc->waitSemaphoreCount, + stage_flags, + per_submit_completion_syncobjs); + if (result != VK_SUCCESS) + return result; + } + } else { + result = pvr_process_empty_job(device, + desc->pWaitSemaphores, + desc->waitSemaphoreCount, + stage_flags, + per_submit_completion_syncobjs); + if (result != VK_SUCCESS) + return result; + } + + if (desc->signalSemaphoreCount) { + result = pvr_set_semaphore_payloads(device, + per_submit_completion_syncobjs, + desc->pSignalSemaphores, + desc->signalSemaphoreCount); + if (result != VK_SUCCESS) + return result; + } + + pvr_update_syncobjs(device, + per_submit_completion_syncobjs, + completion_syncobjs); + } + + if (fence) { + result = pvr_set_fence_payload(device, completion_syncobjs, fence); + if (result != VK_SUCCESS) + return result; + } + + pvr_update_syncobjs(device, completion_syncobjs, queue->completion); + + return VK_SUCCESS; +} diff --git a/src/imagination/vulkan/pvr_shader.c b/src/imagination/vulkan/pvr_shader.c new file mode 100644 index 00000000000..a0063d8993e --- /dev/null +++ b/src/imagination/vulkan/pvr_shader.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_shader.h"
+#include "spirv/nir_spirv.h"
+#include "vk_format.h"
+#include "vk_shader_module.h"
+#include "vk_util.h"
+
+/**
+ * \file pvr_shader.c
+ *
+ * \brief Contains top-level functions to compile SPIR-V -> NIR -> Rogue, and
+ * interfaces with the compiler.
+ */
+
+/**
+ * \brief Converts a SPIR-V shader to NIR.
+ *
+ * \param[in] ctx         Shared multi-stage build context.
+ * \param[in] stage       Shader stage.
+ * \param[in] create_info Shader creation info from Vulkan pipeline.
+ * \return A nir_shader* if successful, or NULL if unsuccessful.
+ */
+nir_shader *pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+                             gl_shader_stage stage,
+                             const VkPipelineShaderStageCreateInfo *create_info)
+{
+   VK_FROM_HANDLE(vk_shader_module, module, create_info->module);
+   struct nir_spirv_specialization *spec;
+   unsigned num_spec = 0;
+   nir_shader *nir;
+
+   spec =
+      vk_spec_info_to_nir_spirv(create_info->pSpecializationInfo, &num_spec);
+
+   nir = rogue_spirv_to_nir(ctx,
+                            stage,
+                            create_info->pName,
+                            module->size / sizeof(uint32_t),
+                            (uint32_t *)module->data,
+                            num_spec,
+                            spec);
+
+   free(spec);
+
+   return nir;
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                      nir_shader *nir)
+{
+   return rogue_nir_to_rogue(ctx, nir);
+}
+
+/**
+ * \brief Converts a Rogue shader to binary.
+ *
+ * \param[in] ctx    Shared multi-stage build context.
+ * \param[in] shader Rogue shader.
+ * \return A rogue_shader_binary* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+                                                struct rogue_shader *shader)
+{
+   return rogue_to_binary(ctx, shader);
+}
diff --git a/src/imagination/vulkan/pvr_shader.h b/src/imagination/vulkan/pvr_shader.h
new file mode 100644
index 00000000000..c52265485d1
--- /dev/null
+++ b/src/imagination/vulkan/pvr_shader.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SHADER_H
+#define PVR_SHADER_H
+
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "vulkan/vulkan.h"
+
+struct rogue_build_ctx;
+struct rogue_compiler;
+struct rogue_shader;
+
+nir_shader *
+pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+                 gl_shader_stage stage,
+                 const VkPipelineShaderStageCreateInfo *create_info);
+
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                      nir_shader *nir);
+
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+                                                struct rogue_shader *rogue);
+
+#endif /* PVR_SHADER_H */
diff --git a/src/imagination/vulkan/pvr_tex_state.c b/src/imagination/vulkan/pvr_tex_state.c
new file mode 100644
index 00000000000..4ad37ee51bd
--- /dev/null
+++ b/src/imagination/vulkan/pvr_tex_state.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
+                                                   enum pipe_swizzle swz)
+{
+   switch (swz) {
+   case PIPE_SWIZZLE_0:
+      return ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   case PIPE_SWIZZLE_1:
+      return ROGUE_TEXSTATE_SWIZ_SRC_ONE;
+   case PIPE_SWIZZLE_X:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_0;
+   case PIPE_SWIZZLE_Y:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_1;
+   case PIPE_SWIZZLE_Z:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_2;
+   case PIPE_SWIZZLE_W:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_3;
+   case PIPE_SWIZZLE_NONE:
+      if (comp == VK_COMPONENT_SWIZZLE_A)
+         return ROGUE_TEXSTATE_SWIZ_SRC_ONE;
+      else
+         return ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   default:
+      unreachable("Unknown enum pipe_swizzle");
+   }
+}
+
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+                   struct pvr_texture_state_info *info,
+                   uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t texture_type;
+
+   pvr_csb_pack (&state[0], TEXSTATE_IMAGE_WORD0, word0) {
+      /* Determine texture type. */
+      if (info->is_cube && info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_CUBE);
+      } else if (info->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+                 info->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         if (info->type == VK_IMAGE_VIEW_TYPE_3D) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_3D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_1D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_1D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_1D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_2D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_2D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_2D);
+         } else {
+            return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+         }
+      } else if (info->mem_layout == PVR_MEMLAYOUT_LINEAR) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_STRIDE);
+      } else {
+         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+      word0.texformat = pvr_get_tex_format(info->format);
+      word0.smpcnt = util_logbase2(info->sample_count);
+      word0.swiz0 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_R, info->swizzle[0]);
+      word0.swiz1 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_G, info->swizzle[1]);
+      word0.swiz2 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_B, info->swizzle[2]);
+      word0.swiz3 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_A, info->swizzle[3]);
+
+      /* Gamma */
+      if (vk_format_is_srgb(info->format)) {
+         /* Gamma for 2 component formats has to be handled differently. */
+         if (vk_format_get_nr_components(info->format) == 2) {
+            /* Enable gamma only for channel 0 if channel 1 is an alpha
+             * channel.
+             */
+            if (vk_format_has_alpha(info->format)) {
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_R);
+            } else {
+               /* Otherwise enable gamma for both channels. */
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_RG);
+
+               /* If channel 0 happens to be the alpha channel, the
+                * ALPHA_MSB bit would not be set, thereby disabling gamma
+                * for channel 0.
+                */
+            }
+         } else {
+            word0.gamma = PVRX(TEXSTATE_GAMMA_ON);
+         }
+      }
+
+      word0.width = info->extent.width - 1;
+      if (info->type != VK_IMAGE_VIEW_TYPE_1D &&
+          info->type != VK_IMAGE_VIEW_TYPE_1D_ARRAY)
+         word0.height = info->extent.height - 1;
+   }
+
+   /* Texture type specific stuff (word 1) */
+   if (texture_type == PVRX(TEXSTATE_TEXTYPE_STRIDE)) {
+      pvr_csb_pack (&state[1], TEXSTATE_STRIDE_IMAGE_WORD1, word1) {
+         word1.stride = info->stride;
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   } else {
+      pvr_csb_pack (&state[1], TEXSTATE_IMAGE_WORD1, word1) {
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+         word1.baselevel = info->base_level;
+
+         if (info->extent.depth > 0) {
+            word1.depth = info->extent.depth - 1;
+         } else if (PVR_HAS_FEATURE(dev_info, tpu_array_textures)) {
+            uint32_t array_layers = info->array_size;
+
+            if (info->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
+                info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE)
+               array_layers /= 6;
+
+            word1.depth = array_layers - 1;
+         }
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (info->flags & PVR_TEXFLAGS_BORDER)
+            word1.border = true;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_tex_state.h b/src/imagination/vulkan/pvr_tex_state.h
new file mode 100644
index 00000000000..f6f18d93173
--- /dev/null
+++ b/src/imagination/vulkan/pvr_tex_state.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_TEX_STATE_H
+#define PVR_TEX_STATE_H
+
+#include <stdbool.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+
+/**
+ * Texture requires 32bit index lookups instead of texture coordinate access.
+ */
+#define PVR_TEXFLAGS_INDEX_LOOKUP BITFIELD_BIT(0U)
+
+/** Texture has border texels present. */
+#define PVR_TEXFLAGS_BORDER BITFIELD_BIT(1U)
+
+/**
+ * Resource is actually a buffer, not a texture, and therefore LOD is ignored.
+ * Coordinates are integers.
+ */
+#define PVR_TEXFLAGS_BUFFER BITFIELD_BIT(2U)
+
+/** Parameters for #pvr_pack_tex_state(). */
+struct pvr_texture_state_info {
+   VkFormat format;
+   enum pvr_memlayout mem_layout;
+   uint32_t flags;
+   VkImageViewType type;
+   bool is_cube;
+   enum pvr_texture_state tex_state_type;
+   VkExtent3D extent;
+
+   /**
+    * For array textures, this holds the array dimension, in elements. This
+    * can be zero if the texture is not an array.
+    */
+   uint32_t array_size;
+
+   /** Base mipmap level. This is the miplevel you want as the top level. */
+   uint32_t base_level;
+
+   /**
+    * Number of mipmap levels that should be accessed by HW. This is not
+    * necessarily the number of levels that are in memory (see
+    * mipmaps_present).
+    */
+   uint32_t mip_levels;
+
+   /**
+    * True if the texture is mipmapped.
+    * Note: this is based on the number of mip levels the texture contains,
+    * not on the mip levels that are being used, i.e. mip_levels.
+    */
+   bool mipmaps_present;
+
+   /**
+    * Number of samples per texel for multisampling. This should be 1 for
+    * non-multisampled textures.
+    */
+   uint32_t sample_count;
+
+   /** Stride, in pixels. Only valid if mem_layout is stride or tiled. */
+   uint32_t stride;
+
+   /**
+    * For buffers, where TPU_BUFFER_LOOKUP is present, this defines
+    * the offset for the buffer, in texels.
+    */
+   uint32_t offset;
+
+   /**
+    * Precomputed (composed from createinfo->components and format swizzle)
+    * swizzles to pass in to the texture state.
+    */
+   uint8_t swizzle[4];
+
+   /** Address of texture, which must be aligned to at least 32 bits. */
+   pvr_dev_addr_t addr;
+};
+
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+                   struct pvr_texture_state_info *info,
+                   uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
+
+#endif /* PVR_TEX_STATE_H */
diff --git a/src/imagination/vulkan/pvr_wsi.c b/src/imagination/vulkan/pvr_wsi.c
new file mode 100644
index 00000000000..aadf1869ace
--- /dev/null
+++ b/src/imagination/vulkan/pvr_wsi.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based on intel anv code:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/u_atomic.h"
+#include "wsi_common.h"
+
+static PFN_vkVoidFunction pvr_wsi_proc_addr(VkPhysicalDevice physicalDevice,
+                                            const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
+}
+
+VkResult pvr_wsi_init(struct pvr_physical_device *pdevice)
+{
+   VkResult result;
+
+   result = wsi_device_init(&pdevice->wsi_device,
+                            pvr_physical_device_to_handle(pdevice),
+                            pvr_wsi_proc_addr,
+                            &pdevice->vk.instance->alloc,
+                            pdevice->master_fd,
+                            NULL,
+                            false);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pdevice->wsi_device.supports_modifiers = true;
+   pdevice->vk.wsi_device = &pdevice->wsi_device;
+
+   return VK_SUCCESS;
+}
+
+void pvr_wsi_finish(struct pvr_physical_device *pdevice)
+{
+   pdevice->vk.wsi_device = NULL;
+   wsi_device_finish(&pdevice->wsi_device, &pdevice->vk.instance->alloc);
+}
+
+VkResult pvr_QueuePresentKHR(VkQueue _queue,
+                             const VkPresentInfoKHR *pPresentInfo)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   VkResult result;
+
+   result = wsi_common_queue_present(&queue->device->pdevice->wsi_device,
+                                     pvr_device_to_handle(queue->device),
+                                     _queue,
+                                     0,
+                                     pPresentInfo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   p_atomic_inc(&queue->device->global_queue_present_count);
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_AcquireNextImage2KHR(VkDevice _device,
+                                  const VkAcquireNextImageInfoKHR *pAcquireInfo,
+                                  uint32_t *pImageIndex)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_winsys_syncobj *handles[2];
+   uint32_t count = 0U;
+   VkResult result;
+   VkResult ret;
+
+   result = wsi_common_acquire_next_image2(&device->pdevice->wsi_device,
+                                           _device,
+                                           pAcquireInfo,
+                                           pImageIndex);
+   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
+      return result;
+
+   if (pAcquireInfo->fence) {
+      PVR_FROM_HANDLE(pvr_fence, fence, pAcquireInfo->fence);
+      handles[count++] = fence->syncobj;
+   }
+
+   if (pAcquireInfo->semaphore) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, pAcquireInfo->semaphore);
+      handles[count++] = semaphore->syncobj;
+   }
+
+   if (count == 0U)
+      return result;
+
+   /* We need to preserve VK_SUBOPTIMAL_KHR status.
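+    * Both VK_SUCCESS and VK_SUBOPTIMAL_KHR mean an image was acquired, so
+    * the application's fence/semaphore must still be signalled; if we simply
+    * returned the signal status below we would lose the suboptimal hint.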
+    */
+   ret = device->ws->ops->syncobjs_signal(device->ws, handles, count);
+   if (ret != VK_SUCCESS)
+      return ret;
+
+   return result;
+}
diff --git a/src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h b/src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h
new file mode 100644
index 00000000000..99b36258282
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CDM_LOAD_SR_H
+#define PVR_CDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_cdm_load_sr_code[] = {
+   0x25, 0x02, 0x87, 0x81,
+   0x04, 0x00, 0x00, 0x00,
+   0x84, 0x04, 0x25, 0x02,
+   0x87, 0x80, 0x04, 0x00,
+   0x00, 0x00, 0x85, 0x04,
+   0x25, 0x02, 0x87, 0x83,
+   0x04, 0x00, 0x00, 0x00,
+   0x86, 0x04, 0x25, 0x02,
+   0x87, 0x82, 0x04, 0x00,
+   0x00, 0x00, 0x87, 0x04,
+   0x56, 0x20, 0xF1, 0x85,
+   0x02, 0x80, 0x81, 0xD0,
+   0xC4, 0x08, 0x00, 0xFF,
+   0x02, 0x80, 0x6A, 0xFF,
+   0x67, 0xF0, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x00, 0x50, 0x8F, 0xC0,
+   0x80, 0x02, 0x04, 0x81,
+   0x60, 0x00, 0x0A, 0x01,
+   0x00, 0x00, 0x25, 0x36,
+   0x87, 0x87, 0x00, 0x00,
+   0x40, 0x05, 0xD1, 0x06,
+   0x55, 0x20, 0xF1, 0x81,
+   0x02, 0x00, 0xC0, 0xC6,
+   0x08, 0x00, 0x02, 0x80,
+   0x6A, 0xFF, 0x46, 0x42,
+   0xD0, 0x03, 0xEA, 0xD1,
+   0x41, 0x00, 0x01, 0x00,
+   0x00, 0x51, 0x27, 0x06,
+   0xEB, 0x84, 0x50, 0x20,
+   0x86, 0x87, 0x04, 0x00,
+   0xC0, 0x06, 0x87, 0x22,
+   0x25, 0x32, 0x87, 0x87,
+   0x00, 0x1F, 0x40, 0xC5,
+   0x0C, 0xFF, 0x25, 0x02,
+   0x87, 0xC0, 0x0C, 0x00,
+   0x00, 0x00, 0x83, 0x0C,
+   0x47, 0x42, 0xD0, 0x03,
+   0xEA, 0x85, 0x41, 0x90,
+   0x01, 0x08, 0x00, 0x00,
+   0x85, 0x0C, 0x47, 0x42,
+   0xD0, 0x03, 0xEA, 0xC5,
+   0x41, 0x90, 0x01, 0x08,
+   0x00, 0x00, 0xC5, 0x0C,
+   0x67, 0xF0, 0x40, 0x28,
+   0x42, 0x8C, 0x80, 0x40,
+   0x80, 0xC5, 0x80, 0x90,
+   0x80, 0xFF, 0x04, 0x81,
+   0x60, 0x00, 0xCC, 0xFF,
+   0xFF, 0xFF, 0x66, 0xF0,
+   0x40, 0x28, 0x42, 0x8C,
+   0x80, 0x40, 0x00, 0x51,
+   0xD0, 0x80, 0x04, 0x81,
+   0x60, 0x00, 0x86, 0xFF,
+   0xFF, 0xFF, 0x46, 0x40,
+   0xF1, 0xB0, 0xE2, 0x81,
+   0x4D, 0x01, 0x00, 0x00,
+   0x00, 0xFF, 0x04, 0x81,
+   0x60, 0x00, 0x32, 0x00,
+   0x00, 0x00, 0x02, 0x80,
+   0x6C, 0xC4, 0x45, 0x12,
+   0xD3, 0x3F, 0x01, 0x00,
+   0x00, 0xAB, 0x01, 0xFF,
+   0x44, 0x12, 0xD3, 0x3F,
+   0x00, 0x00, 0x00, 0x40,
+   0x44, 0x10, 0xD3, 0x3F,
+   0x40, 0x00, 0x00, 0xFF,
+   0x02, 0x80, 0x6C, 0x84,
+   0x04, 0x80, 0x60, 0x00,
+   0x40, 0x00, 0x00, 0x00,
+   0x46, 0x50, 0xFB, 0xB0,
+   0x87, 0xE2, 0x81, 0x4B,
+   0x03, 0x00, 0x00, 0x00,
+   0x04, 0x81, 0x61, 0x00,
+   0xF4, 0xFF, 0xFF, 0xFF,
+   0x02, 0x80, 0x6C, 0xC4,
+   0x45, 0x12, 0xD3, 0x3F,
+   0x01, 0x00, 0x00, 0xAA,
+   0x01, 0xFF, 0x44, 0x12,
+   0xD3, 0x3F, 0x00, 0x00,
+   0x00, 0x40, 0x44, 0x10,
+   0xD3, 0x3F, 0x40, 0x00,
+   0x00, 0xFF, 0x03, 0x80,
+   0x6C, 0x84, 0xF1, 0xFF,
+   0x04, 0x80, 0xEE, 0x00,
+   0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_CDM_LOAD_SR_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_end_of_tile.h b/src/imagination/vulkan/usc/programs/pvr_end_of_tile.h
new file mode 100644
index 00000000000..2ec962cdda8
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_end_of_tile.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_END_OF_TILE_H
+#define PVR_END_OF_TILE_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_end_of_tile_program[] = {
+   0xa9, 0xf2, 0x40, 0x00,
+   0x47, 0x91, 0x00, 0x50,
+   0x04, 0x00, 0x80, 0x40,
+   0x00, 0x00, 0x80, 0x80,
+   0x24, 0xff, 0xa9, 0xf2,
+   0x40, 0x00, 0x47, 0x91,
+   0x20, 0x20, 0x08, 0x00,
+   0x80, 0x40, 0x00, 0x00,
+   0x80, 0x80, 0x25, 0xff,
+   0x45, 0xa0, 0x80, 0xc2,
+   0xa4, 0x40, 0x00, 0x25,
+   0x00, 0x00
+};
+/* clang-format on */
+
+#endif /* PVR_END_OF_TILE_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h b/src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h
new file mode 100644
index 00000000000..0d59c7577ff
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_COMPUTE_SHADER_H
+#define PVR_USC_COMPUTE_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+uint8_t pvr_usc_compute_shader[] = {
+   0x44, 0x12, 0xd3, 0x3f,
+   0x00, 0x00, 0x00, 0x24,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x80, 0x40, 0xa0,
+   0x2a, 0x30, 0x00, 0x02,
+   0x04, 0x81, 0x61, 0x00,
+   0x54, 0x00, 0x00, 0x00,
+   0x02, 0x80, 0x6c, 0xc4,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x81, 0x40, 0xa0,
+   0x2d, 0x10, 0x00, 0x02,
+   0x46, 0x12, 0xd3, 0x3f,
+   0x80, 0xca, 0x83, 0x10,
+   0x00, 0x00, 0x25, 0xff,
+   0x46, 0x13, 0xd3, 0x3f,
+   0x80, 0xcb, 0x83, 0x10,
+   0x00, 0x00, 0x25, 0xff,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x80, 0x40, 0xa0,
+   0x25, 0x10, 0x00, 0x02,
+   0x04, 0x81, 0x61, 0x00,
+   0x14, 0x00, 0x00, 0x00,
+   0x02, 0x80, 0x6c, 0x44,
+   0x04, 0x80, 0x60, 0x00,
+   0xc0, 0xff, 0xff, 0xff,
+   0x02, 0x80, 0x6c, 0x04,
+   0x89, 0x52, 0xdf, 0x3c,
+   0xfc, 0xa0, 0x9c, 0x1e,
+   0x87, 0x87, 0x80, 0xcf,
+   0x90, 0x11, 0x01, 0xa0,
+   0x25, 0xff, 0x46, 0x40,
+   0xff, 0xd0, 0x87, 0xa5,
+   0x40, 0xa0, 0x00, 0x10,
+   0x00, 0x02, 0x44, 0x82,
+   0x67, 0x38, 0x24, 0x00,
+   0x24, 0xff, 0x04, 0x80,
+   0x60, 0x04, 0x68, 0x00,
+   0x00, 0x00, 0x45, 0x12,
+   0xd3, 0x3f, 0xc0, 0x04,
+   0x00, 0x00, 0x00, 0x25,
+   0x27, 0x02, 0xeb, 0xa5,
+   0x44, 0xa0, 0x00, 0x80,
+   0x81, 0x08, 0x00, 0xc0,
+   0x80, 0x04, 0x27, 0x04,
+   0xeb, 0xa5, 0x44, 0xa0,
+   0x00, 0x80, 0x81, 0x08,
+   0x00, 0xc0, 0x81, 0x04,
+   0x55, 0x20, 0xf1, 0x84,
+   0x02, 0x00, 0x82, 0xc0,
+   0x18, 0x00, 0x02, 0x80,
+   0x6a, 0xff, 0x27, 0x02,
+   0xeb, 0xa5, 0x44, 0xa0,
+   0x02, 0x80, 0x83, 0x08,
+   0x00, 0xc0, 0x80, 0x04,
+   0x27, 0x04, 0xeb, 0xa5,
+   0x44, 0xa0, 0x02, 0x80,
+   0x83, 0x08, 0x00, 0xc0,
+   0x81, 0x04, 0x66, 0x20,
+   0xf2, 0x86, 0xb8, 0x28,
+   0x00, 0x82, 0xc0, 0x18,
+   0x00, 0xff, 0x02, 0x80,
+   0x6a, 0xff, 0x45, 0x82,
+   0x67, 0x24, 0x24, 0x00,
+   0x24, 0xff, 0xf1, 0xff,
+   0x04, 0x80, 0xee, 0x00,
+   0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_COMPUTE_SHADER_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h b/src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h
new file mode 100644
index 00000000000..838a155beca
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_FRAGMENT_SHADER_H
+#define PVR_USC_FRAGMENT_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_usc_fragment_shader[] = {
+   0x58, 0x9a, 0x80, 0xd3,
+   0x3f, 0x80, 0x08, 0x00,
+   0x00, 0x00, 0x20, 0xff,
+   0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_FRAGMENT_SHADER_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h b/src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h
new file mode 100644
index 00000000000..ab9709655f1
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_LOAD_SR_H
+#define PVR_VDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_vdm_load_sr_code[] =
+{
+   0x25, 0x02, 0x87, 0x81,
+   0x04, 0x00, 0x00, 0x00,
+   0x84, 0x04, 0x25, 0x02,
+   0x87, 0x80, 0x04, 0x00,
+   0x00, 0x00, 0x85, 0x04,
+   0x25, 0x02, 0x87, 0x83,
+   0x04, 0x00, 0x00, 0x00,
+   0x86, 0x04, 0x25, 0x02,
+   0x87, 0x82, 0x04, 0x00,
+   0x00, 0x00, 0x87, 0x04,
+   0x56, 0x20, 0xF1, 0x85,
+   0x02, 0x80, 0x81, 0xD0,
+   0xC4, 0x08, 0x00, 0xFF,
+   0x02, 0x80, 0x6A, 0xFF,
+   0x67, 0xF0, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x00, 0x50, 0x8F, 0xC0,
+   0x80, 0x02, 0x04, 0x81,
+   0x60, 0x00, 0x9A, 0x00,
+   0x00, 0x00, 0x25, 0x36,
+   0x87, 0x87, 0x00, 0x00,
+   0x40, 0x05, 0xD1, 0x06,
+   0x55, 0x20, 0xF1, 0x81,
+   0x02, 0x00, 0xC0, 0xC6,
+   0x08, 0x00, 0x02, 0x80,
+   0x6A, 0xFF, 0x46, 0x42,
+   0xD0, 0x03, 0xEA, 0xD1,
+   0x41, 0x00, 0x01, 0x00,
+   0x00, 0x51, 0x27, 0x06,
+   0xEB, 0x84, 0x50, 0x20,
+   0x86, 0x87, 0x04, 0x00,
+   0xC0, 0x06, 0x87, 0x22,
+   0x25, 0x32, 0x87, 0x87,
+   0x00, 0x1F, 0x40, 0xC5,
+   0x0C, 0xFF, 0x25, 0x02,
+   0x87, 0xC0, 0x0C, 0x00,
+   0x00, 0x00, 0x83, 0x0C,
+   0x47, 0x42, 0xD0, 0x03,
+   0xEA, 0x85, 0x41, 0x90,
+   0x01, 0x08, 0x00, 0x00,
+   0x85, 0x0C, 0x47, 0x42,
+   0xD0, 0x03, 0xEA, 0xC5,
+   0x41, 0x90, 0x01, 0x08,
+   0x00, 0x00, 0xC5, 0x0C,
+   0x67, 0xF0, 0x40, 0x28,
+   0x42, 0x8C, 0x80, 0x40,
+   0x80, 0xC5, 0x80, 0x90,
+   0x80, 0xFF, 0x04, 0x81,
+   0x60, 0x00, 0xCC, 0xFF,
+   0xFF, 0xFF, 0x66, 0xF0,
+   0x40, 0x28, 0x42, 0x8C,
+   0x80, 0x40, 0x00, 0x51,
+   0xD0, 0x80, 0x07, 0x81,
+   0x60, 0x00, 0x86, 0xFF,
+   0xFF, 0xFF, 0xF3, 0xFF,
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0x04, 0x80, 0xEE, 0x00,
+   0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_LOAD_SR_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h b/src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h
new file mode 100644
index 00000000000..40aa57e0a53
--- /dev/null
+++ b/src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_STORE_SR_H
+#define PVR_VDM_STORE_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_vdm_store_sr_code[] =
+{
+   0x25, 0x02, 0x87, 0x81,
+   0x04, 0x00, 0x00, 0x00,
+   0x40, 0xFF, 0x25, 0x02,
+   0x87, 0x80, 0x04, 0x00,
+   0x00, 0x00, 0x41, 0xFF,
+   0x25, 0x02, 0x87, 0x83,
+   0x04, 0x00, 0x00, 0x00,
+   0x86, 0x04, 0x25, 0x02,
+   0x87, 0x82, 0x04, 0x00,
+   0x00, 0x00, 0x87, 0x04,
+   0x68, 0xF2, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x80, 0x27, 0x20, 0x8F,
+   0xC0, 0x80, 0x02, 0x42,
+   0x66, 0x20, 0xF2, 0x84,
+   0xB8, 0x28, 0x80, 0xA2,
+   0xC2, 0xA0, 0x00, 0xFF,
+   0x44, 0x20, 0xE0, 0x00,
+   0xC0, 0xA1, 0x00, 0x00,
+   0x02, 0x80, 0x6A, 0xFF,
+   0x04, 0x81, 0x60, 0x00,
+   0xE8, 0x00, 0x00, 0x00,
+   0x25, 0x36, 0x87, 0x87,
+   0x00, 0x00, 0x40, 0x05,
+   0xC3, 0x06, 0x25, 0x02,
+   0xE2, 0x90, 0x50, 0x00,
+   0x00, 0x00, 0x00, 0x45,
+   0x25, 0x02, 0xE2, 0xC5,
+   0x44, 0x00, 0x00, 0x00,
+   0x00, 0x45, 0x89, 0xF2,
+   0x40, 0x21, 0x49, 0x9C,
+   0xC0, 0x00, 0x80, 0x40,
+   0x00, 0x42, 0x8F, 0xC6,
+   0x80, 0x02, 0x44, 0xFF,
+   0x04, 0x81, 0x60, 0x00,
+   0x62, 0x00, 0x00, 0x00,
+   0x68, 0x20, 0xE2, 0x88,
+   0xB8, 0x28, 0x80, 0x64,
+   0x00, 0x00, 0x83, 0xC6,
+   0x98, 0x08, 0x00, 0xFF,
+   0x45, 0x20, 0xE0, 0x00,
+   0x86, 0xE1, 0x10, 0x00,
+   0x00, 0xFF, 0x02, 0x80,
+   0x6A, 0xFF, 0x47, 0x42,
+   0xD0, 0x03, 0xE2, 0xC5,
+   0x41, 0x80, 0x85, 0x80,
+   0x00, 0x00, 0x85, 0x0C,
+   0x46, 0x42, 0xD0, 0x03,
+   0xE2, 0xC3, 0x41, 0x00,
+   0x01, 0x00, 0x00, 0x43,
+   0x27, 0x06, 0xE3, 0x84,
+   0x65, 0x20, 0x86, 0x87,
+   0x04, 0x00, 0xC0, 0x06,
+   0x87, 0x22, 0x66, 0xF0,
+   0x40, 0x28, 0x42, 0x8C,
+   0x80, 0x40, 0x00, 0x43,
+   0xC4, 0x80, 0x04, 0x81,
+   0x60, 0x00, 0xAE, 0xFF,
+   0xFF, 0xFF, 0x46, 0x42,
+   0xD0, 0x03, 0xE2, 0x84,
+   0x4F, 0x00, 0x03, 0x00,
+   0x00, 0x46, 0x67, 0xF2,
+   0x40, 0x00, 0x41, 0x8C,
+   0x80, 0x40, 0x00, 0x42,
+   0xC6, 0x84, 0x42, 0xFF,
+   0x66, 0xF0, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x00, 0x42, 0xC2, 0x80,
+   0x04, 0x81, 0x60, 0x00,
+   0x28, 0x00, 0x00, 0x00,
+   0x68, 0x20, 0xE2, 0x88,
+   0xB8, 0x28, 0x80, 0x62,
+   0x00, 0x42, 0x83, 0xC6,
+   0x98, 0x08, 0x00, 0xFF,
+   0x45, 0x20, 0xE0, 0x00,
+   0x86, 0xE1, 0x10, 0x00,
+   0x00, 0xFF, 0x03, 0x80,
+   0x6A, 0xFF, 0xF1, 0xFF,
+   0x04, 0x80, 0xEE, 0x00,
+   0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_STORE_SR_H */
diff --git a/src/imagination/vulkan/vk_format.h b/src/imagination/vulkan/vk_format.h
new file mode 100644
index 00000000000..627dc867877
--- /dev/null
+++ b/src/imagination/vulkan/vk_format.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Based on u_format.h which is:
+ * Copyright 2009-2010 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* clang-format off */
+#ifndef VK_FORMAT_H
+#define VK_FORMAT_H
+
+#include <stdbool.h>
+#include <vulkan/vulkan.h>
+
+#include <vulkan/util/vk_format.h>
+
+#include "util/u_endian.h"
+
+static inline bool
+vk_format_is_alpha_on_msb(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   return (desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+           desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
+#if defined(UTIL_ARCH_BIG_ENDIAN)
+          desc->swizzle[3] == PIPE_SWIZZLE_X;
+#else
+          desc->swizzle[3] == PIPE_SWIZZLE_W;
+#endif
+}
+
+static inline boolean
+vk_format_has_alpha(VkFormat vk_format)
+{
+   return util_format_has_alpha(vk_format_to_pipe_format(vk_format));
+}
+
+static inline boolean
+vk_format_is_pure_integer(VkFormat vk_format)
+{
+   return util_format_is_pure_integer(vk_format_to_pipe_format(vk_format));
+}
+
+static inline uint
+vk_format_get_blocksizebits(VkFormat vk_format)
+{
+   return util_format_get_blocksizebits(vk_format_to_pipe_format(vk_format));
+}
+
+static inline uint
+vk_format_get_channel_width(VkFormat vk_format, uint32_t channel)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   return desc->channel[channel].size;
+}
+
+static inline boolean
+vk_format_has_32bit_component(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   for (uint32_t i = 0; i < desc->nr_channels; i++) {
+      if (desc->channel[i].size == 32U)
+         return true;
+   }
+
+   return false;
+}
+
+static inline uint
+vk_format_get_component_size_in_bits(VkFormat vk_format,
+                                     enum util_format_colorspace colorspace,
+                                     uint32_t component)
+{
+   return util_format_get_component_bits(vk_format_to_pipe_format(vk_format),
+                                         colorspace,
+                                         component);
+}
+
+static inline boolean
+vk_format_is_normalized(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   for (uint32_t i = 0; i < desc->nr_channels; i++) {
+      if (!desc->channel[i].normalized)
+         return false;
+   }
+
+   return true;
+}
+
+#endif /* VK_FORMAT_H */
diff --git a/src/imagination/vulkan/winsys/powervr/pvr_drm.c b/src/imagination/vulkan/winsys/powervr/pvr_drm.c
new file mode 100644
index 00000000000..3138fb6531b
--- /dev/null
+++ b/src/imagination/vulkan/winsys/powervr/pvr_drm.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc)
+{
+   pvr_finishme("Add implementation once powervr UAPI is stable.");
+
+   return NULL;
+}
diff --git a/src/imagination/vulkan/winsys/powervr/pvr_drm_public.h b/src/imagination/vulkan/winsys/powervr/pvr_drm_public.h
new file mode 100644
index 00000000000..1326e0373c3
--- /dev/null
+++ b/src/imagination/vulkan/winsys/powervr/pvr_drm_public.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRM_PUBLIC_H
+#define PVR_DRM_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_DRM_PUBLIC_H */
diff --git a/src/imagination/vulkan/winsys/pvr_winsys.c b/src/imagination/vulkan/winsys/pvr_winsys.c
new file mode 100644
index 00000000000..94f9384ba6c
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvr_winsys.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include <xf86drm.h>
+
+#include "powervr/pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_log.h"
+
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+# include "pvrsrvkm/pvr_srv_public.h"
+#endif
+
+void pvr_winsys_destroy(struct pvr_winsys *ws)
+{
+   ws->ops->destroy(ws);
+}
+
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+                                     int render_fd,
+                                     const VkAllocationCallbacks *alloc)
+{
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+   drmVersionPtr version;
+   bool services_driver;
+
+   version = drmGetVersion(render_fd);
+   if (!version) {
+      vk_errorf(NULL,
+                VK_ERROR_INCOMPATIBLE_DRIVER,
+                "Failed to query kernel driver version for device.");
+      return NULL;
+   }
+
+   if (strcmp(version->name, "pvr") == 0) {
+      services_driver = true;
+   } else if (strcmp(version->name, "powervr") == 0) {
+      services_driver = false;
+   } else {
+      drmFreeVersion(version);
+      vk_errorf(
+         NULL,
+         VK_ERROR_INCOMPATIBLE_DRIVER,
+         "Device does not use either of the supported kernel drivers: pvrsrvkm or powervr.");
+      return NULL;
+   }
+
+   drmFreeVersion(version);
+
+   if (services_driver)
+      return pvr_srv_winsys_create(master_fd, render_fd, alloc);
+#endif
+
+   return pvr_drm_winsys_create(master_fd, render_fd, alloc);
+}
diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h
new file mode 100644
index 00000000000..a73ede5f897
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvr_winsys.h
@@ -0,0 +1,462 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Based on radv_radeon_winsys.h which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_WINSYS_H
+#define PVR_WINSYS_H
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_limits.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+struct pvr_device_info;
+
+/* device virtual address */
+typedef struct pvr_dev_addr {
+   uint64_t addr;
+} pvr_dev_addr_t;
+
+/* clang-format off */
+#define PVR_DEV_ADDR_INVALID (pvr_dev_addr_t){ .addr = 0 }
+/* clang-format on */
+
+struct pvr_winsys_heaps {
+   struct pvr_winsys_heap *general_heap;
+   struct pvr_winsys_heap *pds_heap;
+   struct pvr_winsys_heap *rgn_hdr_heap;
+   struct pvr_winsys_heap *usc_heap;
+};
+
+struct pvr_winsys_static_data_offsets {
+   uint64_t eot;
+   uint64_t fence;
+   uint64_t vdm_sync;
+   uint64_t yuv_csc;
+};
+
+struct pvr_winsys_heap {
+   struct pvr_winsys *ws;
+
+   pvr_dev_addr_t base_addr;
+   pvr_dev_addr_t reserved_addr;
+
+   uint64_t size;
+   uint64_t reserved_size;
+
+   uint32_t page_size;
+   uint32_t log2_page_size;
+
+   struct util_vma_heap vma_heap;
+   int ref_count;
+   pthread_mutex_t lock;
+
+   /* These are the offsets from the base at which static data might be
+    * uploaded. Some of these might be invalid, since the kernel might not
+    * return all of these offsets for each heap, as they might not be
+    * applicable.
+    * You should know which to use beforehand. There should be no need to
+    * check whether an offset is valid or invalid.
+    */
+   struct pvr_winsys_static_data_offsets static_data_offsets;
+};
+
+enum pvr_winsys_bo_type {
+   PVR_WINSYS_BO_TYPE_GPU = 0,
+   PVR_WINSYS_BO_TYPE_DISPLAY = 1,
+};
+
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be CPU accessible. This is required in order to map the
+ * buffer using #pvr_winsys_ops.buffer_map.
+ */
+#define PVR_WINSYS_BO_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should be
+ * mapped uncached.
+ */
+#define PVR_WINSYS_BO_FLAG_GPU_UNCACHED BITFIELD_BIT(1U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should only
+ * be accessible to the Parameter Manager unit and firmware processor.
+ */
+#define PVR_WINSYS_BO_FLAG_PM_FW_PROTECT BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be zeroed at allocation time.
+ */
+#define PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(3U)
+
+struct pvr_winsys_bo {
+   struct pvr_winsys *ws;
+   void *map;
+   uint64_t size;
+
+   bool is_imported;
+};
+
+struct pvr_winsys_vma {
+   struct pvr_winsys_heap *heap;
+
+   /* Buffer and offset this vma is bound to.
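+    * These are expected to stay NULL/0 until vma_map binds a buffer, and to
+    * be cleared again when vma_unmap releases it; pvr_winsys_helper_heap_free()
+    * asserts that no bo is still bound when the vma is freed.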
*/ + struct pvr_winsys_bo *bo; + VkDeviceSize bo_offset; + + pvr_dev_addr_t dev_addr; + uint64_t size; + uint64_t mapped_size; +}; + +struct pvr_winsys_syncobj { + struct pvr_winsys *ws; +}; + +struct pvr_winsys_free_list { + struct pvr_winsys *ws; +}; + +struct pvr_winsys_rt_dataset_create_info { + /* Local freelist */ + struct pvr_winsys_free_list *local_free_list; + + /* ISP register values */ + uint32_t isp_merge_lower_x; + uint32_t isp_merge_lower_y; + uint32_t isp_merge_scale_x; + uint32_t isp_merge_scale_y; + uint32_t isp_merge_upper_x; + uint32_t isp_merge_upper_y; + uint32_t isp_mtile_size; + + /* PPP register values */ + uint64_t ppp_multi_sample_ctl; + uint64_t ppp_multi_sample_ctl_y_flipped; + uint32_t ppp_screen; + + /* TE register values */ + uint32_t te_aa; + uint32_t te_mtile1; + uint32_t te_mtile2; + uint32_t te_screen; + + /* Allocations and associated information */ + pvr_dev_addr_t vheap_table_dev_addr; + pvr_dev_addr_t rtc_dev_addr; + + pvr_dev_addr_t tpc_dev_addr; + uint32_t tpc_stride; + uint32_t tpc_size; + + struct { + pvr_dev_addr_t pm_mlist_dev_addr; + pvr_dev_addr_t macrotile_array_dev_addr; + pvr_dev_addr_t rgn_header_dev_addr; + } rt_datas[ROGUE_NUM_RTDATAS]; + uint64_t rgn_header_size; + + /* Miscellaneous */ + uint32_t mtile_stride; + uint16_t max_rts; +}; + +struct pvr_winsys_rt_dataset { + struct pvr_winsys *ws; +}; + +enum pvr_winsys_ctx_priority { + PVR_WINSYS_CTX_PRIORITY_LOW, + PVR_WINSYS_CTX_PRIORITY_MEDIUM, + PVR_WINSYS_CTX_PRIORITY_HIGH, +}; + +struct pvr_winsys_render_ctx_create_info { + enum pvr_winsys_ctx_priority priority; + pvr_dev_addr_t vdm_callstack_addr; + + struct pvr_winsys_render_ctx_static_state { + uint64_t vdm_ctx_state_base_addr; + uint64_t geom_ctx_state_base_addr; + + struct { + uint64_t vdm_ctx_store_task0; + uint32_t vdm_ctx_store_task1; + uint64_t vdm_ctx_store_task2; + + uint64_t vdm_ctx_resume_task0; + uint32_t vdm_ctx_resume_task1; + uint64_t vdm_ctx_resume_task2; + } geom_state[2]; + } static_state; +}; + +struct pvr_winsys_render_ctx { + struct pvr_winsys *ws; +}; + +struct pvr_winsys_compute_ctx_create_info { + enum pvr_winsys_ctx_priority priority; + + struct pvr_winsys_compute_ctx_static_state { + uint64_t cdm_ctx_state_base_addr; + + uint64_t cdm_ctx_store_pds0; + uint64_t cdm_ctx_store_pds0_b; + uint64_t cdm_ctx_store_pds1; + + uint64_t cdm_ctx_terminate_pds; + uint64_t cdm_ctx_terminate_pds1; + + uint64_t cdm_ctx_resume_pds0; + uint64_t cdm_ctx_resume_pds0_b; + } static_state; +}; + +struct pvr_winsys_compute_ctx { + struct pvr_winsys *ws; +}; + +#define PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP BITFIELD_BIT(0U) +#define PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE BITFIELD_BIT(1U) + +struct pvr_winsys_compute_submit_info { + uint32_t frame_num; + uint32_t job_num; + + /* semaphores and stage_flags are arrays of length semaphore_count. */ + const VkSemaphore *semaphores; + uint32_t *stage_flags; + uint32_t semaphore_count; + + struct { + uint64_t tpu_border_colour_table; + uint64_t cdm_item; + uint64_t compute_cluster; + uint64_t cdm_ctrl_stream_base; + uint32_t tpu; + uint32_t cdm_resume_pds1; + } regs; + + /* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */ + uint32_t flags; +}; + +#define PVR_WINSYS_JOB_BO_FLAG_WRITE BITFIELD_BIT(0U) + +struct pvr_winsys_job_bo { + struct pvr_winsys_bo *bo; + /* Must be 0 or a combination of PVR_WINSYS_JOB_BO_FLAG_* flags. 
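+    * PVR_WINSYS_JOB_BO_FLAG_WRITE presumably marks the bo as written by the
+    * job so the kernel can order access against readers; leave it unset for
+    * bos the job only reads.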
*/ + uint32_t flags; +}; + +#define PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY BITFIELD_BIT(0U) +#define PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY BITFIELD_BIT(1U) +#define PVR_WINSYS_GEOM_FLAG_SINGLE_CORE BITFIELD_BIT(2U) + +#define PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT BITFIELD_BIT(0U) +#define PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT BITFIELD_BIT(1U) +#define PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP BITFIELD_BIT(2U) +#define PVR_WINSYS_FRAG_FLAG_SINGLE_CORE BITFIELD_BIT(3U) + +struct pvr_winsys_render_submit_info { + struct pvr_winsys_rt_dataset *rt_dataset; + uint8_t rt_data_idx; + + uint32_t frame_num; + uint32_t job_num; + + uint32_t bo_count; + const struct pvr_winsys_job_bo *bos; + + /* FIXME: should this be flags instead? */ + bool run_frag; + + /* semaphores and stage_flags are arrays of length semaphore_count. */ + const VkSemaphore *semaphores; + uint32_t *stage_flags; + uint32_t semaphore_count; + + struct pvr_winsys_geometry_state { + struct { + uint32_t pds_ctrl; + uint32_t ppp_ctrl; + uint32_t te_psg; + uint32_t tpu; + uint64_t tpu_border_colour_table; + uint64_t vdm_ctrl_stream_base; + uint32_t vdm_ctx_resume_task0_size; + } regs; + + /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */ + uint32_t flags; + } geometry; + + struct pvr_winsys_fragment_state { + struct { + uint32_t event_pixel_pds_data; + uint32_t event_pixel_pds_info; + uint32_t isp_aa; + uint32_t isp_bgobjdepth; + uint32_t isp_bgobjvals; + uint32_t isp_ctl; + uint64_t isp_dbias_base; + uint64_t isp_oclqry_base; + uint64_t isp_scissor_base; + uint64_t isp_stencil_load_store_base; + uint64_t isp_zload_store_base; + uint64_t isp_zlsctl; + uint64_t isp_zls_pixels; + uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS] + [ROGUE_NUM_PBESTATE_REG_WORDS]; + uint32_t pixel_phantom; + uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + uint32_t tpu; + uint64_t tpu_border_colour_table; + uint32_t usc_pixel_output_ctrl; + } regs; + + /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. 
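+    * The DEPTH/STENCIL_BUFFER_PRESENT flags should presumably be kept in
+    * sync with the isp_zload_store_base and isp_stencil_load_store_base
+    * register values above.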
*/ + uint32_t flags; + uint32_t zls_stride; + uint32_t sls_stride; + } fragment; +}; + +struct pvr_winsys_ops { + void (*destroy)(struct pvr_winsys *ws); + int (*device_info_init)(struct pvr_winsys *ws, + struct pvr_device_info *dev_info); + void (*get_heaps_info)(struct pvr_winsys *ws, + struct pvr_winsys_heaps *heaps); + + VkResult (*buffer_create)(struct pvr_winsys *ws, + uint64_t size, + uint64_t alignment, + enum pvr_winsys_bo_type type, + uint32_t flags, + struct pvr_winsys_bo **const bo_out); + VkResult (*buffer_create_from_fd)(struct pvr_winsys *ws, + int fd, + struct pvr_winsys_bo **const bo_out); + void (*buffer_destroy)(struct pvr_winsys_bo *bo); + + VkResult (*buffer_get_fd)(struct pvr_winsys_bo *bo, int *const fd_out); + + void *(*buffer_map)(struct pvr_winsys_bo *bo); + void (*buffer_unmap)(struct pvr_winsys_bo *bo); + + struct pvr_winsys_vma *(*heap_alloc)(struct pvr_winsys_heap *heap, + uint64_t size, + uint64_t alignment); + void (*heap_free)(struct pvr_winsys_vma *vma); + + pvr_dev_addr_t (*vma_map)(struct pvr_winsys_vma *vma, + struct pvr_winsys_bo *bo, + uint64_t offset, + uint64_t size); + void (*vma_unmap)(struct pvr_winsys_vma *vma); + + VkResult (*syncobj_create)(struct pvr_winsys *ws, + bool signaled, + struct pvr_winsys_syncobj **const syncobj_out); + void (*syncobj_destroy)(struct pvr_winsys_syncobj *syncobj); + VkResult (*syncobjs_reset)(struct pvr_winsys *ws, + struct pvr_winsys_syncobj **const syncobjs, + uint32_t count); + VkResult (*syncobjs_signal)(struct pvr_winsys *ws, + struct pvr_winsys_syncobj **const syncobjs, + uint32_t count); + VkResult (*syncobjs_wait)(struct pvr_winsys *ws, + struct pvr_winsys_syncobj **const syncobjs, + uint32_t count, + bool wait_all, + uint64_t timeout); + VkResult (*syncobjs_merge)(struct pvr_winsys_syncobj *src, + struct pvr_winsys_syncobj *target, + struct pvr_winsys_syncobj **out); + + VkResult (*free_list_create)( + struct pvr_winsys *ws, + struct pvr_winsys_vma *free_list_vma, + uint32_t initial_num_pages, + uint32_t max_num_pages, + uint32_t grow_num_pages, + uint32_t grow_threshold, + struct pvr_winsys_free_list *parent_free_list, + struct pvr_winsys_free_list **const free_list_out); + void (*free_list_destroy)(struct pvr_winsys_free_list *free_list); + + VkResult (*render_target_dataset_create)( + struct pvr_winsys *ws, + const struct pvr_winsys_rt_dataset_create_info *create_info, + struct pvr_winsys_rt_dataset **const rt_dataset_out); + void (*render_target_dataset_destroy)( + struct pvr_winsys_rt_dataset *rt_dataset); + + VkResult (*render_ctx_create)( + struct pvr_winsys *ws, + struct pvr_winsys_render_ctx_create_info *create_info, + struct pvr_winsys_render_ctx **const ctx_out); + void (*render_ctx_destroy)(struct pvr_winsys_render_ctx *ctx); + VkResult (*render_submit)( + const struct pvr_winsys_render_ctx *ctx, + const struct pvr_winsys_render_submit_info *submit_info, + struct pvr_winsys_syncobj **const syncobj_geom_out, + struct pvr_winsys_syncobj **const syncobj_frag_out); + + VkResult (*compute_ctx_create)( + struct pvr_winsys *ws, + const struct pvr_winsys_compute_ctx_create_info *create_info, + struct pvr_winsys_compute_ctx **const ctx_out); + void (*compute_ctx_destroy)(struct pvr_winsys_compute_ctx *ctx); + VkResult (*compute_submit)( + const struct pvr_winsys_compute_ctx *ctx, + const struct pvr_winsys_compute_submit_info *submit_info, + struct pvr_winsys_syncobj **const syncobj_out); +}; + +struct pvr_winsys { + uint64_t page_size; + uint32_t log2_page_size; + + const struct pvr_winsys_ops *ops; +}; + 
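+/* Rough lifetime sketch (illustrative only; error handling is omitted and
+ * the dev_info/heaps locals are hypothetical):
+ *
+ *    struct pvr_winsys *ws = pvr_winsys_create(master_fd, render_fd, alloc);
+ *    ws->ops->device_info_init(ws, &dev_info);
+ *    ws->ops->get_heaps_info(ws, &heaps);
+ *    ...create buffers, contexts and submit work via ws->ops...
+ *    pvr_winsys_destroy(ws);
+ */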
+void pvr_winsys_destroy(struct pvr_winsys *ws);
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+                                     int render_fd,
+                                     const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_WINSYS_H */
diff --git a/src/imagination/vulkan/winsys/pvr_winsys_helper.c b/src/imagination/vulkan/winsys/pvr_winsys_helper.c
new file mode 100644
index 00000000000..ae94bab2eaf
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvr_winsys_helper.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "pvr_winsys_helper.h"
+#include "util/u_atomic.h"
+#include "vk_log.h"
+
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+                                            uint64_t size,
+                                            uint32_t *const handle_out)
+{
+   struct drm_mode_create_dumb args = {
+      .width = size,
+      .height = 1,
+      .bpp = 8,
+   };
+   int ret;
+
+   ret = drmIoctl(master_fd, DRM_IOCTL_MODE_CREATE_DUMB, &args);
+   if (ret)
+      return ret;
+
+   *handle_out = args.handle;
+
+   return 0;
+}
+
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle)
+{
+   struct drm_mode_destroy_dumb args = {
+      .handle = handle,
+   };
+
+   return drmIoctl(master_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &args);
+}
+
+/* reserved_size can be 0 when no reserved area is needed. reserved_address
+ * must be 0 if reserved_size is 0.
+ */
+VkResult pvr_winsys_helper_winsys_heap_init(
+   struct pvr_winsys *const ws,
+   pvr_dev_addr_t base_address,
+   uint64_t size,
+   pvr_dev_addr_t reserved_address,
+   uint64_t reserved_size,
+   uint32_t log2_page_size,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+   struct pvr_winsys_heap *const heap)
+{
+   const bool reserved_area_bottom_of_heap = reserved_address.addr ==
+                                             base_address.addr;
+   const uint64_t vma_heap_begin_addr =
+      base_address.addr +
+      (uint64_t)reserved_area_bottom_of_heap * reserved_size;
+   const uint64_t vma_heap_size = size - reserved_size;
+
+   assert(base_address.addr);
+   assert(reserved_size <= size);
+
+   /* As per the reserved_base powervr-km uapi documentation, the reserved
+    * region can only be at the beginning of the heap or at the end.
+    * reserved_address is 0 if there is no reserved region.
+    * pvrsrv-km doesn't explicitly provide this info and it's assumed that
+    * it's always at the beginning.
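+    * For example (illustrative numbers only): base 0x8000000, size 0x100000
+    * and a 0x4000 reserved area at the base leave the vma heap managing
+    * [0x8004000, 0x8100000).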
+    */
+   assert(reserved_area_bottom_of_heap ||
+          reserved_address.addr + reserved_size == base_address.addr + size ||
+          (!reserved_address.addr && !reserved_size));
+
+   heap->ws = ws;
+   heap->base_addr = base_address;
+   heap->reserved_addr = reserved_address;
+
+   heap->size = size;
+   heap->reserved_size = reserved_size;
+
+   heap->page_size = 1 << log2_page_size;
+   heap->log2_page_size = log2_page_size;
+
+   util_vma_heap_init(&heap->vma_heap, vma_heap_begin_addr, vma_heap_size);
+
+   heap->vma_heap.alloc_high = false;
+
+   /* It's expected that the heap destroy function will be the last thing
+    * called, so we start the ref_count at 0.
+    */
+   p_atomic_set(&heap->ref_count, 0);
+
+   if (pthread_mutex_init(&heap->lock, NULL))
+      return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+
+   heap->static_data_offsets = *static_data_offsets;
+
+   return VK_SUCCESS;
+}
+
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap)
+{
+   if (p_atomic_read(&heap->ref_count) != 0)
+      return false;
+
+   pthread_mutex_destroy(&heap->lock);
+   util_vma_heap_finish(&heap->vma_heap);
+
+   return true;
+}
+
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+                                  uint64_t size,
+                                  uint64_t alignment,
+                                  struct pvr_winsys_vma *const vma_out)
+{
+   struct pvr_winsys_vma vma = {
+      .heap = heap,
+   };
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the
+    * same here to ensure enough heap space is allocated to be able to map
+    * the buffer to the GPU.
+    * We have to do this for the powervr kernel mode driver as well, as it
+    * returns a page aligned size when allocating buffers.
+    */
+   alignment = MAX2(alignment, heap->page_size);
+
+   size = ALIGN_POT(size, alignment);
+   vma.size = size;
+
+   pthread_mutex_lock(&heap->lock);
+   vma.dev_addr.addr =
+      util_vma_heap_alloc(&heap->vma_heap, size, heap->page_size);
+   pthread_mutex_unlock(&heap->lock);
+
+   if (!vma.dev_addr.addr) {
+      vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      return false;
+   }
+
+   p_atomic_inc(&heap->ref_count);
+
+   *vma_out = vma;
+
+   return true;
+}
+
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma)
+{
+   struct pvr_winsys_heap *const heap = vma->heap;
+
+   /* A vma with an existing device mapping should not be freed. */
+   assert(!vma->bo);
+
+   pthread_mutex_lock(&heap->lock);
+   util_vma_heap_free(&heap->vma_heap, vma->dev_addr.addr, vma->size);
+   pthread_mutex_unlock(&heap->lock);
+
+   p_atomic_dec(&heap->ref_count);
+}
+
+/* Note: the function assumes the heap allocation in the reserved memory area
+ * can be freed with the regular heap allocation free function. The free
+ * function gets called on mapping failure.
+ */
+static VkResult
+pvr_buffer_create_and_map(struct pvr_winsys *const ws,
+                          heap_alloc_reserved_func heap_alloc_reserved,
+                          struct pvr_winsys_heap *heap,
+                          pvr_dev_addr_t dev_addr,
+                          uint64_t size,
+                          uint64_t alignment,
+                          struct pvr_winsys_vma **const vma_out)
+{
+   struct pvr_winsys_vma *vma;
+   struct pvr_winsys_bo *bo;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   /* Address should not be NULL; this function is used to allocate and map
+    * reserved addresses and is only supposed to be used internally.
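+    * The flow below: create a CPU-accessible bo, reserve the fixed
+    * device-virtual range via heap_alloc_reserved, map the bo into that
+    * range, then drop the initial bo reference so the mapping owns it.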
+    */
+   assert(dev_addr.addr);
+
+   result = ws->ops->buffer_create(ws,
+                                   size,
+                                   alignment,
+                                   PVR_WINSYS_BO_TYPE_GPU,
+                                   PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                                   &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   vma = heap_alloc_reserved(heap, dev_addr, size, alignment);
+   if (!vma) {
+      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      goto err_pvr_winsys_buffer_destroy;
+   }
+
+   addr = ws->ops->vma_map(vma, bo, 0, size);
+   if (!addr.addr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto err_pvr_winsys_heap_free;
+   }
+
+   /* Note: this won't destroy the bo, as it's being used by the vma; once
+    * the vma is unmapped, the bo will be destroyed automatically.
+    */
+   ws->ops->buffer_destroy(bo);
+
+   *vma_out = vma;
+
+   return VK_SUCCESS;
+
+err_pvr_winsys_heap_free:
+   ws->ops->heap_free(vma);
+
+err_pvr_winsys_buffer_destroy:
+   ws->ops->buffer_destroy(bo);
+
+   return result;
+}
+
+static inline void pvr_buffer_destroy_and_unmap(struct pvr_winsys_vma *vma)
+{
+   const struct pvr_winsys *const ws = vma->heap->ws;
+
+   /* The buffer object associated with the vma will be automatically
+    * destroyed once the vma is unmapped.
+    */
+   ws->ops->vma_unmap(vma);
+   ws->ops->heap_free(vma);
+}
+
+VkResult pvr_winsys_helper_allocate_static_memory(
+   struct pvr_winsys *const ws,
+   heap_alloc_reserved_func heap_alloc_reserved,
+   struct pvr_winsys_heap *const general_heap,
+   struct pvr_winsys_heap *const pds_heap,
+   struct pvr_winsys_heap *const usc_heap,
+   struct pvr_winsys_vma **const general_vma_out,
+   struct pvr_winsys_vma **const pds_vma_out,
+   struct pvr_winsys_vma **const usc_vma_out)
+{
+   struct pvr_winsys_vma *general_vma;
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   VkResult result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      general_heap,
+                                      general_heap->reserved_addr,
+                                      general_heap->reserved_size,
+                                      general_heap->page_size,
+                                      &general_vma);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      pds_heap,
+                                      pds_heap->reserved_addr,
+                                      pds_heap->reserved_size,
+                                      pds_heap->page_size,
+                                      &pds_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_general;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      usc_heap,
+                                      usc_heap->reserved_addr,
+                                      usc_heap->reserved_size,
+                                      usc_heap->page_size,
+                                      &usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_pds;
+
+   *general_vma_out = general_vma;
+   *pds_vma_out = pds_vma;
+   *usc_vma_out = usc_vma;
+
+   return VK_SUCCESS;
+
+err_pvr_buffer_destroy_and_unmap_pds:
+   pvr_buffer_destroy_and_unmap(pds_vma);
+
+err_pvr_buffer_destroy_and_unmap_general:
+   pvr_buffer_destroy_and_unmap(general_vma);
+
+   return result;
+}
+
+void pvr_winsys_helper_free_static_memory(
+   struct pvr_winsys_vma *const general_vma,
+   struct pvr_winsys_vma *const pds_vma,
+   struct pvr_winsys_vma *const usc_vma)
+{
+   pvr_buffer_destroy_and_unmap(usc_vma);
+   pvr_buffer_destroy_and_unmap(pds_vma);
+   pvr_buffer_destroy_and_unmap(general_vma);
+}
+
+static void pvr_setup_static_vdm_sync(uint8_t *const pds_ptr,
+                                      uint64_t pds_sync_offset_in_bytes,
+                                      uint8_t *const usc_ptr,
+                                      uint64_t usc_sync_offset_in_bytes)
+{
+   /* TODO: this needs to be auto-generated */
+   const uint8_t state_update[] = { 0x44, 0xA0, 0x80, 0x05,
+                                    0x00, 0x00, 0x00, 0xFF };
+
+   struct pvr_pds_kickusc_program ppp_state_update_program = { 0 };
+
+   memcpy(usc_ptr + usc_sync_offset_in_bytes,
+          state_update,
+          sizeof(state_update));
+
+   pvr_pds_setup_doutu(&ppp_state_update_program.usc_task_control,
usc_sync_offset_in_bytes, + 0, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); + + pvr_pds_kick_usc(&ppp_state_update_program, + (uint32_t *)&pds_ptr[pds_sync_offset_in_bytes], + 0, + false, + PDS_GENERATE_CODEDATA_SEGMENTS); +} + +static void +pvr_setup_static_pixel_event_program(uint8_t *const pds_ptr, + uint64_t pds_eot_offset_in_bytes) +{ + struct pvr_pds_event_program pixel_event_program = { 0 }; + + pvr_pds_generate_pixel_event(&pixel_event_program, + (uint32_t *)&pds_ptr[pds_eot_offset_in_bytes], + PDS_GENERATE_CODE_SEGMENT, + NULL); +} + +VkResult +pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws, + struct pvr_winsys_vma *const general_vma, + struct pvr_winsys_vma *const pds_vma, + struct pvr_winsys_vma *const usc_vma) +{ + uint8_t *general_ptr, *pds_ptr, *usc_ptr; + VkResult result; + + general_ptr = ws->ops->buffer_map(general_vma->bo); + if (!general_ptr) + return VK_ERROR_MEMORY_MAP_FAILED; + + pds_ptr = ws->ops->buffer_map(pds_vma->bo); + if (!pds_ptr) { + result = VK_ERROR_MEMORY_MAP_FAILED; + goto error_pvr_srv_winsys_buffer_unmap_general; + } + + usc_ptr = ws->ops->buffer_map(usc_vma->bo); + if (!usc_ptr) { + result = VK_ERROR_MEMORY_MAP_FAILED; + goto error_pvr_srv_winsys_buffer_unmap_pds; + } + + pvr_setup_static_vdm_sync(pds_ptr, + pds_vma->heap->static_data_offsets.vdm_sync, + usc_ptr, + usc_vma->heap->static_data_offsets.vdm_sync); + + pvr_setup_static_pixel_event_program(pds_ptr, + pds_vma->heap->static_data_offsets.eot); + + /* TODO: Complete control block copying work. */ + + ws->ops->buffer_unmap(usc_vma->bo); + ws->ops->buffer_unmap(pds_vma->bo); + ws->ops->buffer_unmap(general_vma->bo); + + return VK_SUCCESS; + +error_pvr_srv_winsys_buffer_unmap_pds: + ws->ops->buffer_unmap(pds_vma->bo); + +error_pvr_srv_winsys_buffer_unmap_general: + ws->ops->buffer_unmap(general_vma->bo); + + return result; +} diff --git a/src/imagination/vulkan/winsys/pvr_winsys_helper.h b/src/imagination/vulkan/winsys/pvr_winsys_helper.h new file mode 100644 index 00000000000..dbf619a8064 --- /dev/null +++ b/src/imagination/vulkan/winsys/pvr_winsys_helper.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef PVR_WINSYS_HELPER_H
+#define PVR_WINSYS_HELPER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+typedef struct pvr_winsys_vma *(*const heap_alloc_reserved_func)(
+   struct pvr_winsys_heap *const heap,
+   const pvr_dev_addr_t reserved_dev_addr,
+   uint64_t size,
+   uint64_t alignment);
+
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+                                            uint64_t size,
+                                            uint32_t *const handle_out);
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle);
+
+VkResult pvr_winsys_helper_winsys_heap_init(
+   struct pvr_winsys *const ws,
+   pvr_dev_addr_t base_address,
+   uint64_t size,
+   pvr_dev_addr_t reserved_address,
+   uint64_t reserved_size,
+   uint32_t log2_page_size,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+   struct pvr_winsys_heap *const heap);
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap);
+
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+                                  uint64_t size,
+                                  uint64_t alignment,
+                                  struct pvr_winsys_vma *const vma_out);
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma);
+
+VkResult pvr_winsys_helper_allocate_static_memory(
+   struct pvr_winsys *const ws,
+   heap_alloc_reserved_func heap_alloc_reserved,
+   struct pvr_winsys_heap *const general_heap,
+   struct pvr_winsys_heap *const pds_heap,
+   struct pvr_winsys_heap *const usc_heap,
+   struct pvr_winsys_vma **const general_vma_out,
+   struct pvr_winsys_vma **const pds_vma_out,
+   struct pvr_winsys_vma **const usc_vma_out);
+void pvr_winsys_helper_free_static_memory(
+   struct pvr_winsys_vma *const general_vma,
+   struct pvr_winsys_vma *const pds_vma,
+   struct pvr_winsys_vma *const usc_vma);
+
+VkResult
+pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws,
+                                     struct pvr_winsys_vma *const general_vma,
+                                     struct pvr_winsys_vma *const pds_vma,
+                                     struct pvr_winsys_vma *const usc_vma);
+
+#endif /* PVR_WINSYS_HELPER_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h
new file mode 100644
index 00000000000..abcaf181061
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h
@@ -0,0 +1,440 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_H
+#define PVR_ROGUE_FWIF_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/** Indicates the number of RTDATAs per RTDATASET. */
+#define ROGUE_FWIF_NUM_RTDATAS 2U
+
+/** Render needs flipped sample positions. */
+#define ROGUE_FWIF_RENDERFLAGS_FLIP_SAMPLE_POSITIONS 0x00000001UL
+/**
+ * The scene has been aborted; free the parameters and dummy process to
+ * completion.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT 0x00000002UL
+/** The TA before this was not marked as LAST. */
+#define ROGUE_FWIF_RENDERFLAGS_3D_ONLY 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE 0x00000008UL
+/**
+ * This render has visibility result associated with it. Setting this flag will
+ * cause the firmware to collect the visibility results.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_GETVISRESULTS 0x00000020UL
+/** Indicates whether a depth buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER 0x00000080UL
+/** Indicates whether a stencil buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER 0x00000100UL
+/** This render needs DRM Security. */
+#define ROGUE_FWIF_RENDERFLAGS_SECURE 0x00002000UL
+/**
+ * This flag goes hand in hand with ABORT and explicitly ensures no mem free
+ * is issued in case of a first TA job.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT_NOFREE 0x00004000UL
+/** Force disabling of pixel merging. */
+#define ROGUE_FWIF_RENDERFLAGS_DISABLE_PIXELMERGE 0x00008000UL
+
+/** Force 4 lines of coeffs on render. */
+#define ROGUE_FWIF_RENDERFLAGS_CSRM_MAX_COEFFS 0x00020000UL
+
+/** Partial render must write to scratch buffer. */
+#define ROGUE_FWIF_RENDERFLAGS_SPMSCRATCHBUFFER 0x00080000UL
+
+/** Render uses paired tile feature, empty tiles must always be enabled. */
+#define ROGUE_FWIF_RENDERFLAGS_PAIRED_TILES 0x00100000UL
+
+#define ROGUE_FWIF_RENDERFLAGS_RESERVED 0x01000000UL
+
+/** Disallow compute overlapped with this render. */
+#define ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP 0x04000000UL
+
+/**
+ * The host must indicate if this is the first and/or last command to be issued
+ * for the specified task.
+ */
+#define ROGUE_FWIF_TAFLAGS_FIRSTKICK 0x00000001UL
+#define ROGUE_FWIF_TAFLAGS_LASTKICK 0x00000002UL
+#define ROGUE_FWIF_TAFLAGS_FLIP_SAMPLE_POSITIONS 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_TAFLAGS_SINGLE_CORE 0x00000008UL
+
+/** Enable Tile Region Protection for this TA. */
+#define ROGUE_FWIF_TAFLAGS_TRP 0x00000010UL
+
+/** Indicates the particular TA needs to be aborted. */
+#define ROGUE_FWIF_TAFLAGS_TA_ABORT 0x00000100UL
+#define ROGUE_FWIF_TAFLAGS_SECURE 0x00080000UL
+
+/**
+ * Indicates that the CSRM should be reconfigured to support maximum coeff
+ * space before this command is scheduled.
+ */
+#define ROGUE_FWIF_TAFLAGS_CSRM_MAX_COEFFS 0x00200000UL
+
+#define ROGUE_FWIF_TAFLAGS_PHR_TRIGGER 0x02000000UL
+
+/* Flags for transfer queue commands. */
+#define ROGUE_FWIF_CMDTRANSFER_FLAG_SECURE 0x00000001U
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_CMDTRANSFER_SINGLE_CORE 0x00000002U
+
+/* Flags for 2D commands. */
+#define ROGUE_FWIF_CMD2D_FLAG_SECURE 0x00000001U
+
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_MASK 0x00000038UL
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_SHIFT (3)
+#define ROGUE_FWIF_CMD3DTQ_SLICE_GRANULARITY (0x10U)
+
+/* Flags for compute commands.
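+ *
+ * As with the render and TA flags above, these are OR'd together into the
+ * flags word of the kicked command, e.g. (illustrative use only):
+ *
+ *    flags = ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP |
+ *            ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE;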
*/ +#define ROGUE_FWIF_COMPUTE_FLAG_SECURE 0x00000001U +#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP 0x00000002U +#define ROGUE_FWIF_COMPUTE_FLAG_FORCE_TPU_CLK 0x00000004U + +#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_NON_TAOOM_OVERLAP 0x00000010U + +/** Use single core in a multi core setup. */ +#define ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE 0x00000020U + +/*********************************************** + Parameter/HWRTData control structures. + ***********************************************/ + +/** + * Configuration registers which need to be loaded by the firmware before a TA + * job can be started. + */ +struct rogue_fwif_ta_regs { + uint64_t vdm_ctrl_stream_base; + uint64_t tpu_border_colour_table; + + uint32_t ppp_ctrl; + uint32_t te_psg; + uint32_t tpu; + + uint32_t vdm_context_resume_task0_size; + + /* FIXME: HIGH: FIX_HW_BRN_56279 changes the structure's layout, given we + * are supporting Features/ERNs/BRNs at runtime, we need to look into this + * and find a solution to keep layout intact. + */ + /* Available if FIX_HW_BRN_56279 is present. */ + uint32_t pds_ctrl; + + uint32_t view_idx; +}; + +/** + * Represents a TA command that can be used to tile a whole scene's objects as + * per TA behavior. + */ +struct rogue_fwif_cmd_ta { + /** + * rogue_fwif_cmd_ta_3d_shared field must always be at the beginning of the + * struct. + * + * The command struct (rogue_fwif_cmd_ta) is shared between Client and + * Firmware. Kernel is unable to perform read/write operations on the + * command struct, the SHARED region is the only exception from this rule. + * This region must be the first member so that Kernel can easily access it. + * For more info, see pvr_cmd_ta_3d definition. + */ + struct rogue_fwif_cmd_ta_3d_shared cmd_shared; + + struct rogue_fwif_ta_regs ALIGN(8) geom_regs; + uint32_t ALIGN(8) flags; + /** + * Holds the TA/3D fence value to allow the 3D partial render command + * to go through. + */ + struct rogue_fwif_ufo partial_render_ta_3d_fence; +}; + +static_assert( + offsetof(struct rogue_fwif_cmd_ta, cmd_shared) == 0U, + "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_ta"); + +static_assert( + sizeof(struct rogue_fwif_cmd_ta) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE, + "kernel expects command size be increased to match current TA command size"); + +/** + * Configuration registers which need to be loaded by the firmware before ISP + * can be started. + */ +struct rogue_fwif_3d_regs { + /** + * All 32 bit values should be added in the top section. This then requires + * only a single ALIGN(8) to align all the 64 bit values in the second + * section. + */ + uint32_t usc_pixel_output_ctrl; + /* FIXME: HIGH: RGX_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL changes the + * structure's layout. + */ +#define ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL 8U + uint32_t usc_clear_register[ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL]; + + uint32_t isp_bgobjdepth; + uint32_t isp_bgobjvals; + uint32_t isp_aa; + uint32_t isp_ctl; + + uint32_t tpu; + + uint32_t event_pixel_pds_info; + + /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's + * layout. + */ + uint32_t pixel_phantom; + + uint32_t view_idx; + + uint32_t event_pixel_pds_data; + /* FIXME: HIGH: MULTIBUFFER_OCLQRY changes the structure's layout. + * Commenting out for now as it's not supported by 4.V.2.51. + */ + /* uint32_t isp_oclqry_stride; */ + + /* All values below the ALIGN(8) must be 64 bit. 
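+    *
+    * Grouping the fields by size like this means at most a single 4 byte pad
+    * is inserted before isp_scissor_base; interleaving 32 bit and 64 bit
+    * registers would instead introduce padding at every size transition.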
*/ + uint64_t ALIGN(8) isp_scissor_base; + uint64_t isp_dbias_base; + uint64_t isp_oclqry_base; + uint64_t isp_zlsctl; + uint64_t isp_zload_store_base; + uint64_t isp_stencil_load_store_base; + /* FIXME: HIGH: RGX_FEATURE_ZLS_SUBTILE changes the structure's layout. */ + uint64_t isp_zls_pixels; + + /* FIXME: HIGH: RGX_HW_REQUIRES_FB_CDC_ZLS_SETUP changes the structure's + * layout. + */ + uint64_t deprecated; + + /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_RENDERS changes the structure's + * layout. + */ +#define ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS 2U + uint64_t pbe_word[8U][ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS]; + uint64_t tpu_border_colour_table; + uint64_t pds_bgnd[3U]; + uint64_t pds_pr_bgnd[3U]; +}; + +struct rogue_fwif_cmd_3d { + /** + * This struct is shared between Client and Firmware. + * Kernel is unable to perform read/write operations on the command struct, + * the SHARED region is our only exception from that rule. + * This region must be the first member so Kernel can easily access it. + * For more info, see rogue_fwif_cmd_ta_3d_shared definition. + */ + struct rogue_fwif_cmd_ta_3d_shared ALIGN(8) cmd_shared; + + struct rogue_fwif_3d_regs ALIGN(8) regs; + /** command control flags. */ + uint32_t flags; + /** Stride IN BYTES for Z-Buffer in case of RTAs. */ + uint32_t zls_stride; + /** Stride IN BYTES for S-Buffer in case of RTAs. */ + uint32_t sls_stride; +}; + +static_assert( + offsetof(struct rogue_fwif_cmd_3d, cmd_shared) == 0U, + "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_3d"); + +static_assert( + sizeof(struct rogue_fwif_cmd_3d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE, + "kernel expects command size be increased to match current 3D command size"); + +struct rogue_fwif_transfer_regs { + /** + * All 32 bit values should be added in the top section. This then requires + * only a single ALIGN(8) to align all the 8 byte values in the second + * section. + */ + uint32_t isp_bgobjvals; + + uint32_t usc_pixel_output_ctrl; + uint32_t usc_clear_register0; + uint32_t usc_clear_register1; + uint32_t usc_clear_register2; + uint32_t usc_clear_register3; + + uint32_t isp_mtile_size; + uint32_t isp_render_origin; + uint32_t isp_ctl; + + uint32_t isp_aa; + + uint32_t event_pixel_pds_info; + + uint32_t event_pixel_pds_code; + uint32_t event_pixel_pds_data; + + uint32_t isp_render; + uint32_t isp_rgn; + /* FIXME: HIGH: RGX_FEATURE_GPU_MULTICORE_SUPPORT changes the structure's + * layout. Commenting out for now as it's not supported by 4.V.2.51. + */ + /* uint32_t frag_screen; */ + /** All values below the RGXFW_ALIGN must be 64 bit. */ + uint64_t ALIGN(8) pds_bgnd0_base; + uint64_t pds_bgnd1_base; + uint64_t pds_bgnd3_sizeinfo; + + uint64_t isp_mtile_base; + /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_TQS changes the structure's + * layout. 
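+    * As with the other FIXMEs in this file, the constant below is currently
+    * hard-coded for the core this has been brought up on (4.V.2.51) rather
+    * than derived from the feature set at runtime.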
+ */ +#define ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER 3 + /* TQ_MAX_RENDER_TARGETS * PBE_STATE_SIZE */ + uint64_t pbe_wordx_mrty[3 * ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER]; +}; + +struct rogue_fwif_cmd_transfer { + struct rogue_fwif_cmd_common ALIGN(8) cmn; + struct rogue_fwif_transfer_regs ALIGN(8) regs; + + uint32_t flags; +}; + +static_assert( + offsetof(struct rogue_fwif_cmd_transfer, cmn) == 0U, + "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_transfer"); + +static_assert( + sizeof(struct rogue_fwif_cmd_transfer) <= + ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE, + "kernel expects command size be increased to match current TRANSFER command size"); + +struct rogue_fwif_2d_regs { + uint64_t tla_cmd_stream; + uint64_t deprecated_0; + uint64_t deprecated_1; + uint64_t deprecated_2; + uint64_t deprecated_3; + /* FIXME: HIGH: FIX_HW_BRN_57193 changes the structure's layout. */ + uint64_t brn57193_tla_cmd_stream; +}; + +struct rogue_fwif_cmd_2d { + struct rogue_fwif_cmd_common ALIGN(8) cmn; + struct rogue_fwif_2d_regs ALIGN(8) regs; + + uint32_t flags; +}; + +static_assert( + offsetof(struct rogue_fwif_cmd_2d, cmn) == 0U, + "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_2d"); + +static_assert( + sizeof(struct rogue_fwif_cmd_2d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE, + "kernel expects command size be increased to match current 2D command size"); + +/*********************************************** + Host interface structures. + ***********************************************/ + +/** + * Configuration registers which need to be loaded by the firmware before CDM + * can be started. + */ +struct rogue_fwif_cdm_regs { + uint64_t tpu_border_colour_table; + + /* FIXME: HIGH: RGX_FEATURE_COMPUTE_MORTON_CAPABLE changes the structure's + * layout. + */ + uint64_t cdm_item; + /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's layout. + */ + uint64_t compute_cluster; + + /* FIXME: HIGH: RGX_FEATURE_TPU_DM_GLOBAL_REGISTERS changes the structure's + * layout. Commenting out for now as it's not supported by 4.V.2.51. + */ + /* uint64_t tpu_tag_cdm_ctrl; */ + uint64_t cdm_ctrl_stream_base; + + uint32_t tpu; + + uint32_t cdm_resume_pds1; +}; + +struct rogue_fwif_cmd_compute { + struct rogue_fwif_cmd_common ALIGN(8) cmn; + struct rogue_fwif_cdm_regs ALIGN(8) regs; + uint32_t ALIGN(8) flags; +}; + +static_assert( + offsetof(struct rogue_fwif_cmd_compute, cmn) == 0U, + "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_compute"); + +static_assert( + sizeof(struct rogue_fwif_cmd_compute) <= + ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE, + "kernel expects command size be increased to match current COMPUTE command size"); + +/* TODO: Rename the RGX_* macros in the comments once they are imported. */ +/* Applied to RGX_CR_VDM_SYNC_PDS_DATA_BASE. */ +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0U +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U + +/** Applied to RGX_CR_EVENT_PIXEL_PDS_CODE. */ +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_OFFSET_BYTES 128U +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_MAX_SIZE_BYTES 128U + +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_TOTAL_BYTES 4096U + +/** Pointed to by PDS code at RGX_CR_VDM_SYNC_PDS_DATA_BASE. 
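+ *
+ * The resulting static USC heap layout mirrors the PDS heap above (offsets
+ * in bytes):
+ *    [0, 128)    VDM sync USC code
+ *    [128, 4096) currently unused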
*/ +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0U +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U + +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_TOTAL_BYTES 4096U + +/** + * Applied to RGX_CR_MCU_FENCE, and RGX_CR_PM_MTILE_ARRAY + * (defined(RGX_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT)). + */ +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_OFFSET_BYTES 0U +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_MAX_SIZE_BYTES 128U + +/** Applied to RGX_CR_TPU_YUV_CSC_COEFFICIENTS. */ +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_MAX_SIZE_BYTES 1024U + +#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_TOTAL_BYTES 4096U + +#endif /* PVR_ROGUE_FWIF_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h new file mode 100644 index 00000000000..35da3763eff --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PVR_ROGUE_FWIF_RF_H +#define PVR_ROGUE_FWIF_RF_H + +#include + +#include "pvr_rogue_fwif_shared.h" + +struct rogue_fwif_rf_regs { + uint64_t cdm_ctrl_stream_base; +}; + +/* Enables the reset framework in the firmware. */ +#define ROGUE_FWIF_RF_FLAG_ENABLE 0x00000001U + +struct rogue_fwif_rf_cmd { + uint32_t flags; + + /* THIS MUST BE THE LAST MEMBER OF THE CONTAINING STRUCTURE */ + struct rogue_fwif_rf_regs ALIGN(8) regs; +}; + +#endif /* PVR_ROGUE_FWIF_RF_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h new file mode 100644 index 00000000000..1a2c771c480 --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h @@ -0,0 +1,252 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_SHARED_H
+#define PVR_ROGUE_FWIF_SHARED_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define ALIGN(x) __attribute__((aligned(x)))
+
+/**
+ * Maximum number of UFOs in a CCB command.
+ * The number is based on having 32 sync prims (as originally), plus 32 sync
+ * checkpoints.
+ * Once the use of sync prims is no longer supported, we will retain
+ * the same total (64) as the number of sync checkpoints which may be
+ * supporting a fence is not visible to the client driver and has to
+ * allow for the number of different timelines involved in fence merges.
+ */
+#define ROGUE_FWIF_CCB_CMD_MAX_UFOS (32U + 32U)
+
+/**
+ * This is a generic limit imposed on any DM (TA, 3D, CDM, TDM, 2D, TRANSFER)
+ * command passed through the bridge.
+ * Just across the bridge in the server, any incoming kick command size is
+ * checked against this maximum limit.
+ * In case the incoming command size is larger than the specified limit,
+ * the bridge call is rejected with an error.
+ */
+#define ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE (1024U)
+
+#define ROGUE_FWIF_PRBUFFER_START (0)
+#define ROGUE_FWIF_PRBUFFER_ZSBUFFER (0)
+#define ROGUE_FWIF_PRBUFFER_MSAABUFFER (1)
+#define ROGUE_FWIF_PRBUFFER_MAXSUPPORTED (2)
+
+struct rogue_fwif_dev_addr {
+   uint32_t addr;
+};
+
+struct rogue_fwif_dma_addr {
+   uint64_t ALIGN(8) dev_vaddr;
+   struct rogue_fwif_dev_addr fw_addr;
+} ALIGN(8);
+
+struct rogue_fwif_ufo {
+   struct rogue_fwif_dev_addr ufo_addr;
+   uint32_t value;
+};
+
+struct rogue_fwif_cleanup_ctl {
+   /** Number of commands received by the FW. */
+   uint32_t submitted_cmds;
+
+   /** Number of commands executed by the FW. */
+   uint32_t executed_cmds;
+} ALIGN(8);
+
+/**
+ * Used to share frame numbers across UM-KM-FW,
+ * frame number is set in UM,
+ * frame number is required in both KM for HTB and FW for FW trace.
+ *
+ * May be used to house Kick flags in the future.
+ */
+struct rogue_fwif_cmd_common {
+   /** Associated frame number. */
+   uint32_t frame_num;
+};
+
+/**
+ * TA and 3D commands require a set of firmware addresses that are stored in
+ * the Kernel. The Client has handle(s) to Kernel containers storing these
+ * addresses, instead of raw addresses. We have to patch/write these addresses
+ * in KM to prevent UM from controlling FW addresses directly.
+ * Structures for TA and 3D commands are shared between Client and Firmware
+ * (both single-BVNC). Kernel is implemented in a multi-BVNC manner, so it
+ * can't use TA|3D CMD type definitions directly. Therefore we have a SHARED
+ * block that is shared between UM-KM-FW across all BVNC configurations.
+ */
+struct rogue_fwif_cmd_ta_3d_shared {
+   /** Common command attributes. */
+   struct rogue_fwif_cmd_common cmn;
+
+   /**
+    * RTData associated with this command, this is used for context
+    * selection and for storing out HW-context, when TA is switched out for
+    * continuing later.
+    */
+   struct rogue_fwif_dev_addr hw_rt_data;
+
+   /** Supported PR Buffers like Z/S/MSAA Scratch. */
+   struct rogue_fwif_dev_addr pr_buffers[ROGUE_FWIF_PRBUFFER_MAXSUPPORTED];
+};
+
+/**
+ * Client Circular Command Buffer (CCCB) control structure.
+ * This is shared between the KM driver and the Firmware and holds byte offsets
+ * into the CCCB as well as the wrapping mask to aid wrap around. A given
+ * snapshot of this queue with Cmd 1 running on the GPU might be:
+ *
+ *          Roff                    Doff                    Woff
+ * [..........|-1----------|=2===|=3===|=4===|~5~~~~|~6~~~~|~7~~~~|..........]
+ *            <    runnable commands      ><     !ready to run    >
+ *
+ * Cmd 1    : Currently executing on the GPU data master.
+ * Cmd 2,3,4: Fence dependencies met, commands runnable.
+ * Cmd 5... : Fence dependency not met yet.
+ */
+struct rogue_fwif_cccb_ctl {
+   /** Host write offset into CCB. This must be aligned to 16 bytes. */
+   uint32_t write_offset;
+
+   /**
+    * Firmware read offset into CCB. Points to the command that is runnable
+    * on GPU, if R!=W.
+    */
+   uint32_t read_offset;
+
+   /**
+    * Firmware fence dependency offset. Points to commands not ready, i.e.
+    * fence dependencies are not met.
+    */
+   uint32_t dep_offset;
+
+   /** Offset wrapping mask, total capacity in bytes of the CCB-1. */
+   uint32_t wrap_mask;
+} ALIGN(8);
+
+#define ROGUE_FW_LOCAL_FREELIST 0U
+#define ROGUE_FW_GLOBAL_FREELIST 1U
+#define ROGUE_FW_FREELIST_TYPE_LAST ROGUE_FW_GLOBAL_FREELIST
+#define ROGUE_FW_MAX_FREELISTS (ROGUE_FW_FREELIST_TYPE_LAST + 1U)
+
+struct rogue_fwif_ta_regs_cswitch {
+   uint64_t vdm_context_state_base_addr;
+   uint64_t vdm_context_state_resume_addr;
+   uint64_t ta_context_state_base_addr;
+
+   struct {
+      uint64_t vdm_context_store_task0;
+      uint64_t vdm_context_store_task1;
+      uint64_t vdm_context_store_task2;
+
+      /* VDM resume state update controls. */
+      uint64_t vdm_context_resume_task0;
+      uint64_t vdm_context_resume_task1;
+      uint64_t vdm_context_resume_task2;
+
+      uint64_t vdm_context_store_task3;
+      uint64_t vdm_context_store_task4;
+
+      uint64_t vdm_context_resume_task3;
+      uint64_t vdm_context_resume_task4;
+   } ta_state[2];
+};
+
+#define ROGUE_FWIF_TAREGISTERS_CSWITCH_SIZE \
+   sizeof(struct rogue_fwif_ta_regs_cswitch)
+
+struct rogue_fwif_cdm_regs_cswitch {
+   uint64_t cdm_context_state_base_addr;
+   uint64_t cdm_context_pds0;
+   uint64_t cdm_context_pds1;
+   uint64_t cdm_terminate_pds;
+   uint64_t cdm_terminate_pds1;
+
+   /* CDM resume controls. */
+   uint64_t cdm_resume_pds0;
+   uint64_t cdm_context_pds0_b;
+   uint64_t cdm_resume_pds0_b;
+};
+
+struct rogue_fwif_static_rendercontext_state {
+   /** Geom registers for ctx switch. */
+   struct rogue_fwif_ta_regs_cswitch ALIGN(8) ctx_switch_regs;
+};
+
+#define ROGUE_FWIF_STATIC_RENDERCONTEXT_SIZE \
+   sizeof(struct rogue_fwif_static_rendercontext_state)
+
+struct rogue_fwif_static_computecontext_state {
+   /** CDM registers for ctx switch.
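+    * These are the CDM equivalents of ctx_switch_regs in
+    * rogue_fwif_static_rendercontext_state above: a context state base
+    * address plus the PDS programs used to store and resume work.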
*/ + struct rogue_fwif_cdm_regs_cswitch ALIGN(8) ctx_switch_regs; +}; + +#define ROGUE_FWIF_STATIC_COMPUTECONTEXT_SIZE \ + sizeof(struct rogue_fwif_static_computecontext_state) + +enum rogue_fwif_prbuffer_state { + ROGUE_FWIF_PRBUFFER_UNBACKED = 0, + ROGUE_FWIF_PRBUFFER_BACKED, + ROGUE_FWIF_PRBUFFER_BACKING_PENDING, + ROGUE_FWIF_PRBUFFER_UNBACKING_PENDING, +}; + +struct rogue_fwif_prbuffer { + /** Buffer ID. */ + uint32_t buffer_id; + /** Needs On-demand Z/S/MSAA Buffer allocation. */ + bool ALIGN(4) on_demand; + /** Z/S/MSAA -Buffer state. */ + enum rogue_fwif_prbuffer_state state; + /** Cleanup state. */ + struct rogue_fwif_cleanup_ctl cleanup_state; + /** Compatibility and other flags. */ + uint32_t pr_buffer_flags; +} ALIGN(8); + +/* Last reset reason for a context. */ +enum rogue_context_reset_reason { + /** No reset reason recorded. */ + ROGUE_CONTEXT_RESET_REASON_NONE = 0, + /** Caused a reset due to locking up. */ + ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP = 1, + /** Affected by another context locking up. */ + ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP = 2, + /** Overran the global deadline. */ + ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING = 3, + /** Affected by another context overrunning. */ + ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4, + /** Forced reset to ensure scheduling requirements. */ + ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5, +}; + +struct rogue_context_reset_reason_data { + enum rogue_context_reset_reason reset_reason; + uint32_t reset_ext_job_ref; +}; + +#endif /* PVR_ROGUE_FWIF_SHARED_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c new file mode 100644 index 00000000000..3d12ae3e7b8 --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c @@ -0,0 +1,521 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "pvr_csb.h" +#include "pvr_device_info.h" +#include "pvr_private.h" +#include "pvr_srv.h" +#include "pvr_srv_bo.h" +#include "pvr_srv_bridge.h" +#include "pvr_srv_job_compute.h" +#include "pvr_srv_job_render.h" +#include "pvr_srv_public.h" +#include "pvr_srv_syncobj.h" +#include "pvr_winsys.h" +#include "pvr_winsys_helper.h" +#include "util/log.h" +#include "util/os_misc.h" +#include "vk_log.h" + +/* Amount of space used to hold sync prim values (in bytes). 
*/ +#define PVR_SRV_SYNC_PRIM_VALUE_SIZE 4U + +static VkResult pvr_srv_heap_init( + struct pvr_srv_winsys *srv_ws, + struct pvr_srv_winsys_heap *srv_heap, + uint32_t heap_idx, + const struct pvr_winsys_static_data_offsets *const static_data_offsets) +{ + pvr_dev_addr_t base_address; + uint32_t log2_page_size; + uint64_t reserved_size; + VkResult result; + uint64_t size; + + result = pvr_srv_get_heap_details(srv_ws->render_fd, + heap_idx, + 0, + NULL, + &base_address, + &size, + &reserved_size, + &log2_page_size); + if (result != VK_SUCCESS) + return result; + + result = pvr_winsys_helper_winsys_heap_init(&srv_ws->base, + base_address, + size, + base_address, + reserved_size, + log2_page_size, + static_data_offsets, + &srv_heap->base); + if (result != VK_SUCCESS) + return result; + + assert(srv_heap->base.page_size == srv_ws->base.page_size); + assert(srv_heap->base.log2_page_size == srv_ws->base.log2_page_size); + assert(srv_heap->base.reserved_size % PVR_SRV_RESERVED_SIZE_GRANULARITY == + 0); + + /* Create server-side counterpart of Device Memory heap */ + result = pvr_srv_int_heap_create(srv_ws->render_fd, + srv_heap->base.base_addr, + srv_heap->base.size, + srv_heap->base.log2_page_size, + srv_ws->server_memctx, + &srv_heap->server_heap); + if (result != VK_SUCCESS) { + pvr_winsys_helper_winsys_heap_finish(&srv_heap->base); + return result; + } + + return VK_SUCCESS; +} + +static bool pvr_srv_heap_finish(struct pvr_srv_winsys *srv_ws, + struct pvr_srv_winsys_heap *srv_heap) +{ + if (!pvr_winsys_helper_winsys_heap_finish(&srv_heap->base)) + return false; + + pvr_srv_int_heap_destroy(srv_ws->render_fd, srv_heap->server_heap); + + return true; +} + +static VkResult pvr_srv_memctx_init(struct pvr_srv_winsys *srv_ws) +{ + const struct pvr_winsys_static_data_offsets + general_heap_static_data_offsets = { + .yuv_csc = FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES, + }; + const struct pvr_winsys_static_data_offsets pds_heap_static_data_offsets = { + .eot = FWIF_PDS_HEAP_EOT_OFFSET_BYTES, + .vdm_sync = FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES, + }; + const struct pvr_winsys_static_data_offsets usc_heap_static_data_offsets = { + .vdm_sync = FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES, + }; + const struct pvr_winsys_static_data_offsets + rgn_hdr_heap_static_data_offsets = { 0 }; + + char heap_name[PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH]; + int general_heap_idx = -1; + int rgn_hdr_heap_idx = -1; + int pds_heap_idx = -1; + int usc_heap_idx = -1; + uint32_t heap_count; + VkResult result; + + result = pvr_srv_int_ctx_create(srv_ws->render_fd, + &srv_ws->server_memctx, + &srv_ws->server_memctx_data); + if (result != VK_SUCCESS) + return result; + + os_get_page_size(&srv_ws->base.page_size); + srv_ws->base.log2_page_size = util_logbase2(srv_ws->base.page_size); + + result = pvr_srv_get_heap_count(srv_ws->render_fd, &heap_count); + if (result != VK_SUCCESS) + goto err_pvr_srv_int_ctx_destroy; + + assert(heap_count > 0); + + for (uint32_t i = 0; i < heap_count; i++) { + result = pvr_srv_get_heap_details(srv_ws->render_fd, + i, + sizeof(heap_name), + heap_name, + NULL, + NULL, + NULL, + NULL); + if (result != VK_SUCCESS) + goto err_pvr_srv_int_ctx_destroy; + + if (general_heap_idx == -1 && + strncmp(heap_name, + PVR_SRV_GENERAL_HEAP_IDENT, + sizeof(PVR_SRV_GENERAL_HEAP_IDENT)) == 0) { + general_heap_idx = i; + } else if (pds_heap_idx == -1 && + strncmp(heap_name, + PVR_SRV_PDSCODEDATA_HEAP_IDENT, + sizeof(PVR_SRV_PDSCODEDATA_HEAP_IDENT)) == 0) { + pds_heap_idx = i; + } else if (rgn_hdr_heap_idx == -1 && + strncmp(heap_name, + 
PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT, + sizeof(PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT)) == 0) { + rgn_hdr_heap_idx = i; + } else if (usc_heap_idx == -1 && + strncmp(heap_name, + PVR_SRV_USCCODE_HEAP_IDENT, + sizeof(PVR_SRV_USCCODE_HEAP_IDENT)) == 0) { + usc_heap_idx = i; + } + } + + /* Check for and initialize required heaps. */ + if (general_heap_idx == -1 || pds_heap_idx == -1 || usc_heap_idx == -1) { + result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); + goto err_pvr_srv_int_ctx_destroy; + } + + result = pvr_srv_heap_init(srv_ws, + &srv_ws->general_heap, + general_heap_idx, + &general_heap_static_data_offsets); + if (result != VK_SUCCESS) + goto err_pvr_srv_int_ctx_destroy; + + result = pvr_srv_heap_init(srv_ws, + &srv_ws->pds_heap, + pds_heap_idx, + &pds_heap_static_data_offsets); + if (result != VK_SUCCESS) + goto err_pvr_srv_heap_finish_general; + + result = pvr_srv_heap_init(srv_ws, + &srv_ws->usc_heap, + usc_heap_idx, + &usc_heap_static_data_offsets); + if (result != VK_SUCCESS) + goto err_pvr_srv_heap_finish_pds; + + /* Check for and set up optional heaps. */ + if (rgn_hdr_heap_idx != -1) { + result = pvr_srv_heap_init(srv_ws, + &srv_ws->rgn_hdr_heap, + rgn_hdr_heap_idx, + &rgn_hdr_heap_static_data_offsets); + if (result != VK_SUCCESS) + goto err_pvr_srv_heap_finish_usc; + srv_ws->rgn_hdr_heap_present = true; + } else { + srv_ws->rgn_hdr_heap_present = false; + } + + result = + pvr_winsys_helper_allocate_static_memory(&srv_ws->base, + pvr_srv_heap_alloc_reserved, + &srv_ws->general_heap.base, + &srv_ws->pds_heap.base, + &srv_ws->usc_heap.base, + &srv_ws->general_vma, + &srv_ws->pds_vma, + &srv_ws->usc_vma); + if (result != VK_SUCCESS) + goto err_pvr_srv_heap_finish_rgn_hdr; + + result = pvr_winsys_helper_fill_static_memory(&srv_ws->base, + srv_ws->general_vma, + srv_ws->pds_vma, + srv_ws->usc_vma); + if (result != VK_SUCCESS) + goto err_pvr_srv_free_static_memory; + + return VK_SUCCESS; + +err_pvr_srv_free_static_memory: + pvr_winsys_helper_free_static_memory(srv_ws->general_vma, + srv_ws->pds_vma, + srv_ws->usc_vma); + +err_pvr_srv_heap_finish_rgn_hdr: + if (srv_ws->rgn_hdr_heap_present) + pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap); + +err_pvr_srv_heap_finish_usc: + pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap); + +err_pvr_srv_heap_finish_pds: + pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap); + +err_pvr_srv_heap_finish_general: + pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap); + +err_pvr_srv_int_ctx_destroy: + pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx); + + return result; +} + +static void pvr_srv_memctx_finish(struct pvr_srv_winsys *srv_ws) +{ + pvr_winsys_helper_free_static_memory(srv_ws->general_vma, + srv_ws->pds_vma, + srv_ws->usc_vma); + + if (srv_ws->rgn_hdr_heap_present) { + if (!pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap)) { + vk_errorf(NULL, + VK_ERROR_UNKNOWN, + "Region header heap in use, can not deinit"); + } + } + + if (!pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap)) + vk_errorf(NULL, VK_ERROR_UNKNOWN, "USC heap in use, can not deinit"); + + if (!pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap)) + vk_errorf(NULL, VK_ERROR_UNKNOWN, "PDS heap in use, can not deinit"); + + if (!pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap)) { + vk_errorf(NULL, VK_ERROR_UNKNOWN, "General heap in use, can not deinit"); + } + + pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx); +} + +static VkResult pvr_srv_sync_prim_block_init(struct pvr_srv_winsys *srv_ws) +{ + /* We don't currently make use of this value, but we're 
required to provide + * a valid pointer to pvr_srv_alloc_sync_primitive_block. + */ + void *sync_block_pmr; + + return pvr_srv_alloc_sync_primitive_block(srv_ws->render_fd, + &srv_ws->sync_block_handle, + &sync_block_pmr, + &srv_ws->sync_block_size, + &srv_ws->sync_block_fw_addr); +} + +static void pvr_srv_sync_prim_block_finish(struct pvr_srv_winsys *srv_ws) +{ + pvr_srv_free_sync_primitive_block(srv_ws->render_fd, + srv_ws->sync_block_handle); + srv_ws->sync_block_handle = NULL; +} + +static void pvr_srv_winsys_destroy(struct pvr_winsys *ws) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + int fd = srv_ws->render_fd; + + pvr_srv_sync_prim_block_finish(srv_ws); + pvr_srv_memctx_finish(srv_ws); + vk_free(srv_ws->alloc, srv_ws); + pvr_srv_connection_destroy(fd); +} + +static int pvr_srv_winsys_device_info_init(struct pvr_winsys *ws, + struct pvr_device_info *dev_info) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + int ret; + + ret = pvr_device_info_init(dev_info, srv_ws->bvnc); + if (ret) { + mesa_logw("Unsupported BVNC: %u.%u.%u.%u\n", + PVR_BVNC_UNPACK_B(srv_ws->bvnc), + PVR_BVNC_UNPACK_V(srv_ws->bvnc), + PVR_BVNC_UNPACK_N(srv_ws->bvnc), + PVR_BVNC_UNPACK_C(srv_ws->bvnc)); + return ret; + } + + return 0; +} + +static void pvr_srv_winsys_get_heaps_info(struct pvr_winsys *ws, + struct pvr_winsys_heaps *heaps) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + + heaps->general_heap = &srv_ws->general_heap.base; + heaps->pds_heap = &srv_ws->pds_heap.base; + heaps->usc_heap = &srv_ws->usc_heap.base; + + if (srv_ws->rgn_hdr_heap_present) + heaps->rgn_hdr_heap = &srv_ws->rgn_hdr_heap.base; + else + heaps->rgn_hdr_heap = &srv_ws->general_heap.base; +} + +static const struct pvr_winsys_ops srv_winsys_ops = { + .destroy = pvr_srv_winsys_destroy, + .device_info_init = pvr_srv_winsys_device_info_init, + .get_heaps_info = pvr_srv_winsys_get_heaps_info, + .buffer_create = pvr_srv_winsys_buffer_create, + .buffer_create_from_fd = pvr_srv_winsys_buffer_create_from_fd, + .buffer_destroy = pvr_srv_winsys_buffer_destroy, + .buffer_get_fd = pvr_srv_winsys_buffer_get_fd, + .buffer_map = pvr_srv_winsys_buffer_map, + .buffer_unmap = pvr_srv_winsys_buffer_unmap, + .heap_alloc = pvr_srv_winsys_heap_alloc, + .heap_free = pvr_srv_winsys_heap_free, + .vma_map = pvr_srv_winsys_vma_map, + .vma_unmap = pvr_srv_winsys_vma_unmap, + .syncobj_create = pvr_srv_winsys_syncobj_create, + .syncobj_destroy = pvr_srv_winsys_syncobj_destroy, + .syncobjs_reset = pvr_srv_winsys_syncobjs_reset, + .syncobjs_signal = pvr_srv_winsys_syncobjs_signal, + .syncobjs_wait = pvr_srv_winsys_syncobjs_wait, + .syncobjs_merge = pvr_srv_winsys_syncobjs_merge, + .free_list_create = pvr_srv_winsys_free_list_create, + .free_list_destroy = pvr_srv_winsys_free_list_destroy, + .render_target_dataset_create = pvr_srv_render_target_dataset_create, + .render_target_dataset_destroy = pvr_srv_render_target_dataset_destroy, + .render_ctx_create = pvr_srv_winsys_render_ctx_create, + .render_ctx_destroy = pvr_srv_winsys_render_ctx_destroy, + .render_submit = pvr_srv_winsys_render_submit, + .compute_ctx_create = pvr_srv_winsys_compute_ctx_create, + .compute_ctx_destroy = pvr_srv_winsys_compute_ctx_destroy, + .compute_submit = pvr_srv_winsys_compute_submit, +}; + +static bool pvr_is_driver_compatible(int render_fd) +{ + drmVersionPtr version; + + version = drmGetVersion(render_fd); + if (!version) + return false; + + assert(strcmp(version->name, "pvr") == 0); + + /* Only the 1.14 driver is supported for now. 
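+    * The check below therefore requires an exact
+    * PVR_SRV_VERSION_MAJ.PVR_SRV_VERSION_MIN match, since the downstream
+    * bridge interface gives no compatibility guarantees between versions.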
*/ + if (version->version_major != PVR_SRV_VERSION_MAJ || + version->version_minor != PVR_SRV_VERSION_MIN) { + vk_errorf(NULL, + VK_ERROR_INCOMPATIBLE_DRIVER, + "Unsupported downstream driver version (%u.%u)", + version->version_major, + version->version_minor); + drmFreeVersion(version); + + return false; + } + + drmFreeVersion(version); + + return true; +} + +struct pvr_winsys *pvr_srv_winsys_create(int master_fd, + int render_fd, + const VkAllocationCallbacks *alloc) +{ + struct pvr_srv_winsys *srv_ws; + VkResult result; + uint64_t bvnc; + + if (!pvr_is_driver_compatible(render_fd)) + return NULL; + + result = pvr_srv_connection_create(render_fd, &bvnc); + if (result != VK_SUCCESS) + return NULL; + + srv_ws = + vk_zalloc(alloc, sizeof(*srv_ws), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!srv_ws) { + vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_pvr_srv_connection_destroy; + } + + srv_ws->base.ops = &srv_winsys_ops; + srv_ws->bvnc = bvnc; + srv_ws->master_fd = master_fd; + srv_ws->render_fd = render_fd; + srv_ws->alloc = alloc; + + result = pvr_srv_memctx_init(srv_ws); + if (result != VK_SUCCESS) + goto err_vk_free_srv_ws; + + result = pvr_srv_sync_prim_block_init(srv_ws); + if (result != VK_SUCCESS) + goto err_pvr_srv_memctx_finish; + + return &srv_ws->base; + +err_pvr_srv_memctx_finish: + pvr_srv_memctx_finish(srv_ws); + +err_vk_free_srv_ws: + vk_free(alloc, srv_ws); + +err_pvr_srv_connection_destroy: + pvr_srv_connection_destroy(render_fd); + + return NULL; +} + +struct pvr_srv_sync_prim *pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws) +{ + struct pvr_srv_sync_prim *sync_prim; + + if (p_atomic_read(&srv_ws->sync_block_offset) == srv_ws->sync_block_size) { + vk_error(NULL, VK_ERROR_UNKNOWN); + return NULL; + } + + sync_prim = vk_alloc(srv_ws->alloc, + sizeof(*sync_prim), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!sync_prim) { + vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + + /* p_atomic_add_return() returns the new value rather than the old one, so + * we have to subtract PVR_SRV_SYNC_PRIM_VALUE_SIZE to get the old value. + */ + sync_prim->offset = p_atomic_add_return(&srv_ws->sync_block_offset, + PVR_SRV_SYNC_PRIM_VALUE_SIZE); + sync_prim->offset -= PVR_SRV_SYNC_PRIM_VALUE_SIZE; + if (sync_prim->offset == srv_ws->sync_block_size) { + /* FIXME: need to free offset back to srv_ws->sync_block_offset. */ + vk_free(srv_ws->alloc, sync_prim); + + vk_error(NULL, VK_ERROR_UNKNOWN); + + return NULL; + } + + sync_prim->srv_ws = srv_ws; + + return sync_prim; +} + +/* FIXME: Add support for freeing offsets back to the sync block. */ +void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim) +{ + if (sync_prim) { + struct pvr_srv_winsys *srv_ws = sync_prim->srv_ws; + + vk_free(srv_ws->alloc, sync_prim); + } +} diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h new file mode 100644 index 00000000000..6dd9dcc4f8f --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h @@ -0,0 +1,129 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_H
+#define PVR_SRV_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+/*******************************************
+   Misc defines
+ *******************************************/
+
+/* 64KB is the maximum anticipated OS page size. */
+#define PVR_SRV_RESERVED_SIZE_GRANULARITY 0x10000
+
+#define PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH 160
+
+#define PVR_SRV_GENERAL_HEAP_IDENT "General"
+#define PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT "RgnHdr BRN63142"
+#define PVR_SRV_PDSCODEDATA_HEAP_IDENT "PDS Code and Data"
+#define PVR_SRV_USCCODE_HEAP_IDENT "USC Code"
+
+#define FWIF_PDS_HEAP_TOTAL_BYTES 4096
+#define FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_PDS_HEAP_EOT_OFFSET_BYTES 128
+#define FWIF_GENERAL_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U
+
+/*******************************************
+   Structure definitions
+ *******************************************/
+
+struct pvr_srv_winsys_heap {
+   struct pvr_winsys_heap base;
+
+   void *server_heap;
+};
+
+struct pvr_srv_winsys {
+   struct pvr_winsys base;
+
+   int master_fd;
+   int render_fd;
+
+   const VkAllocationCallbacks *alloc;
+
+   /* Packed BVNC. */
+   uint64_t bvnc;
+
+   void *server_memctx;
+   void *server_memctx_data;
+
+   /* Required heaps. */
+   struct pvr_srv_winsys_heap general_heap;
+   struct pvr_srv_winsys_heap pds_heap;
+   struct pvr_srv_winsys_heap usc_heap;
+
+   /* Optional heaps. */
+   bool rgn_hdr_heap_present;
+   struct pvr_srv_winsys_heap rgn_hdr_heap;
+
+   /* vmas for reserved memory regions. */
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   struct pvr_winsys_vma *general_vma;
+
+   /* Sync block used for allocating sync primitives.
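+    * sync_block_offset is advanced by PVR_SRV_SYNC_PRIM_VALUE_SIZE on each
+    * allocation; offsets are not currently recycled (see
+    * pvr_srv_sync_prim_free()).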
*/ + void *sync_block_handle; + uint32_t sync_block_size; + uint32_t sync_block_fw_addr; + uint16_t sync_block_offset; +}; + +struct pvr_srv_sync_prim { + struct pvr_srv_winsys *srv_ws; + uint32_t offset; + uint32_t value; +}; + +/******************************************* + helper macros + *******************************************/ + +#define to_pvr_srv_winsys(ws) container_of((ws), struct pvr_srv_winsys, base) +#define to_pvr_srv_winsys_heap(heap) \ + container_of((heap), struct pvr_srv_winsys_heap, base) + +/******************************************* + functions + *******************************************/ + +struct pvr_srv_sync_prim * +pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws); +void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim); + +static inline uint32_t +pvr_srv_sync_prim_get_fw_addr(const struct pvr_srv_sync_prim *const sync_prim) +{ + return sync_prim->srv_ws->sync_block_fw_addr + sync_prim->offset; +} + +#endif /* PVR_SRV_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c new file mode 100644 index 00000000000..11c917092fc --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c @@ -0,0 +1,596 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pvr_private.h" +#include "pvr_srv.h" +#include "pvr_srv_bo.h" +#include "pvr_srv_bridge.h" +#include "pvr_winsys_helper.h" +#include "util/u_atomic.h" +#include "util/bitscan.h" +#include "util/macros.h" +#include "util/u_math.h" +#include "vk_log.h" + +/* Note: This function does not have an associated pvr_srv_free_display_pmr + * function, use pvr_srv_free_pmr instead. 
+ */ +static VkResult pvr_srv_alloc_display_pmr(struct pvr_srv_winsys *srv_ws, + uint64_t size, + uint64_t srv_flags, + void **const pmr_out, + uint32_t *const handle_out) +{ + uint64_t aligment_out; + uint64_t size_out; + VkResult result; + uint32_t handle; + int ret; + int fd; + + ret = + pvr_winsys_helper_display_buffer_create(srv_ws->master_fd, size, &handle); + if (ret) + return vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + ret = drmPrimeHandleToFD(srv_ws->master_fd, handle, O_CLOEXEC, &fd); + if (ret) { + result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_display_buffer_destroy; + } + + result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd, + fd, + srv_flags, + pmr_out, + &size_out, + &aligment_out); + + assert(size_out >= size); + assert(aligment_out == srv_ws->base.page_size); + + /* close fd, not needed anymore */ + close(fd); + + if (result != VK_SUCCESS) + goto err_display_buffer_destroy; + + *handle_out = handle; + + return VK_SUCCESS; + +err_display_buffer_destroy: + pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd, handle); + + return result; +} + +static void buffer_acquire(struct pvr_srv_winsys_bo *srv_bo) +{ + p_atomic_inc(&srv_bo->ref_count); +} + +static void buffer_release(struct pvr_srv_winsys_bo *srv_bo) +{ + struct pvr_srv_winsys *srv_ws; + + /* If all references were dropped the pmr can be freed and unlocked */ + if (p_atomic_dec_return(&srv_bo->ref_count) == 0) { + srv_ws = to_pvr_srv_winsys(srv_bo->base.ws); + pvr_srv_free_pmr(srv_ws->render_fd, srv_bo->pmr); + + if (srv_bo->is_display_buffer) { + pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd, + srv_bo->handle); + } + + vk_free(srv_ws->alloc, srv_bo); + } +} + +static uint64_t pvr_srv_get_alloc_flags(uint32_t ws_flags) +{ + /* TODO: For now we assume that buffers should always be accessible to the + * kernel and that the PVR_WINSYS_BO_FLAG_CPU_ACCESS flag only applies to + * userspace mappings. Check to see if there's any situations where we + * wouldn't want this to be the case. + */ + uint64_t srv_flags = PVR_SRV_MEMALLOCFLAG_GPU_READABLE | + PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE | + PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE | + PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC; + + if (ws_flags & PVR_WINSYS_BO_FLAG_CPU_ACCESS) { + srv_flags |= PVR_SRV_MEMALLOCFLAG_CPU_READABLE | + PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE; + } + + if (ws_flags & PVR_WINSYS_BO_FLAG_GPU_UNCACHED) + srv_flags |= PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED; + else + srv_flags |= PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT; + + if (ws_flags & PVR_WINSYS_BO_FLAG_PM_FW_PROTECT) + srv_flags |= PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(PM_FW_PROTECT); + + if (ws_flags & PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC) + srv_flags |= PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC; + + return srv_flags; +} + +VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws, + uint64_t size, + uint64_t alignment, + enum pvr_winsys_bo_type type, + uint32_t ws_flags, + struct pvr_winsys_bo **const bo_out) +{ + const uint64_t srv_flags = pvr_srv_get_alloc_flags(ws_flags); + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + struct pvr_srv_winsys_bo *srv_bo; + VkResult result; + + assert(util_is_power_of_two_nonzero(alignment)); + + /* Kernel will page align the size, we do the same here so we have access to + * all the allocated memory. 
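+    *
+    * E.g. (illustrative): with a 4 KiB page size, a request for size 6000
+    * with alignment 256 becomes an 8192 byte allocation with 4096 byte
+    * alignment.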
+ */
+ alignment = MAX2(alignment, ws->page_size);
+ size = ALIGN_POT(size, alignment);
+
+ srv_bo = vk_zalloc(srv_ws->alloc,
+ sizeof(*srv_bo),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!srv_bo)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ srv_bo->is_display_buffer = (type == PVR_WINSYS_BO_TYPE_DISPLAY);
+ if (srv_bo->is_display_buffer) {
+ result = pvr_srv_alloc_display_pmr(srv_ws,
+ size,
+ srv_flags &
+ PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK,
+ &srv_bo->pmr,
+ &srv_bo->handle);
+
+ srv_bo->base.is_imported = true;
+ } else {
+ result =
+ pvr_srv_alloc_pmr(srv_ws->render_fd,
+ size,
+ size,
+ 1,
+ 1,
+ srv_ws->base.log2_page_size,
+ (srv_flags & PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK),
+ getpid(),
+ &srv_bo->pmr);
+ }
+
+ if (result != VK_SUCCESS)
+ goto err_vk_free_srv_bo;
+
+ srv_bo->base.size = size;
+ srv_bo->base.ws = ws;
+ srv_bo->flags = srv_flags;
+
+ p_atomic_set(&srv_bo->ref_count, 1);
+
+ *bo_out = &srv_bo->base;
+
+ return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+ vk_free(srv_ws->alloc, srv_bo);
+
+ return result;
+}
+
+VkResult
+pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
+ int fd,
+ struct pvr_winsys_bo **const bo_out)
+{
+ /* FIXME: PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC should be changed to
+ * PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT, as a dma-buf is always mapped
+ * as cacheable by the exporter. The flags are not passed to the exporter
+ * and don't really change the behavior, but they can be used for internal
+ * checking, so they should reflect the correct cacheability of the buffer.
+ * Ref: pvr_GetMemoryFdPropertiesKHR
+ * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+ */
+ static const uint64_t srv_flags =
+ PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE |
+ PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC | PVR_SRV_MEMALLOCFLAG_GPU_READABLE |
+ PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE |
+ PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT;
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+ struct pvr_srv_winsys_bo *srv_bo;
+ uint64_t alignment_out;
+ uint64_t size_out;
+ VkResult result;
+
+ srv_bo = vk_zalloc(srv_ws->alloc,
+ sizeof(*srv_bo),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!srv_bo)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd,
+ fd,
+ srv_flags,
+ &srv_bo->pmr,
+ &size_out,
+ &alignment_out);
+ if (result != VK_SUCCESS)
+ goto err_vk_free_srv_bo;
+
+ assert(alignment_out == srv_ws->base.page_size);
+
+ srv_bo->base.ws = ws;
+ srv_bo->base.size = size_out;
+ srv_bo->base.is_imported = true;
+ srv_bo->flags = srv_flags;
+
+ p_atomic_set(&srv_bo->ref_count, 1);
+
+ *bo_out = &srv_bo->base;
+
+ return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+ vk_free(srv_ws->alloc, srv_bo);
+
+ return result;
+}
+
+void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo)
+{
+ struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+
+ buffer_release(srv_bo);
+}
+
+VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
+ int *const fd_out)
+{
+ struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+ int ret;
+
+ if (!srv_bo->is_display_buffer)
+ return pvr_srv_physmem_export_dmabuf(srv_ws->render_fd,
+ srv_bo->pmr,
+ fd_out);
+
+ /* For display buffers, export using the saved buffer handle. */
+ ret =
+ drmPrimeHandleToFD(srv_ws->master_fd, srv_bo->handle, O_CLOEXEC, fd_out);
+ if (ret)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ return VK_SUCCESS;
+}
+
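An editorial usage sketch (not part of the patch) of the buffer interface above: creation takes the initial PMR reference, mapping takes another, and unmap/destroy release them, with buffer_release() freeing the PMR once the count hits zero. The sketch assumes a valid struct pvr_winsys *ws and assumes PVR_WINSYS_BO_TYPE_GPU as the non-display buffer type (only PVR_WINSYS_BO_TYPE_DISPLAY is referenced in this file); it also needs <string.h> for memset():

    static VkResult example_fill_buffer(struct pvr_winsys *ws)
    {
       struct pvr_winsys_bo *bo;
       VkResult result;
       void *map;

       /* Allocate a page-sized, zero-initialized buffer the CPU may map. */
       result = pvr_srv_winsys_buffer_create(ws,
                                             ws->page_size,
                                             ws->page_size,
                                             PVR_WINSYS_BO_TYPE_GPU,
                                             PVR_WINSYS_BO_FLAG_CPU_ACCESS |
                                                PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC,
                                             &bo);
       if (result != VK_SUCCESS)
          return result;

       map = pvr_srv_winsys_buffer_map(bo);
       if (!map) {
          pvr_srv_winsys_buffer_destroy(bo);
          return VK_ERROR_MEMORY_MAP_FAILED;
       }

       memset(map, 0xaa, bo->size);

       /* Unmapping drops the reference taken by the map... */
       pvr_srv_winsys_buffer_unmap(bo);
       /* ...and destroy drops the creation reference, freeing the PMR. */
       pvr_srv_winsys_buffer_destroy(bo);

       return VK_SUCCESS;
    }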
+void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo)
+{
+ struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+ const int prot =
+ (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE ? PROT_WRITE : 0) |
+ (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_READABLE ? PROT_READ : 0);
+
+ /* The memory must not already be mapped. */
+ assert(!bo->map);
+
+ /* Map the full PMR to CPU space. */
+ bo->map = mmap(NULL,
+ bo->size,
+ prot,
+ MAP_SHARED,
+ srv_ws->render_fd,
+ (off_t)srv_bo->pmr << srv_ws->base.log2_page_size);
+ if (bo->map == MAP_FAILED) {
+ bo->map = NULL;
+ vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+ return NULL;
+ }
+
+ VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map,
+ bo->size,
+ 0,
+ srv_bo->flags &
+ PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC));
+
+ buffer_acquire(srv_bo);
+
+ return bo->map;
+}
+
+void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo)
+{
+ struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+
+ /* It is an error to unmap memory that was not previously mapped. */
+ assert(bo->map);
+
+ /* Unmap the whole PMR from CPU space. */
+ if (munmap(bo->map, bo->size))
+ vk_error(NULL, VK_ERROR_UNKNOWN);
+
+ VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+
+ bo->map = NULL;
+
+ buffer_release(srv_bo);
+}
+
+/* This function must be used to allocate inside the reserved region, and must
+ * only be used internally. This also means whoever uses it must know what
+ * they are doing.
+ */
+struct pvr_winsys_vma *
+pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap,
+ const pvr_dev_addr_t reserved_dev_addr,
+ uint64_t size,
+ uint64_t alignment)
+{
+ struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(heap);
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(heap->ws);
+ struct pvr_srv_winsys_vma *srv_vma;
+ VkResult result;
+ uint64_t addr;
+
+ assert(util_is_power_of_two_nonzero(alignment));
+
+ /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the same
+ * here to ensure enough heap space is allocated to be able to map the
+ * buffer to the GPU.
+ */
+ alignment = MAX2(alignment, heap->ws->page_size);
+ size = ALIGN_POT(size, alignment);
+
+ srv_vma = vk_alloc(srv_ws->alloc,
+ sizeof(*srv_vma),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!srv_vma) {
+ vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+
+ /* Just check that the address is correct and aligned; locking is not
+ * required, as the user is responsible for providing a distinct address.
+ */
+ if (reserved_dev_addr.addr < heap->base_addr.addr ||
+ reserved_dev_addr.addr + size >
+ heap->base_addr.addr + heap->reserved_size ||
+ reserved_dev_addr.addr & ((srv_ws->base.page_size) - 1))
+ goto err_vk_free_srv_vma;
+
+ addr = reserved_dev_addr.addr;
+
+ /* Reserve the virtual range in the MMU and create a mapping structure. */
+ result = pvr_srv_int_reserve_addr(srv_ws->render_fd,
+ srv_heap->server_heap,
+ (pvr_dev_addr_t){ .addr = addr },
+ size,
+ &srv_vma->reservation);
+ if (result != VK_SUCCESS)
+ goto err_vk_free_srv_vma;
+
+ srv_vma->base.dev_addr.addr = addr;
+ srv_vma->base.bo = NULL;
+ srv_vma->base.heap = heap;
+ srv_vma->base.size = size;
+
+ p_atomic_inc(&srv_heap->base.ref_count);
+
+ return &srv_vma->base;
+
+err_vk_free_srv_vma:
+ vk_free(srv_ws->alloc, srv_vma);
+
+ return NULL;
+}
+
+struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap,
+ uint64_t size,
+ uint64_t alignment)
+{
+ struct pvr_srv_winsys_heap *const srv_heap = to_pvr_srv_winsys_heap(heap);
+ struct pvr_srv_winsys *const srv_ws = to_pvr_srv_winsys(heap->ws);
+ struct pvr_srv_winsys_vma *srv_vma;
+ VkResult result;
+ bool ret;
+
+ srv_vma = vk_alloc(srv_ws->alloc,
+ sizeof(*srv_vma),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!srv_vma) {
+ vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+
+ ret = pvr_winsys_helper_heap_alloc(heap, size, alignment, &srv_vma->base);
+ if (!ret)
+ goto err_pvr_srv_free_vma;
+
+ /* Reserve the virtual range in the MMU and create a mapping structure. */
+ result = pvr_srv_int_reserve_addr(srv_ws->render_fd,
+ srv_heap->server_heap,
+ srv_vma->base.dev_addr,
+ srv_vma->base.size,
+ &srv_vma->reservation);
+ if (result != VK_SUCCESS)
+ goto err_pvr_srv_free_allocation;
+
+ return &srv_vma->base;
+
+err_pvr_srv_free_allocation:
+ pvr_winsys_helper_heap_free(&srv_vma->base);
+
+err_pvr_srv_free_vma:
+ vk_free(srv_ws->alloc, srv_vma);
+
+ return NULL;
+}
+
+void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma)
+{
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+ struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+
+ /* A vma with an existing device mapping should not be freed. */
+ assert(!srv_vma->base.bo);
+
+ /* Remove the mapping handle and the underlying reservation. */
+ pvr_srv_int_unreserve_addr(srv_ws->render_fd, srv_vma->reservation);
+
+ /* Check if we are dealing with a reserved address range. */
+ if (vma->dev_addr.addr <
+ (vma->heap->base_addr.addr + vma->heap->reserved_size)) {
+ /* For reserved addresses, just decrement the reference count. */
+ p_atomic_dec(&vma->heap->ref_count);
+ } else {
+ /* Free the allocated virtual space. */
+ pvr_winsys_helper_heap_free(vma);
+ }
+
+ vk_free(srv_ws->alloc, srv_vma);
+}
+
+/* * We assume the vma has been allocated with extra space to accommodate the
+ * offset.
+ * * The offset passed in is unchanged and can be used to calculate the extra
+ * size that needs to be mapped and the final device virtual address.
+ */
+pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma,
+ struct pvr_winsys_bo *bo,
+ uint64_t offset,
+ uint64_t size)
+{
+ struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+ struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+ const uint64_t srv_flags = srv_bo->flags &
+ PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK;
+ const uint32_t virt_offset = offset & (vma->heap->page_size - 1);
+ const uint64_t aligned_virt_size =
+ ALIGN_POT(virt_offset + size, vma->heap->page_size);
+ VkResult result;
+
+ /* The address should not be mapped already. */
+ assert(!srv_vma->base.bo);
+
+ if (srv_bo->is_display_buffer) {
+ struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(vma->heap);
+
+ /* For display buffers, we only support mapping the whole PMR. */
+ if (offset != 0 || bo->size != ALIGN_POT(size, srv_ws->base.page_size) ||
+ vma->size != bo->size) {
+ vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+ return (pvr_dev_addr_t){ .addr = 0UL };
+ }
+
+ /* Map the requested PMR. */
+ result = pvr_srv_int_map_pmr(srv_ws->render_fd,
+ srv_heap->server_heap,
+ srv_vma->reservation,
+ srv_bo->pmr,
+ srv_flags,
+ &srv_vma->mapping);
+
+ } else {
+ const uint32_t phys_page_offset = (offset - virt_offset) >>
+ srv_ws->base.log2_page_size;
+ const uint32_t phys_page_count = aligned_virt_size >>
+ srv_ws->base.log2_page_size;
+
+ /* Check that the bo and vma can accommodate the given size and offset. */
+ if (ALIGN_POT(offset + size, vma->heap->page_size) > bo->size ||
+ aligned_virt_size > vma->size) {
+ vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+ return (pvr_dev_addr_t){ .addr = 0UL };
+ }
+
+ /* Map the requested pages. */
+ result = pvr_srv_int_map_pages(srv_ws->render_fd,
+ srv_vma->reservation,
+ srv_bo->pmr,
+ phys_page_count,
+ phys_page_offset,
+ srv_flags,
+ vma->dev_addr);
+ }
+
+ if (result != VK_SUCCESS)
+ return (pvr_dev_addr_t){ .addr = 0UL };
+
+ buffer_acquire(srv_bo);
+
+ vma->bo = &srv_bo->base;
+ vma->bo_offset = offset;
+ vma->mapped_size = aligned_virt_size;
+
+ return (pvr_dev_addr_t){ .addr = vma->dev_addr.addr + virt_offset };
+}
+
+void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma)
+{
+ struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+ struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+ struct pvr_srv_winsys_bo *srv_bo;
+
+ /* The address should be mapped. */
+ assert(srv_vma->base.bo);
+
+ srv_bo = to_pvr_srv_winsys_bo(srv_vma->base.bo);
+
+ if (srv_bo->is_display_buffer) {
+ /* Unmap the requested PMR. */
+ pvr_srv_int_unmap_pmr(srv_ws->render_fd, srv_vma->mapping);
+ } else {
+ /* Unmap the requested pages. */
+ pvr_srv_int_unmap_pages(srv_ws->render_fd,
+ srv_vma->reservation,
+ vma->dev_addr,
+ vma->mapped_size >> srv_ws->base.log2_page_size);
+ }
+
+ buffer_release(srv_bo);
+
+ srv_vma->base.bo = NULL;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h
new file mode 100644
index 00000000000..a924c9a5b64
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BO_H
+#define PVR_SRV_BO_H
+
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/*******************************************
+ MemAlloc flags
+ *******************************************/
+
+/* TODO: remove unused and redundant flags */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET 26U
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK \
+ (0x3ULL << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN BITFIELD_BIT(19U)
+#define PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE BITFIELD_BIT(14U)
+#define PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC BITFIELD_BIT(31U)
+#define PVR_SRV_MEMALLOCFLAG_SVM_ALLOC BITFIELD_BIT(17U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC BITFIELD_BIT(30U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE BITFIELD_BIT(29U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READABLE BITFIELD_BIT(0U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE BITFIELD_BIT(1U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK (7ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_GPU_READABLE | PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE | \
+ PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READABLE BITFIELD_BIT(4U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE BITFIELD_BIT(5U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK (7ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT (3ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC BITFIELD_BIT(15U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING BITFIELD_BIT(18U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING BITFIELD_BIT(20U)
+#define PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK (7ULL << 23U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER BITFIELD64_BIT(34U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER BITFIELD64_BIT(35U)
+#define PVR_SRV_PHYS_HEAP_HINT_SHIFT (60U)
+#define PVR_SRV_PHYS_HEAP_HINT_MASK (0xFULL << PVR_SRV_PHYS_HEAP_HINT_SHIFT)
+#define PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED BITFIELD_BIT(8U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT (3ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC (0ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED BITFIELD_BIT(2U)
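An editorial aside, inferred from the masks above: the cache-mode values are multi-bit fields rather than independent flag bits, which is easy to misread when combining flags:

    /* Editorial gloss (not in the original patch):
     * GPU cache mode: 3-bit field at bits 8..10 (GPU_CACHE_MODE_MASK = 7ULL << 8):
     *   PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED         = 1ULL << 8  (mode 1)
     *   PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT = 3ULL << 8  (mode 3)
     * CPU cache mode: 3-bit field at bits 11..13 (CPU_CACHE_MODE_MASK = 7ULL << 11):
     *   PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC      = 0ULL << 11 (mode 0)
     *   PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT = 3ULL << 11 (mode 3)
     * This is why pvr_srv_get_alloc_flags() in pvr_srv_bo.c selects exactly
     * one mode per field rather than ORing several mode bits together.
     */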
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED BITFIELD_BIT(3U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED BITFIELD_BIT(6U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED BITFIELD_BIT(7U)
+
+#define PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN | \
+ PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE | \
+ PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC | PVR_SRV_MEMALLOCFLAG_SVM_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE | \
+ PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING | \
+ PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING | \
+ PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK | \
+ PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER | \
+ PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_PHYSICAL_MASK \
+ (PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK | \
+ PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN | \
+ PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK \
+ (PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED)
+
+/* Device-specific MMU flags. */
+/*!< Memory that only the PM and Firmware can access */
+#define PM_FW_PROTECT BITFIELD_BIT(0U)
+
+/* Helper macro for setting device-specific MMU flags. */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(n) \
+ (((n) << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET) & \
+ PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK)
+
+/*******************************************
+ struct definitions
+ *******************************************/
+
+struct pvr_srv_winsys_bo {
+ struct pvr_winsys_bo base;
+ uint32_t ref_count;
+ void *pmr;
+
+ bool is_display_buffer;
+ uint32_t handle;
+ uint64_t flags;
+};
+
+struct pvr_srv_winsys_vma {
+ struct pvr_winsys_vma base;
+ void *reservation;
+
+ /* Required when mapping the whole PMR, i.e. when mapping display buffers.
*/ + void *mapping; +}; + +/******************************************* + function prototypes + *******************************************/ + +VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws, + uint64_t size, + uint64_t alignment, + enum pvr_winsys_bo_type type, + uint32_t ws_flags, + struct pvr_winsys_bo **const bo_out); +VkResult +pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws, + int fd, + struct pvr_winsys_bo **const bo_out); +void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo); + +VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo, + int *const fd_out); + +void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo); +void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo); + +struct pvr_winsys_vma * +pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap, + const pvr_dev_addr_t reserved_dev_addr, + uint64_t size, + uint64_t alignment); +struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap, + uint64_t size, + uint64_t alignment); +void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma); + +pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma, + struct pvr_winsys_bo *bo, + uint64_t offset, + uint64_t size); +void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma); + +/******************************************* + helper macros + *******************************************/ + +#define to_pvr_srv_winsys_bo(bo) \ + container_of((bo), struct pvr_srv_winsys_bo, base) +#define to_pvr_srv_winsys_vma(vma) \ + container_of((vma), struct pvr_srv_winsys_vma, base) + +#endif /* PVR_SRV_BO_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c new file mode 100644 index 00000000000..064cf8f82ee --- /dev/null +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c @@ -0,0 +1,1293 @@ +/* + * Copyright © 2022 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_log.h"
+
+#define vk_bridge_err(vk_err, bridge_func, bridge_ret) \
+ vk_errorf(NULL, \
+ vk_err, \
+ "%s failed, PVR_SRV_ERROR: %d, Errno: %s", \
+ bridge_func, \
+ (bridge_ret).error, \
+ strerror(errno))
+
+static int pvr_srv_bridge_call(int fd,
+ uint8_t bridge_id,
+ uint32_t function_id,
+ void *input,
+ uint32_t input_buffer_size,
+ void *output,
+ uint32_t output_buffer_size)
+{
+ struct drm_srvkm_cmd cmd = {
+ .bridge_id = bridge_id,
+ .bridge_func_id = function_id,
+ .in_data_ptr = (uint64_t)(uintptr_t)input,
+ .out_data_ptr = (uint64_t)(uintptr_t)output,
+ .in_data_size = input_buffer_size,
+ .out_data_size = output_buffer_size,
+ };
+
+ int ret = drmIoctl(fd, DRM_IOCTL_SRVKM_CMD, &cmd);
+ if (unlikely(ret))
+ return ret;
+
+ VG(VALGRIND_MAKE_MEM_DEFINED(output, output_buffer_size));
+
+ return 0U;
+}
+
+VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out)
+{
+ struct pvr_srv_bridge_connect_cmd cmd = {
+ .flags = PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT,
+ .build_options = RGX_BUILD_OPTIONS,
+ .DDK_version = PVR_SRV_VERSION,
+ .DDK_build = PVR_SRV_VERSION_BUILD,
+ };
+
+ /* Initialize ret.error to a default error. */
+ struct pvr_srv_bridge_connect_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_SRVCORE,
+ PVR_SRV_BRIDGE_SRVCORE_CONNECT,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_SRVCORE_CONNECT",
+ ret);
+ }
+
+ *bvnc_out = ret.bvnc;
+
+ return VK_SUCCESS;
+}
+
+void pvr_srv_connection_destroy(int fd)
+{
+ /* Initialize ret.error to a default error. */
+ struct pvr_srv_bridge_disconnect_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_SRVCORE,
+ PVR_SRV_BRIDGE_SRVCORE_DISCONNECT,
+ NULL,
+ 0,
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ vk_bridge_err(VK_ERROR_UNKNOWN, "PVR_SRV_BRIDGE_SRVCORE_DISCONNECT", ret);
+ }
+}
+
+VkResult pvr_srv_alloc_sync_primitive_block(int fd,
+ void **const handle_out,
+ void **const pmr_out,
+ uint32_t *const size_out,
+ uint32_t *const addr_out)
+{
+ /* Initialize ret.error to a default error. */
+ struct pvr_srv_bridge_alloc_sync_primitive_block_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_SYNC,
+ PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK,
+ NULL,
+ 0,
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK",
+ ret);
+ }
+
+ *handle_out = ret.handle;
+ *pmr_out = ret.pmr;
+ *size_out = ret.size;
+ *addr_out = ret.addr;
+
+ return VK_SUCCESS;
+}
+
+void pvr_srv_free_sync_primitive_block(int fd, void *handle)
+{
+ struct pvr_srv_bridge_free_sync_primitive_block_cmd cmd = {
+ .handle = handle,
+ };
+
+ /* Initialize ret.error to a default error. */
+ struct pvr_srv_bridge_free_sync_primitive_block_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_SYNC,
+ PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if
(result || ret.error != PVR_SRV_OK) {
+ vk_bridge_err(VK_ERROR_UNKNOWN,
+ "PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK",
+ ret);
+ }
+}
+
+VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out)
+{
+ struct pvr_srv_heap_count_cmd cmd = {
+ .heap_config_index = 0,
+ };
+
+ struct pvr_srv_heap_count_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_MM,
+ PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT",
+ ret);
+ }
+
+ *heap_count_out = ret.heap_count;
+
+ return VK_SUCCESS;
+}
+
+VkResult pvr_srv_int_heap_create(int fd,
+ pvr_dev_addr_t base_address,
+ uint64_t size,
+ uint32_t log2_page_size,
+ void *server_memctx,
+ void **const server_heap_out)
+{
+ struct pvr_srv_devmem_int_heap_create_cmd cmd = {
+ .server_memctx = server_memctx,
+ .base_addr = base_address,
+ .size = size,
+ .log2_page_size = log2_page_size,
+ };
+
+ struct pvr_srv_devmem_int_heap_create_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_MM,
+ PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE",
+ ret);
+ }
+
+ *server_heap_out = ret.server_heap;
+
+ return VK_SUCCESS;
+}
+
+void pvr_srv_int_heap_destroy(int fd, void *server_heap)
+{
+ struct pvr_srv_devmem_int_heap_destroy_cmd cmd = {
+ .server_heap = server_heap,
+ };
+
+ struct pvr_srv_devmem_int_heap_destroy_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_MM,
+ PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY",
+ ret);
+ }
+}
+
+/* This bridge function allows the heap name and the heap details to be
+ * queried independently, i.e. the buffer/base_address/size/reserved_size/
+ * log2_page_size pointers are allowed to be NULL.
+ */ +VkResult pvr_srv_get_heap_details(int fd, + uint32_t heap_index, + uint32_t buffer_size, + char *const buffer_out, + pvr_dev_addr_t *const base_address_out, + uint64_t *const size_out, + uint64_t *const reserved_size_out, + uint32_t *const log2_page_size_out) +{ + struct pvr_srv_heap_cfg_details_cmd cmd = { + .heap_config_index = 0, + .heap_index = heap_index, + .buffer_size = buffer_size, + .buffer = buffer_out, + }; + + struct pvr_srv_heap_cfg_details_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + .buffer = buffer_out, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS", + ret); + } + + VG(VALGRIND_MAKE_MEM_DEFINED(buffer_out, buffer_size)); + + if (base_address_out) + *base_address_out = ret.base_addr; + + if (size_out) + *size_out = ret.size; + + if (reserved_size_out) + *reserved_size_out = ret.reserved_size; + + if (log2_page_size_out) + *log2_page_size_out = ret.log2_page_size; + + return VK_SUCCESS; +} + +void pvr_srv_int_ctx_destroy(int fd, void *server_memctx) +{ + struct pvr_srv_devmem_int_ctx_destroy_cmd cmd = { + .server_memctx = server_memctx, + }; + + struct pvr_srv_devmem_int_ctx_destroy_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY", + ret); + } +} + +VkResult pvr_srv_int_ctx_create(int fd, + void **const server_memctx_out, + void **const server_memctx_data_out) +{ + struct pvr_srv_devmem_int_ctx_create_cmd cmd = { + .kernel_memory_ctx = false, + }; + + struct pvr_srv_devmem_int_ctx_create_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE", + ret); + } + + *server_memctx_out = ret.server_memctx; + *server_memctx_data_out = ret.server_memctx_data; + + return VK_SUCCESS; +} + +VkResult pvr_srv_int_reserve_addr(int fd, + void *server_heap, + pvr_dev_addr_t addr, + uint64_t size, + void **const reservation_out) +{ + struct pvr_srv_devmem_int_reserve_range_cmd cmd = { + .server_heap = server_heap, + .addr = addr, + .size = size, + }; + + struct pvr_srv_devmem_int_reserve_range_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE", + ret); + } + + *reservation_out = ret.reservation; + + return VK_SUCCESS; +} + +void pvr_srv_int_unreserve_addr(int fd, void *reservation) +{ + struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd cmd = { + .reservation = reservation, + }; + + struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int 
result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE", + ret); + } +} + +VkResult pvr_srv_alloc_pmr(int fd, + uint64_t size, + uint64_t block_size, + uint32_t phy_blocks, + uint32_t virt_blocks, + uint32_t log2_page_size, + uint64_t flags, + uint32_t pid, + void **const pmr_out) +{ + const char *annotation = "VK PHYSICAL ALLOCATION"; + const uint32_t annotation_size = + strnlen(annotation, DEVMEM_ANNOTATION_MAX_LEN - 1) + 1; + uint32_t mapping_table = 0; + + struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd cmd = { + .size = size, + .block_size = block_size, + .phy_blocks = phy_blocks, + .virt_blocks = virt_blocks, + .mapping_table = &mapping_table, + .log2_page_size = log2_page_size, + .flags = flags, + .annotation_size = annotation_size, + .annotation = annotation, + .pid = pid, + .pdump_flags = 0x00000000U, + }; + + struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED, + "PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR", + ret); + } + + *pmr_out = ret.pmr; + + return VK_SUCCESS; +} + +void pvr_srv_free_pmr(int fd, void *pmr) +{ + struct pvr_srv_pmr_unref_unlock_pmr_cmd cmd = { + .pmr = pmr, + }; + + struct pvr_srv_pmr_unref_unlock_pmr_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_UNKNOWN, + "PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR", + ret); + } +} + +VkResult pvr_srv_int_map_pages(int fd, + void *reservation, + void *pmr, + uint32_t page_count, + uint32_t page_offset, + uint64_t flags, + pvr_dev_addr_t addr) +{ + struct pvr_srv_devmem_int_map_pages_cmd cmd = { + .reservation = reservation, + .pmr = pmr, + .page_count = page_count, + .page_offset = page_offset, + .flags = flags, + .addr = addr, + }; + + struct pvr_srv_devmem_int_map_pages_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES", + ret); + } + + return VK_SUCCESS; +} + +void pvr_srv_int_unmap_pages(int fd, + void *reservation, + pvr_dev_addr_t dev_addr, + uint32_t page_count) +{ + struct pvr_srv_devmem_int_unmap_pages_cmd cmd = { + .reservation = reservation, + .dev_addr = dev_addr, + .page_count = page_count, + }; + + struct pvr_srv_devmem_int_unmap_pages_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_UNKNOWN, + "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES", + ret); + } +} + +VkResult 
pvr_srv_int_map_pmr(int fd, + void *server_heap, + void *reservation, + void *pmr, + uint64_t flags, + void **const mapping_out) +{ + struct pvr_srv_devmem_int_map_pmr_cmd cmd = { + .server_heap = server_heap, + .reservation = reservation, + .pmr = pmr, + .flags = flags, + }; + + struct pvr_srv_devmem_int_map_pmr_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED, + "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR", + ret); + } + + *mapping_out = ret.mapping; + + return VK_SUCCESS; +} + +void pvr_srv_int_unmap_pmr(int fd, void *mapping) +{ + struct pvr_srv_devmem_int_unmap_pmr_cmd cmd = { + .mapping = mapping, + }; + + struct pvr_srv_devmem_int_unmap_pmr_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_MM, + PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_UNKNOWN, + "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR", + ret); + } +} + +VkResult pvr_srv_physmem_import_dmabuf(int fd, + int buffer_fd, + uint64_t flags, + void **const pmr_out, + uint64_t *const size_out, + uint64_t *const align_out) +{ + struct pvr_srv_phys_mem_import_dmabuf_cmd cmd = { + .buffer_fd = buffer_fd, + .flags = flags, + .name_size = 0, + .name = NULL, + }; + + struct pvr_srv_phys_mem_import_dmabuf_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_DMABUF, + PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INVALID_EXTERNAL_HANDLE, + "PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF", + ret); + } + + *pmr_out = ret.pmr; + *size_out = ret.size; + *align_out = ret.align; + + return VK_SUCCESS; +} + +VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out) +{ + struct pvr_srv_phys_mem_export_dmabuf_cmd cmd = { + .pmr = pmr, + }; + + struct pvr_srv_phys_mem_export_dmabuf_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_DMABUF, + PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_OUT_OF_HOST_MEMORY, + "PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF", + ret); + } + + *fd_out = ret.fd; + + return VK_SUCCESS; +} + +VkResult +pvr_srv_rgx_create_compute_context(int fd, + uint32_t priority, + uint32_t reset_framework_cmd_size, + uint8_t *reset_framework_cmd, + void *priv_data, + uint32_t static_compute_context_state_size, + uint8_t *static_compute_context_state, + uint32_t packed_ccb_size, + uint32_t context_flags, + uint64_t robustness_address, + uint32_t max_deadline_ms, + void **const compute_context_out) +{ + struct pvr_srv_rgx_create_compute_context_cmd cmd = { + .priority = priority, + .reset_framework_cmd_size = reset_framework_cmd_size, + .reset_framework_cmd = reset_framework_cmd, + .priv_data = priv_data, + .static_compute_context_state_size = static_compute_context_state_size, + .static_compute_context_state = static_compute_context_state, + .packed_ccb_size = packed_ccb_size, + .context_flags = context_flags, 
+ .robustness_address = robustness_address, + .max_deadline_ms = max_deadline_ms, + }; + + struct pvr_srv_rgx_create_compute_context_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_RGXCMP, + PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT", + ret); + } + + *compute_context_out = ret.compute_context; + + return VK_SUCCESS; +} + +void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context) +{ + struct pvr_srv_rgx_destroy_compute_context_cmd cmd = { + .compute_context = compute_context, + }; + + struct pvr_srv_rgx_destroy_compute_context_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_RGXCMP, + PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_UNKNOWN, + "PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT", + ret); + } +} + +VkResult pvr_srv_rgx_kick_compute2(int fd, + void *compute_context, + uint32_t client_cache_op_seq_num, + uint32_t client_update_count, + void **client_update_ufo_sync_prim_block, + uint32_t *client_update_offset, + uint32_t *client_update_value, + int32_t check_fence, + int32_t update_timeline, + uint32_t cmd_size, + uint8_t *cdm_cmd, + uint32_t ext_job_ref, + uint32_t num_work_groups, + uint32_t num_work_items, + uint32_t pdump_flags, + uint64_t max_deadline_us, + char *update_fence_name, + int32_t *const update_fence_out) +{ + struct pvr_srv_rgx_kick_cdm2_cmd cmd = { + .max_deadline_us = max_deadline_us, + .compute_context = compute_context, + .client_update_offset = client_update_offset, + .client_update_value = client_update_value, + .cdm_cmd = cdm_cmd, + .update_fence_name = update_fence_name, + .client_update_ufo_sync_prim_block = client_update_ufo_sync_prim_block, + .check_fence = check_fence, + .update_timeline = update_timeline, + .client_cache_op_seq_num = client_cache_op_seq_num, + .client_update_count = client_update_count, + .cmd_size = cmd_size, + .ext_job_ref = ext_job_ref, + .num_work_groups = num_work_groups, + .num_work_items = num_work_items, + .pdump_flags = pdump_flags, + }; + + struct pvr_srv_rgx_kick_cdm2_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_RGXCMP, + PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2", + ret); + } + + *update_fence_out = ret.update_fence; + + return VK_SUCCESS; +} + +VkResult +pvr_srv_rgx_create_hwrt_dataset(int fd, + pvr_dev_addr_t pm_mlist_dev_addr0, + pvr_dev_addr_t pm_mlist_dev_addr1, + pvr_dev_addr_t tail_ptrs_dev_addr, + pvr_dev_addr_t macrotile_array_dev_addr0, + pvr_dev_addr_t macrotile_array_dev_addr1, + pvr_dev_addr_t rtc_dev_addr, + pvr_dev_addr_t rgn_header_dev_addr0, + pvr_dev_addr_t rgn_header_dev_addr1, + pvr_dev_addr_t vheap_table_dev_add, + uint64_t flipped_multi_sample_ctl, + uint64_t multi_sample_ctl, + uint64_t rgn_header_size, + void **free_lists, + uint32_t mtile_stride, + uint32_t ppp_screen, + uint32_t te_aa, + uint32_t te_mtile1, + uint32_t te_mtile2, + uint32_t te_screen, + 
uint32_t tpc_size, + uint32_t tpc_stride, + uint32_t isp_merge_lower_x, + uint32_t isp_merge_lower_y, + uint32_t isp_merge_scale_x, + uint32_t isp_merge_scale_y, + uint32_t isp_merge_upper_x, + uint32_t isp_merge_upper_y, + uint32_t isp_mtile_size, + uint16_t max_rts, + void **const hwrt_dataset0_out, + void **const hwrt_dataset1_out) +{ + struct pvr_srv_rgx_create_hwrt_dataset_cmd cmd = { + .pm_mlist_dev_addr0 = pm_mlist_dev_addr0, + .pm_mlist_dev_addr1 = pm_mlist_dev_addr1, + .tail_ptrs_dev_addr = tail_ptrs_dev_addr, + .macrotile_array_dev_addr0 = macrotile_array_dev_addr0, + .macrotile_array_dev_addr1 = macrotile_array_dev_addr1, + .rtc_dev_addr = rtc_dev_addr, + .rgn_header_dev_addr0 = rgn_header_dev_addr0, + .rgn_header_dev_addr1 = rgn_header_dev_addr1, + .vheap_table_dev_add = vheap_table_dev_add, + .flipped_multi_sample_ctl = flipped_multi_sample_ctl, + .multi_sample_ctl = multi_sample_ctl, + .rgn_header_size = rgn_header_size, + .free_lists = free_lists, + .mtile_stride = mtile_stride, + .ppp_screen = ppp_screen, + .te_aa = te_aa, + .te_mtile1 = te_mtile1, + .te_mtile2 = te_mtile2, + .te_screen = te_screen, + .tpc_size = tpc_size, + .tpc_stride = tpc_stride, + .isp_merge_lower_x = isp_merge_lower_x, + .isp_merge_lower_y = isp_merge_lower_y, + .isp_merge_scale_x = isp_merge_scale_x, + .isp_merge_scale_y = isp_merge_scale_y, + .isp_merge_upper_x = isp_merge_upper_x, + .isp_merge_upper_y = isp_merge_upper_y, + .isp_mtile_size = isp_mtile_size, + .max_rts = max_rts, + }; + + struct pvr_srv_rgx_create_hwrt_dataset_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_RGXTA3D, + PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET", + ret); + } + + *hwrt_dataset0_out = ret.hwrt_dataset0; + *hwrt_dataset1_out = ret.hwrt_dataset1; + + return VK_SUCCESS; +} + +void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset) +{ + struct pvr_srv_rgx_destroy_hwrt_dataset_cmd cmd = { + .hwrt_dataset = hwrt_dataset, + }; + + struct pvr_srv_rgx_destroy_hwrt_dataset_ret ret = { + .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED, + }; + + int result; + + result = pvr_srv_bridge_call(fd, + PVR_SRV_BRIDGE_RGXTA3D, + PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET, + &cmd, + sizeof(cmd), + &ret, + sizeof(ret)); + if (result || ret.error != PVR_SRV_OK) { + vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED, + "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET", + ret); + } +} + +VkResult pvr_srv_rgx_create_free_list(int fd, + void *mem_ctx_priv_data, + uint32_t max_free_list_pages, + uint32_t init_free_list_pages, + uint32_t grow_free_list_pages, + uint32_t grow_param_threshold, + void *global_free_list, + enum pvr_srv_bool free_list_check, + pvr_dev_addr_t free_list_dev_addr, + void *free_list_pmr, + uint64_t pmr_offset, + void **const cleanup_cookie_out) +{ + struct pvr_srv_rgx_create_free_list_cmd cmd = { + .free_list_dev_addr = free_list_dev_addr, + .pmr_offset = pmr_offset, + .mem_ctx_priv_data = mem_ctx_priv_data, + .free_list_pmr = free_list_pmr, + .global_free_list = global_free_list, + .free_list_check = free_list_check, + .grow_free_list_pages = grow_free_list_pages, + .grow_param_threshold = grow_param_threshold, + .init_free_list_pages = init_free_list_pages, + .max_free_list_pages = max_free_list_pages, + }; + + struct pvr_srv_rgx_create_free_list_ret 
ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_RGXTA3D,
+ PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST",
+ ret);
+ }
+
+ *cleanup_cookie_out = ret.cleanup_cookie;
+
+ return VK_SUCCESS;
+}
+
+void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie)
+{
+ struct pvr_srv_rgx_destroy_free_list_cmd cmd = {
+ .cleanup_cookie = cleanup_cookie,
+ };
+
+ struct pvr_srv_rgx_destroy_free_list_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ /* FIXME: Do we want to propagate the retry error up the call chain so that
+ * we can do something better than busy wait, or is the expectation that we
+ * should never get into this situation because the driver doesn't attempt
+ * to free any resources while they're in use?
+ */
+ do {
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_RGXTA3D,
+ PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ } while (ret.error == PVR_SRV_ERROR_RETRY);
+
+ if (result || ret.error != PVR_SRV_OK) {
+ vk_bridge_err(VK_ERROR_UNKNOWN,
+ "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST",
+ ret);
+ }
+}
+
+VkResult
+pvr_srv_rgx_create_render_context(int fd,
+ uint32_t priority,
+ pvr_dev_addr_t vdm_callstack_addr,
+ uint32_t reset_framework_cmd_size,
+ uint8_t *reset_framework_cmd,
+ void *priv_data,
+ uint32_t static_render_context_state_size,
+ uint8_t *static_render_context_state,
+ uint32_t packed_ccb_size,
+ uint32_t context_flags,
+ uint64_t robustness_address,
+ uint32_t max_geom_deadline_ms,
+ uint32_t max_frag_deadline_ms,
+ void **const render_context_out)
+{
+ struct pvr_srv_rgx_create_render_context_cmd cmd = {
+ .priority = priority,
+ .vdm_callstack_addr = vdm_callstack_addr,
+ .reset_framework_cmd_size = reset_framework_cmd_size,
+ .reset_framework_cmd = reset_framework_cmd,
+ .priv_data = priv_data,
+ .static_render_context_state_size = static_render_context_state_size,
+ .static_render_context_state = static_render_context_state,
+ .packed_ccb_size = packed_ccb_size,
+ .context_flags = context_flags,
+ .robustness_address = robustness_address,
+ .max_ta_deadline_ms = max_geom_deadline_ms,
+ .max_3d_deadline_ms = max_frag_deadline_ms,
+ };
+
+ struct pvr_srv_rgx_create_render_context_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_RGXTA3D,
+ PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+ "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT",
+ ret);
+ }
+
+ *render_context_out = ret.render_context;
+
+ return VK_SUCCESS;
+}
+
+void pvr_srv_rgx_destroy_render_context(int fd, void *render_context)
+{
+ struct pvr_srv_rgx_destroy_render_context_cmd cmd = {
+ .render_context = render_context,
+ };
+
+ struct pvr_srv_rgx_destroy_render_context_ret ret = {
+ .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_RGXTA3D,
+ PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ vk_bridge_err(VK_ERROR_UNKNOWN,
+ "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT",
+ ret);
+ }
+} + +VkResult pvr_srv_rgx_kick_render2(int fd, + void *render_ctx, + uint32_t client_cache_op_seq_num, + uint32_t client_geom_fence_count, + void **client_geom_fence_sync_prim_block, + uint32_t *client_geom_fence_sync_offset, + uint32_t *client_geom_fence_value, + uint32_t client_geom_update_count, + void **client_geom_update_sync_prim_block, + uint32_t *client_geom_update_sync_offset, + uint32_t *client_geom_update_value, + uint32_t client_frag_update_count, + void **client_frag_update_sync_prim_block, + uint32_t *client_frag_update_sync_offset, + uint32_t *client_frag_update_value, + void *pr_fence_ufo_sync_prim_block, + uint32_t client_pr_fence_ufo_sync_offset, + uint32_t client_pr_fence_value, + int32_t check_fence, + int32_t update_timeline, + int32_t *const update_fence_out, + char *update_fence_name, + int32_t check_fence_frag, + int32_t update_timeline_frag, + int32_t *const update_fence_frag_out, + char *update_fence_name_frag, + uint32_t cmd_geom_size, + uint8_t *cmd_geom, + uint32_t cmd_frag_pr_size, + uint8_t *cmd_frag_pr, + uint32_t cmd_frag_size, + uint8_t *cmd_frag, + uint32_t ext_job_ref, + bool kick_geom, + bool kick_pr, + bool kick_frag, + bool abort, + uint32_t pdump_flags, + void *hw_rt_dataset, + void *zs_buffer, + void *msaa_scratch_buffer, + uint32_t sync_pmr_count, + uint32_t *sync_pmr_flags, + void **sync_pmrs, + uint32_t render_target_size, + uint32_t num_draw_calls, + uint32_t num_indices, + uint32_t num_mrts, + uint64_t deadline) +{ + struct pvr_srv_rgx_kick_ta3d2_cmd cmd = { + .deadline = deadline, + .hw_rt_dataset = hw_rt_dataset, + .msaa_scratch_buffer = msaa_scratch_buffer, + .pr_fence_ufo_sync_prim_block = pr_fence_ufo_sync_prim_block, + .render_ctx = render_ctx, + .zs_buffer = zs_buffer, + .client_3d_update_sync_offset = client_frag_update_sync_offset, + .client_3d_update_value = client_frag_update_value, + .client_ta_fence_sync_offset = client_geom_fence_sync_offset, + .client_ta_fence_value = client_geom_fence_value, + .client_ta_update_sync_offset = client_geom_update_sync_offset, + .client_ta_update_value = client_geom_update_value, + .sync_pmr_flags = sync_pmr_flags, + .cmd_3d = cmd_frag, + .cmd_3d_pr = cmd_frag_pr, + .cmd_ta = cmd_geom, + .update_fence_name = update_fence_name, + .update_fence_name_3d = update_fence_name_frag, + .client_3d_update_sync_prim_block = client_frag_update_sync_prim_block, + .client_ta_fence_sync_prim_block = client_geom_fence_sync_prim_block, + .client_ta_update_sync_prim_block = client_geom_update_sync_prim_block, + .sync_pmrs = sync_pmrs, + .abort = abort, + .kick_3d = kick_frag, + .kick_pr = kick_pr, + .kick_ta = kick_geom, + .check_fence = check_fence, + .check_fence_3d = check_fence_frag, + .update_timeline = update_timeline, + .update_timeline_3d = update_timeline_frag, + .cmd_3d_size = cmd_frag_size, + .cmd_3d_pr_size = cmd_frag_pr_size, + .client_3d_update_count = client_frag_update_count, + .client_cache_op_seq_num = client_cache_op_seq_num, + .client_ta_fence_count = client_geom_fence_count, + .client_ta_update_count = client_geom_update_count, + .ext_job_ref = ext_job_ref, + .client_pr_fence_ufo_sync_offset = client_pr_fence_ufo_sync_offset, + .client_pr_fence_value = client_pr_fence_value, + .num_draw_calls = num_draw_calls, + .num_indices = num_indices, + .num_mrts = num_mrts, + .pdump_flags = pdump_flags, + .render_target_size = render_target_size, + .sync_pmr_count = sync_pmr_count, + .cmd_ta_size = cmd_geom_size, + }; + + struct pvr_srv_rgx_kick_ta3d2_ret ret = { + .error = 
PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+ .update_fence = -1,
+ .update_fence_3d = -1,
+ };
+
+ int result;
+
+ result = pvr_srv_bridge_call(fd,
+ PVR_SRV_BRIDGE_RGXTA3D,
+ PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2,
+ &cmd,
+ sizeof(cmd),
+ &ret,
+ sizeof(ret));
+ if (result || ret.error != PVR_SRV_OK) {
+ /* There is no 'retry' VkResult, so treat it as VK_NOT_READY instead. */
+ if (ret.error == PVR_SRV_ERROR_RETRY)
+ return VK_NOT_READY;
+
+ return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+ "PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2",
+ ret);
+ }
+
+ *update_fence_out = ret.update_fence;
+ *update_fence_frag_out = ret.update_fence_3d;
+
+ return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h
new file mode 100644
index 00000000000..fa829888759
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h
@@ -0,0 +1,943 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BRIDGE_H
+#define PVR_SRV_BRIDGE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/******************************************************************************
+ Services bridges
+ ******************************************************************************/
+
+#define PVR_SRV_BRIDGE_SRVCORE 1UL
+
+#define PVR_SRV_BRIDGE_SRVCORE_CONNECT 0UL
+#define PVR_SRV_BRIDGE_SRVCORE_DISCONNECT 1UL
+
+#define PVR_SRV_BRIDGE_SYNC 2UL
+
+#define PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK 0UL
+#define PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK 1UL
+
+#define PVR_SRV_BRIDGE_MM 6UL
+
+#define PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR 8UL
+#define PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR 10UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE 15UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY 16UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE 17UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY 18UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR 19UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR 20UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE 21UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE 22UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES 24UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES 25UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT 30UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS 32UL
+
+#define PVR_SRV_BRIDGE_DMABUF 11UL
+
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF 0UL
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF 1UL
+
+#define PVR_SRV_BRIDGE_RGXCMP 129UL
+
+#define PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT 0UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT 1UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 5UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D 130UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET 0UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET 1UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST 6UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST 7UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT 8UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT 9UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 12UL
+
+/******************************************************************************
+ DRM Services specific defines
+ ******************************************************************************/
+/* DRM command numbers, relative to DRM_COMMAND_BASE.
+ * These defines must be prefixed with "DRM_".
+ */
+#define DRM_SRVKM_CMD 0U /* Used for Services ioctls */
+
+/* These defines must be prefixed with "DRM_IOCTL_". */
+#define DRM_IOCTL_SRVKM_CMD \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_CMD, struct drm_srvkm_cmd)
+
+/******************************************************************************
+ Misc defines
+ ******************************************************************************/
+
+#define SUPPORT_RGX_SET_OFFSET BITFIELD_BIT(4U)
+#define DEBUG_SET_OFFSET BITFIELD_BIT(10U)
+#define SUPPORT_BUFFER_SYNC_SET_OFFSET BITFIELD_BIT(11U)
+#define OPTIONS_BIT31 BITFIELD_BIT(31U)
+
+#define RGX_BUILD_OPTIONS \
+ (SUPPORT_RGX_SET_OFFSET | DEBUG_SET_OFFSET | \
+ SUPPORT_BUFFER_SYNC_SET_OFFSET | OPTIONS_BIT31)
+
+#define PVR_SRV_VERSION_MAJ 1U
+#define PVR_SRV_VERSION_MIN 14U
+
+#define PVR_SRV_VERSION \
+ (((uint32_t)((uint32_t)(PVR_SRV_VERSION_MAJ)&0xFFFFU) << 16U) | \
+ (((PVR_SRV_VERSION_MIN)&0xFFFFU) << 0U))
+
+#define PVR_SRV_VERSION_BUILD 5843584
+
+/*! This flag gets set if the client is 64-bit compatible.
*/ +#define PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT BITFIELD_BIT(5U) + +#define DEVMEM_ANNOTATION_MAX_LEN 64U + +#define PVR_SRV_SYNC_MAX 12U + +#define PVR_BUFFER_FLAG_READ BITFIELD_BIT(0U) +#define PVR_BUFFER_FLAG_WRITE BITFIELD_BIT(1U) + +/****************************************************************************** + Services Boolean + ******************************************************************************/ + +enum pvr_srv_bool { + PVR_SRV_FALSE = 0, + PVR_SRV_TRUE = 1, + PVR_SRV_FORCE_ALIGN = 0x7fffffff +}; + +/****************************************************************************** + Service Error codes + ******************************************************************************/ + +enum pvr_srv_error { + PVR_SRV_OK, + PVR_SRV_ERROR_RETRY = 25, + PVR_SRV_ERROR_BRIDGE_CALL_FAILED = 37, + PVR_SRV_ERROR_FORCE_I32 = 0x7fffffff +}; + +/****************************************************************************** + PVR_SRV_BRIDGE_SRVCORE_CONNECT structs + ******************************************************************************/ + +struct pvr_srv_bridge_connect_cmd { + uint32_t build_options; + uint32_t DDK_build; + uint32_t DDK_version; + uint32_t flags; +} PACKED; + +struct pvr_srv_bridge_connect_ret { + uint64_t bvnc; + enum pvr_srv_error error; + uint32_t capability_flags; + uint8_t kernel_arch; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_SRVCORE_DISCONNECT struct + ******************************************************************************/ + +struct pvr_srv_bridge_disconnect_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK struct + ******************************************************************************/ + +struct pvr_srv_bridge_alloc_sync_primitive_block_ret { + void *handle; + void *pmr; + enum pvr_srv_error error; + uint32_t size; + uint32_t addr; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK structs + ******************************************************************************/ + +struct pvr_srv_bridge_free_sync_primitive_block_cmd { + void *handle; +} PACKED; + +struct pvr_srv_bridge_free_sync_primitive_block_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_ctx_create_cmd { + uint32_t kernel_memory_ctx; +} PACKED; + +struct pvr_srv_devmem_int_ctx_create_ret { + void *server_memctx; + void *server_memctx_data; + enum pvr_srv_error error; + uint32_t cpu_cache_line_size; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_ctx_destroy_cmd { + void *server_memctx; +} PACKED; + +struct pvr_srv_devmem_int_ctx_destroy_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT structs + ******************************************************************************/ + +struct pvr_srv_heap_count_cmd { + uint32_t heap_config_index; +} PACKED; + 
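A reading aid for the cmd/ret pairs in this header: every bridge function
declared here reduces to a single ioctl on the Services render fd, with the
PACKED cmd struct as the input payload and the PACKED ret struct as the
output payload. Below is a minimal sketch of the dispatch helper, assuming
libdrm's drmIoctl() wrapper and the DRM_IOCTL_SRVKM_CMD define above; the
actual pvr_srv_bridge_call() implementation lives in pvr_srv_bridge.c and is
not part of this hunk:

   #include <stdint.h>
   #include <xf86drm.h> /* drmIoctl() */

   static int pvr_srv_bridge_call(int fd,
                                  uint32_t bridge_id,
                                  uint32_t function_id,
                                  void *input,
                                  uint32_t input_size,
                                  void *output,
                                  uint32_t output_size)
   {
      /* Pack both payload pointers into the single SRVKM ioctl; see
       * struct drm_srvkm_cmd further down in this header.
       */
      struct drm_srvkm_cmd cmd = {
         .bridge_id = bridge_id,
         .bridge_func_id = function_id,
         .in_data_ptr = (uint64_t)(uintptr_t)input,
         .out_data_ptr = (uint64_t)(uintptr_t)output,
         .in_data_size = input_size,
         .out_data_size = output_size,
      };

      /* Returns 0 on success; the Services status code comes back
       * separately in the ret struct's 'error' field.
       */
      return drmIoctl(fd, DRM_IOCTL_SRVKM_CMD, &cmd);
   }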
+struct pvr_srv_heap_count_ret { + enum pvr_srv_error error; + uint32_t heap_count; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS structs + ******************************************************************************/ + +struct pvr_srv_heap_cfg_details_cmd { + char *buffer; + uint32_t heap_config_index; + uint32_t heap_index; + uint32_t buffer_size; +} PACKED; + +struct pvr_srv_heap_cfg_details_ret { + pvr_dev_addr_t base_addr; + uint64_t size; + uint64_t reserved_size; + char *buffer; + enum pvr_srv_error error; + uint32_t log2_page_size; + uint32_t log2_alignment; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_heap_create_cmd { + pvr_dev_addr_t base_addr; + uint64_t size; + void *server_memctx; + uint32_t log2_page_size; +} PACKED; + +struct pvr_srv_devmem_int_heap_create_ret { + void *server_heap; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_heap_destroy_cmd { + void *server_heap; +} PACKED; + +struct pvr_srv_devmem_int_heap_destroy_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_reserve_range_cmd { + pvr_dev_addr_t addr; + uint64_t size; + void *server_heap; +} PACKED; + +struct pvr_srv_devmem_int_reserve_range_ret { + void *reservation; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE structs + ******************************************************************************/ + +struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd { + void *reservation; +} PACKED; + +struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR structs + ******************************************************************************/ + +struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd { + uint64_t block_size; + uint64_t size; + uint32_t *mapping_table; + const char *annotation; + uint32_t annotation_size; + uint32_t log2_page_size; + uint32_t phy_blocks; + uint32_t virt_blocks; + uint32_t pdump_flags; + uint32_t pid; + uint64_t flags; +} PACKED; + +struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret { + void *pmr; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR structs + ******************************************************************************/ + +struct pvr_srv_pmr_unref_unlock_pmr_cmd { + void *pmr; +} PACKED; + +struct pvr_srv_pmr_unref_unlock_pmr_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES structs + 
******************************************************************************/ + +struct pvr_srv_devmem_int_map_pages_cmd { + pvr_dev_addr_t addr; + void *pmr; + void *reservation; + uint32_t page_count; + uint32_t page_offset; + uint64_t flags; +} PACKED; + +struct pvr_srv_devmem_int_map_pages_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_unmap_pages_cmd { + pvr_dev_addr_t dev_addr; + void *reservation; + uint32_t page_count; +} PACKED; + +struct pvr_srv_devmem_int_unmap_pages_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_map_pmr_cmd { + void *server_heap; + void *pmr; + void *reservation; + uint64_t flags; +} PACKED; + +struct pvr_srv_devmem_int_map_pmr_ret { + void *mapping; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR structs + ******************************************************************************/ + +struct pvr_srv_devmem_int_unmap_pmr_cmd { + void *mapping; +} PACKED; + +struct pvr_srv_devmem_int_unmap_pmr_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF structs + ******************************************************************************/ + +struct pvr_srv_phys_mem_import_dmabuf_cmd { + const char *name; + int buffer_fd; + uint32_t name_size; + uint64_t flags; +} PACKED; + +struct pvr_srv_phys_mem_import_dmabuf_ret { + uint64_t align; + uint64_t size; + void *pmr; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF structs + ******************************************************************************/ + +struct pvr_srv_phys_mem_export_dmabuf_cmd { + void *pmr; +} PACKED; + +struct pvr_srv_phys_mem_export_dmabuf_ret { + enum pvr_srv_error error; + int fd; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT structs + ******************************************************************************/ + +struct pvr_srv_rgx_create_compute_context_cmd { + uint64_t robustness_address; + void *priv_data; + uint8_t *reset_framework_cmd; + uint8_t *static_compute_context_state; + uint32_t context_flags; + uint32_t reset_framework_cmd_size; + uint32_t max_deadline_ms; + uint32_t packed_ccb_size; + /* RGX_CONTEXT_PRIORITY_... flags. 
*/ + uint32_t priority; + uint32_t static_compute_context_state_size; +} PACKED; + +struct pvr_srv_rgx_create_compute_context_ret { + void *compute_context; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT structs + ******************************************************************************/ + +struct pvr_srv_rgx_destroy_compute_context_cmd { + void *compute_context; +} PACKED; + +struct pvr_srv_rgx_destroy_compute_context_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 structs + ******************************************************************************/ + +struct pvr_srv_rgx_kick_cdm2_cmd { + uint64_t max_deadline_us; + void *compute_context; + uint32_t *client_update_offset; + uint32_t *client_update_value; + uint8_t *cdm_cmd; + char *update_fence_name; + void **client_update_ufo_sync_prim_block; + int32_t check_fence; + int32_t update_timeline; + uint32_t client_cache_op_seq_num; + uint32_t client_update_count; + uint32_t cmd_size; + uint32_t ext_job_ref; + uint32_t num_work_groups; + uint32_t num_work_items; + uint32_t pdump_flags; +} PACKED; + +struct pvr_srv_rgx_kick_cdm2_ret { + enum pvr_srv_error error; + int32_t update_fence; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET structs + ******************************************************************************/ + +struct pvr_srv_rgx_create_hwrt_dataset_cmd { + pvr_dev_addr_t pm_mlist_dev_addr0; + pvr_dev_addr_t pm_mlist_dev_addr1; + pvr_dev_addr_t tail_ptrs_dev_addr; + pvr_dev_addr_t macrotile_array_dev_addr0; + pvr_dev_addr_t macrotile_array_dev_addr1; + pvr_dev_addr_t rtc_dev_addr; + pvr_dev_addr_t rgn_header_dev_addr0; + pvr_dev_addr_t rgn_header_dev_addr1; + pvr_dev_addr_t vheap_table_dev_add; + uint64_t flipped_multi_sample_ctl; + uint64_t multi_sample_ctl; + uint64_t rgn_header_size; + void **free_lists; + uint32_t mtile_stride; + uint32_t ppp_screen; + uint32_t te_aa; + uint32_t te_mtile1; + uint32_t te_mtile2; + uint32_t te_screen; + uint32_t tpc_size; + uint32_t tpc_stride; + uint32_t isp_merge_lower_x; + uint32_t isp_merge_lower_y; + uint32_t isp_merge_scale_x; + uint32_t isp_merge_scale_y; + uint32_t isp_merge_upper_x; + uint32_t isp_merge_upper_y; + uint32_t isp_mtile_size; + uint16_t max_rts; +} PACKED; + +struct pvr_srv_rgx_create_hwrt_dataset_ret { + void *hwrt_dataset0; + void *hwrt_dataset1; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET structs + ******************************************************************************/ + +struct pvr_srv_rgx_destroy_hwrt_dataset_cmd { + void *hwrt_dataset; +} PACKED; + +struct pvr_srv_rgx_destroy_hwrt_dataset_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST structs + ******************************************************************************/ + +struct pvr_srv_rgx_create_free_list_cmd { + pvr_dev_addr_t free_list_dev_addr; + uint64_t pmr_offset; + void *mem_ctx_priv_data; + void *free_list_pmr; + void *global_free_list; + enum pvr_srv_bool free_list_check; + uint32_t grow_free_list_pages; + uint32_t 
grow_param_threshold; + uint32_t init_free_list_pages; + uint32_t max_free_list_pages; +} PACKED; + +struct pvr_srv_rgx_create_free_list_ret { + void *cleanup_cookie; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST structs + ******************************************************************************/ + +struct pvr_srv_rgx_destroy_free_list_cmd { + void *cleanup_cookie; +} PACKED; + +struct pvr_srv_rgx_destroy_free_list_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT structs + ******************************************************************************/ + +struct pvr_srv_rgx_create_render_context_cmd { + pvr_dev_addr_t vdm_callstack_addr; + uint64_t robustness_address; + void *priv_data; + uint8_t *reset_framework_cmd; + uint8_t *static_render_context_state; +#define RGX_CONTEXT_FLAG_DISABLESLR BITFIELD_BIT(0U) + uint32_t context_flags; + uint32_t reset_framework_cmd_size; + uint32_t max_3d_deadline_ms; + uint32_t max_ta_deadline_ms; + uint32_t packed_ccb_size; +#define RGX_CONTEXT_PRIORITY_REALTIME UINT32_MAX +#define RGX_CONTEXT_PRIORITY_HIGH 2U +#define RGX_CONTEXT_PRIORITY_MEDIUM 1U +#define RGX_CONTEXT_PRIORITY_LOW 0U + uint32_t priority; + uint32_t static_render_context_state_size; +} PACKED; + +struct pvr_srv_rgx_create_render_context_ret { + void *render_context; + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT structs + ******************************************************************************/ + +struct pvr_srv_rgx_destroy_render_context_cmd { + void *render_context; +} PACKED; + +struct pvr_srv_rgx_destroy_render_context_ret { + enum pvr_srv_error error; +} PACKED; + +/****************************************************************************** + PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 structs + ******************************************************************************/ + +struct pvr_srv_rgx_kick_ta3d2_cmd { + uint64_t deadline; + void *hw_rt_dataset; + void *msaa_scratch_buffer; + void *pr_fence_ufo_sync_prim_block; + void *render_ctx; + void *zs_buffer; + uint32_t *client_3d_update_sync_offset; + uint32_t *client_3d_update_value; + uint32_t *client_ta_fence_sync_offset; + uint32_t *client_ta_fence_value; + uint32_t *client_ta_update_sync_offset; + uint32_t *client_ta_update_value; + uint32_t *sync_pmr_flags; + uint8_t *cmd_3d; + uint8_t *cmd_3d_pr; + uint8_t *cmd_ta; + char *update_fence_name; + char *update_fence_name_3d; + void **client_3d_update_sync_prim_block; + void **client_ta_fence_sync_prim_block; + void **client_ta_update_sync_prim_block; + void **sync_pmrs; + enum pvr_srv_bool abort; + enum pvr_srv_bool kick_3d; + enum pvr_srv_bool kick_pr; + enum pvr_srv_bool kick_ta; + int32_t check_fence; + int32_t check_fence_3d; + int32_t update_timeline; + int32_t update_timeline_3d; + uint32_t cmd_3d_size; + uint32_t cmd_3d_pr_size; + uint32_t client_3d_update_count; + uint32_t client_cache_op_seq_num; + uint32_t client_ta_fence_count; + uint32_t client_ta_update_count; + uint32_t ext_job_ref; + uint32_t client_pr_fence_ufo_sync_offset; + uint32_t client_pr_fence_value; + uint32_t num_draw_calls; + uint32_t num_indices; + uint32_t num_mrts; + uint32_t pdump_flags; + uint32_t render_target_size; + uint32_t 
sync_pmr_count; + uint32_t cmd_ta_size; +} PACKED; + +struct pvr_srv_rgx_kick_ta3d2_ret { + enum pvr_srv_error error; + int32_t update_fence; + int32_t update_fence_3d; +} PACKED; + +/****************************************************************************** + Ioctl structure to pass cmd and ret structures + ******************************************************************************/ + +struct drm_srvkm_cmd { + uint32_t bridge_id; + uint32_t bridge_func_id; + uint64_t in_data_ptr; + uint64_t out_data_ptr; + uint32_t in_data_size; + uint32_t out_data_size; +}; + +/****************************************************************************** + Bridge function prototype + ******************************************************************************/ + +VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out); +void pvr_srv_connection_destroy(int fd); + +VkResult pvr_srv_alloc_sync_primitive_block(int fd, + void **const handle_out, + void **const pmr_out, + uint32_t *const size_out, + uint32_t *const addr_out); +void pvr_srv_free_sync_primitive_block(int fd, void *handle); + +VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out); +VkResult pvr_srv_get_heap_details(int fd, + uint32_t heap_index, + uint32_t buffer_size, + char *const buffer_out, + pvr_dev_addr_t *const base_address_out, + uint64_t *const size_out, + uint64_t *const reserved_size_out, + uint32_t *const log2_page_size_out); + +VkResult pvr_srv_int_heap_create(int fd, + pvr_dev_addr_t base_address, + uint64_t size, + uint32_t log2_page_size, + void *server_memctx, + void **const server_heap_out); +void pvr_srv_int_heap_destroy(int fd, void *server_heap); + +VkResult pvr_srv_int_ctx_create(int fd, + void **const server_memctx_out, + void **const server_memctx_data_out); +void pvr_srv_int_ctx_destroy(int fd, void *server_memctx); + +VkResult pvr_srv_int_reserve_addr(int fd, + void *server_heap, + pvr_dev_addr_t addr, + uint64_t size, + void **const reservation_out); +void pvr_srv_int_unreserve_addr(int fd, void *reservation); + +VkResult pvr_srv_alloc_pmr(int fd, + uint64_t size, + uint64_t block_size, + uint32_t phy_blocks, + uint32_t virt_blocks, + uint32_t log2_page_size, + uint64_t flags, + uint32_t pid, + void **const pmr_out); +void pvr_srv_free_pmr(int fd, void *pmr); + +VkResult pvr_srv_int_map_pages(int fd, + void *reservation, + void *pmr, + uint32_t page_count, + uint32_t page_offset, + uint64_t flags, + pvr_dev_addr_t addr); +void pvr_srv_int_unmap_pages(int fd, + void *reservation, + pvr_dev_addr_t dev_addr, + uint32_t page_count); + +VkResult pvr_srv_int_map_pmr(int fd, + void *server_heap, + void *reservation, + void *pmr, + uint64_t flags, + void **const mapping_out); +void pvr_srv_int_unmap_pmr(int fd, void *mapping); + +VkResult pvr_srv_physmem_import_dmabuf(int fd, + int buffer_fd, + uint64_t flags, + void **const pmr_out, + uint64_t *const size_out, + uint64_t *const align_out); +VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out); + +VkResult +pvr_srv_rgx_create_compute_context(int fd, + uint32_t priority, + uint32_t reset_framework_cmd_size, + uint8_t *reset_framework_cmd, + void *priv_data, + uint32_t static_compute_context_state_size, + uint8_t *static_compute_context_state, + uint32_t packed_ccb_size, + uint32_t context_flags, + uint64_t robustness_address, + uint32_t max_deadline_ms, + void **const compute_context_out); +void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context); + +VkResult pvr_srv_rgx_kick_compute2(int fd, + void 
*compute_context, + uint32_t client_cache_op_seq_num, + uint32_t client_update_count, + void **client_update_ufo_sync_prim_block, + uint32_t *client_update_offset, + uint32_t *client_update_value, + int32_t check_fence, + int32_t update_timeline, + uint32_t cmd_size, + uint8_t *cdm_cmd, + uint32_t ext_job_ref, + uint32_t num_work_groups, + uint32_t num_work_items, + uint32_t pdump_flags, + uint64_t max_deadline_us, + char *update_fence_name, + int32_t *const update_fence_out); + +VkResult +pvr_srv_rgx_create_hwrt_dataset(int fd, + pvr_dev_addr_t pm_mlist_dev_addr0, + pvr_dev_addr_t pm_mlist_dev_addr1, + pvr_dev_addr_t tail_ptrs_dev_addr, + pvr_dev_addr_t macrotile_array_dev_addr0, + pvr_dev_addr_t macrotile_array_dev_addr1, + pvr_dev_addr_t rtc_dev_addr, + pvr_dev_addr_t rgn_header_dev_addr0, + pvr_dev_addr_t rgn_header_dev_addr1, + pvr_dev_addr_t vheap_table_dev_add, + uint64_t flipped_multi_sample_ctl, + uint64_t multi_sample_ctl, + uint64_t rgn_header_size, + void **free_lists, + uint32_t mtile_stride, + uint32_t ppp_screen, + uint32_t te_aa, + uint32_t te_mtile1, + uint32_t te_mtile2, + uint32_t te_screen, + uint32_t tpc_size, + uint32_t tpc_stride, + uint32_t isp_merge_lower_x, + uint32_t isp_merge_lower_y, + uint32_t isp_merge_scale_x, + uint32_t isp_merge_scale_y, + uint32_t isp_merge_upper_x, + uint32_t isp_merge_upper_y, + uint32_t isp_mtile_size, + uint16_t max_rts, + void **const hwrt_dataset0_out, + void **const hwrt_dataset1_out); + +void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset); + +VkResult pvr_srv_rgx_create_free_list(int fd, + void *mem_ctx_priv_data, + uint32_t max_free_list_pages, + uint32_t init_free_list_pages, + uint32_t grow_free_list_pages, + uint32_t grow_param_threshold, + void *global_free_list, + enum pvr_srv_bool free_list_check, + pvr_dev_addr_t free_list_dev_addr, + void *free_list_pmr, + uint64_t pmr_offset, + void **const cleanup_cookie_out); + +void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie); + +VkResult +pvr_srv_rgx_create_render_context(int fd, + uint32_t priority, + pvr_dev_addr_t vdm_callstack_addr, + uint32_t reset_framework_cmd_size, + uint8_t *reset_framework_cmd, + void *priv_data, + uint32_t static_render_context_state_size, + uint8_t *static_render_context_state, + uint32_t packed_ccb_size, + uint32_t context_flags, + uint64_t robustness_address, + uint32_t max_geom_deadline_ms, + uint32_t max_frag_deadline_ms, + void **const render_context_out); + +void pvr_srv_rgx_destroy_render_context(int fd, void *render_context); + +VkResult pvr_srv_rgx_kick_render2(int fd, + void *render_ctx, + uint32_t client_cache_op_seq_num, + uint32_t client_geom_fence_count, + void **client_geom_fence_sync_prim_block, + uint32_t *client_geom_fence_sync_offset, + uint32_t *client_geom_fence_value, + uint32_t client_geom_update_count, + void **client_geom_update_sync_prim_block, + uint32_t *client_geom_update_sync_offset, + uint32_t *client_geom_update_value, + uint32_t client_frag_update_count, + void **client_frag_update_sync_prim_block, + uint32_t *client_frag_update_sync_offset, + uint32_t *client_frag_update_value, + void *client_pr_fence_ufo_sync_prim_block, + uint32_t client_pr_fence_ufo_sync_offset, + uint32_t client_pr_fence_value, + int32_t check_fence, + int32_t update_timeline, + int32_t *const update_fence_out, + char *update_fence_name, + int32_t check_fence_frag, + int32_t update_timeline_frag, + int32_t *const update_fence_frag_out, + char *update_fence_name_frag, + uint32_t cmd_geom_size, + uint8_t *cmd_geom, + 
uint32_t cmd_frag_pr_size,
+                                  uint8_t *cmd_frag_pr,
+                                  uint32_t cmd_frag_size,
+                                  uint8_t *cmd_frag,
+                                  uint32_t ext_job_ref,
+                                  bool kick_geom,
+                                  bool kick_pr,
+                                  bool kick_frag,
+                                  bool abort,
+                                  uint32_t pdump_flags,
+                                  void *hw_rt_dataset,
+                                  void *zs_buffer,
+                                  void *msaa_scratch_buffer,
+                                  uint32_t sync_pmr_count,
+                                  uint32_t *sync_pmr_flags,
+                                  void **sync_pmrs,
+                                  uint32_t render_target_size,
+                                  uint32_t num_draw_calls,
+                                  uint32_t num_indices,
+                                  uint32_t num_mrts,
+                                  uint64_t deadline);
+
+#endif /* PVR_SRV_BRIDGE_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h
new file mode 100644
index 00000000000..71a5755d6db
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMMON_H
+#define PVR_SRV_JOB_COMMON_H
+
+#include <stdint.h>
+
+#include "pvr_srv_bridge.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+#define PVR_SRV_SYNC_DEV_PATH "/dev/pvr_sync"
+
+static inline uint32_t
+pvr_srv_from_winsys_priority(enum pvr_winsys_ctx_priority priority)
+{
+   switch (priority) {
+   case PVR_WINSYS_CTX_PRIORITY_HIGH:
+      return RGX_CONTEXT_PRIORITY_HIGH;
+   case PVR_WINSYS_CTX_PRIORITY_MEDIUM:
+      return RGX_CONTEXT_PRIORITY_MEDIUM;
+   case PVR_WINSYS_CTX_PRIORITY_LOW:
+      return RGX_CONTEXT_PRIORITY_LOW;
+   default:
+      unreachable("Invalid winsys context priority.");
+   }
+}
+
+#endif /* PVR_SRV_JOB_COMMON_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c
new file mode 100644
index 00000000000..b6fd8e0533c
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_compute.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+struct pvr_srv_winsys_compute_ctx {
+   struct pvr_winsys_compute_ctx base;
+
+   void *handle;
+
+   int timeline;
+};
+
+#define to_pvr_srv_winsys_compute_ctx(ctx) \
+   container_of(ctx, struct pvr_srv_winsys_compute_ctx, base)
+
+VkResult pvr_srv_winsys_compute_ctx_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_compute_ctx_create_info *create_info,
+   struct pvr_winsys_compute_ctx **const ctx_out)
+{
+   struct rogue_fwif_static_computecontext_state static_state = {
+      .ctx_switch_regs = {
+         .cdm_context_state_base_addr =
+            create_info->static_state.cdm_ctx_state_base_addr,
+
+         .cdm_context_pds0 = create_info->static_state.cdm_ctx_store_pds0,
+         .cdm_context_pds0_b =
+            create_info->static_state.cdm_ctx_store_pds0_b,
+         .cdm_context_pds1 = create_info->static_state.cdm_ctx_store_pds1,
+
+         .cdm_terminate_pds = create_info->static_state.cdm_ctx_terminate_pds,
+         .cdm_terminate_pds1 =
+            create_info->static_state.cdm_ctx_terminate_pds1,
+
+         .cdm_resume_pds0 = create_info->static_state.cdm_ctx_resume_pds0,
+         .cdm_resume_pds0_b = create_info->static_state.cdm_ctx_resume_pds0_b,
+      },
+   };
+
+   struct rogue_fwif_rf_cmd reset_cmd = {
+      .flags = 0U,
+   };
+
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_compute_ctx *srv_ctx;
+   VkResult result;
+
+   srv_ctx = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_ctx),
+                      8U,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ctx)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_ctx->timeline = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline < 0)
+      goto err_free_srv_ctx;
+
+   result = pvr_srv_rgx_create_compute_context(
+      srv_ws->render_fd,
+      pvr_srv_from_winsys_priority(create_info->priority),
+      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
+      (uint8_t *)&reset_cmd,
+      srv_ws->server_memctx_data,
+      sizeof(static_state),
+      (uint8_t *)&static_state,
+      0U,
+      RGX_CONTEXT_FLAG_DISABLESLR,
+      0U,
+      UINT_MAX,
+      &srv_ctx->handle);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline;
+
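+   /* Success: srv_ctx->handle is now a live kernel compute context and
+    * srv_ctx->timeline is the /dev/pvr_sync timeline from which an update
+    * fence is generated on each kick.
+    */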
+   srv_ctx->base.ws = ws;
+
+   *ctx_out = &srv_ctx->base;
+
+   return VK_SUCCESS;
+
+err_close_timeline:
+   close(srv_ctx->timeline);
+
+err_free_srv_ctx:
+   vk_free(srv_ws->alloc, srv_ctx);
+
+   return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+}
+
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+   struct pvr_srv_winsys_compute_ctx *srv_ctx =
+      to_pvr_srv_winsys_compute_ctx(ctx);
+
+   pvr_srv_rgx_destroy_compute_context(srv_ws->render_fd, srv_ctx->handle);
+   close(srv_ctx->timeline);
+   vk_free(srv_ws->alloc, srv_ctx);
+}
+
+static void pvr_srv_compute_cmd_init(
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct rogue_fwif_cmd_compute *cmd)
+{
+   struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs;
+
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmn.frame_num = submit_info->frame_num;
+
+   fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
+   fw_regs->cdm_item = submit_info->regs.cdm_item;
+   fw_regs->compute_cluster = submit_info->regs.compute_cluster;
+   fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
+   fw_regs->tpu = submit_info->regs.tpu;
+   fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;
+
+   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
+      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
+
+   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE)
+      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE;
+}
+
+VkResult pvr_srv_winsys_compute_submit(
+   const struct pvr_winsys_compute_ctx *ctx,
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_out)
+{
+   const struct pvr_srv_winsys_compute_ctx *srv_ctx =
+      to_pvr_srv_winsys_compute_ctx(ctx);
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   struct pvr_winsys_syncobj *signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *wait_syncobj = NULL;
+   struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+   struct rogue_fwif_cmd_compute compute_cmd;
+   VkResult result;
+   int fence;
+
+   pvr_srv_compute_cmd_init(submit_info, &compute_cmd);
+
+   for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]);
+
+      if (!sem->syncobj)
+         continue;
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_COMPUTE_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                wait_syncobj,
+                                                &wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobj;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_COMPUTE_BIT;
+      }
+
+      if (submit_info->stage_flags[i] == 0U) {
+         pvr_srv_winsys_syncobj_destroy(sem->syncobj);
+         sem->syncobj = NULL;
+      }
+   }
+
+   srv_syncobj = to_pvr_srv_winsys_syncobj(wait_syncobj);
+
+   do {
+      result = pvr_srv_rgx_kick_compute2(srv_ws->render_fd,
+                                         srv_ctx->handle,
+                                         /* No support for cache operations. */
+                                         0U,
+                                         0U,
+                                         NULL,
+                                         NULL,
+                                         NULL,
+                                         wait_syncobj ? srv_syncobj->fd : -1,
+                                         srv_ctx->timeline,
+                                         sizeof(compute_cmd),
+                                         (uint8_t *)&compute_cmd,
+                                         submit_info->job_num,
+                                         0U,
+                                         0U,
+                                         0U,
+                                         0U,
+                                         "COMPUTE",
+                                         &fence);
+   } while (result == VK_NOT_READY);
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_wait_syncobj;
+
+   /* Given that job submission succeeded, we don't need to close the wait
+    * fence; it will be consumed by the compute job itself.
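+    *
+    * The update fence handed back in 'fence' (if any) is wrapped in a fresh
+    * signal syncobj for the caller below.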
+    */
+   if (wait_syncobj)
+      srv_syncobj->fd = -1;
+
+   if (fence != -1) {
+      result = pvr_srv_winsys_syncobj_create(ctx->ws, false, &signal_syncobj);
+      if (result != VK_SUCCESS)
+         goto err_destroy_wait_syncobj;
+
+      pvr_srv_set_syncobj_payload(signal_syncobj, fence);
+   }
+
+   *syncobj_out = signal_syncobj;
+
+err_destroy_wait_syncobj:
+   if (wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(wait_syncobj);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h
new file mode 100644
index 00000000000..09a85a1e8be
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMPUTE_H
+#define PVR_SRV_JOB_COMPUTE_H
+
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_compute_ctx;
+struct pvr_winsys_compute_ctx_create_info;
+struct pvr_winsys_compute_submit_info;
+struct pvr_winsys_syncobj;
+
+/*******************************************
+   Function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_compute_ctx_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_compute_ctx_create_info *create_info,
+   struct pvr_winsys_compute_ctx **const ctx_out);
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx);
+
+VkResult pvr_srv_winsys_compute_submit(
+   const struct pvr_winsys_compute_ctx *ctx,
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_out);
+
+#endif /* PVR_SRV_JOB_COMPUTE_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c
new file mode 100644
index 00000000000..5084cfb415c
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c
@@ -0,0 +1,706 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_job_render.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+struct pvr_srv_winsys_free_list {
+   struct pvr_winsys_free_list base;
+
+   void *handle;
+
+   struct pvr_srv_winsys_free_list *parent;
+};
+
+#define to_pvr_srv_winsys_free_list(free_list) \
+   container_of(free_list, struct pvr_srv_winsys_free_list, base)
+
+struct pvr_srv_winsys_rt_dataset {
+   struct pvr_winsys_rt_dataset base;
+
+   struct {
+      void *handle;
+      struct pvr_srv_sync_prim *sync_prim;
+   } rt_datas[ROGUE_FWIF_NUM_RTDATAS];
+};
+
+#define to_pvr_srv_winsys_rt_dataset(rt_dataset) \
+   container_of(rt_dataset, struct pvr_srv_winsys_rt_dataset, base)
+
+struct pvr_srv_winsys_render_ctx {
+   struct pvr_winsys_render_ctx base;
+
+   /* Handle to kernel context.
*/ + void *handle; + + int timeline_geom; + int timeline_frag; +}; + +#define to_pvr_srv_winsys_render_ctx(ctx) \ + container_of(ctx, struct pvr_srv_winsys_render_ctx, base) + +VkResult pvr_srv_winsys_free_list_create( + struct pvr_winsys *ws, + struct pvr_winsys_vma *free_list_vma, + uint32_t initial_num_pages, + uint32_t max_num_pages, + uint32_t grow_num_pages, + uint32_t grow_threshold, + struct pvr_winsys_free_list *parent_free_list, + struct pvr_winsys_free_list **const free_list_out) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + struct pvr_srv_winsys_bo *srv_free_list_bo = + to_pvr_srv_winsys_bo(free_list_vma->bo); + struct pvr_srv_winsys_free_list *srv_free_list; + void *parent_handle; + VkResult result; + + srv_free_list = vk_zalloc(srv_ws->alloc, + sizeof(*srv_free_list), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!srv_free_list) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (parent_free_list) { + srv_free_list->parent = to_pvr_srv_winsys_free_list(parent_free_list); + parent_handle = srv_free_list->parent->handle; + } else { + srv_free_list->parent = NULL; + parent_handle = NULL; + } + + result = pvr_srv_rgx_create_free_list(srv_ws->render_fd, + srv_ws->server_memctx_data, + max_num_pages, + initial_num_pages, + grow_num_pages, + grow_threshold, + parent_handle, +#if defined(DEBUG) + PVR_SRV_TRUE /* free_list_check */, +#else + PVR_SRV_FALSE /* free_list_check */, +#endif + free_list_vma->dev_addr, + srv_free_list_bo->pmr, + 0 /* pmr_offset */, + &srv_free_list->handle); + if (result != VK_SUCCESS) + goto err_vk_free_srv_free_list; + + srv_free_list->base.ws = ws; + + *free_list_out = &srv_free_list->base; + + return VK_SUCCESS; + +err_vk_free_srv_free_list: + vk_free(srv_ws->alloc, srv_free_list); + + return result; +} + +void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(free_list->ws); + struct pvr_srv_winsys_free_list *srv_free_list = + to_pvr_srv_winsys_free_list(free_list); + + pvr_srv_rgx_destroy_free_list(srv_ws->render_fd, srv_free_list->handle); + vk_free(srv_ws->alloc, srv_free_list); +} + +VkResult pvr_srv_render_target_dataset_create( + struct pvr_winsys *ws, + const struct pvr_winsys_rt_dataset_create_info *create_info, + struct pvr_winsys_rt_dataset **const rt_dataset_out) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + struct pvr_srv_winsys_free_list *srv_local_free_list = + to_pvr_srv_winsys_free_list(create_info->local_free_list); + void *free_lists[ROGUE_FW_MAX_FREELISTS] = { NULL }; + struct pvr_srv_winsys_rt_dataset *srv_rt_dataset; + VkResult result; + + free_lists[ROGUE_FW_LOCAL_FREELIST] = srv_local_free_list->handle; + + if (srv_local_free_list->parent) { + free_lists[ROGUE_FW_GLOBAL_FREELIST] = + srv_local_free_list->parent->handle; + } + + srv_rt_dataset = vk_zalloc(srv_ws->alloc, + sizeof(*srv_rt_dataset), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!srv_rt_dataset) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = pvr_srv_rgx_create_hwrt_dataset( + srv_ws->render_fd, + create_info->rt_datas[0].pm_mlist_dev_addr, + create_info->rt_datas[1].pm_mlist_dev_addr, + create_info->tpc_dev_addr, + create_info->rt_datas[0].macrotile_array_dev_addr, + create_info->rt_datas[1].macrotile_array_dev_addr, + create_info->rtc_dev_addr, + create_info->rt_datas[0].rgn_header_dev_addr, + create_info->rt_datas[1].rgn_header_dev_addr, + create_info->vheap_table_dev_addr, + create_info->ppp_multi_sample_ctl_y_flipped, + 
create_info->ppp_multi_sample_ctl, + create_info->rgn_header_size, + free_lists, + create_info->mtile_stride, + create_info->ppp_screen, + create_info->te_aa, + create_info->te_mtile1, + create_info->te_mtile2, + create_info->te_screen, + create_info->tpc_size, + create_info->tpc_stride, + create_info->isp_merge_lower_x, + create_info->isp_merge_lower_y, + create_info->isp_merge_scale_x, + create_info->isp_merge_scale_y, + create_info->isp_merge_upper_x, + create_info->isp_merge_upper_y, + create_info->isp_mtile_size, + create_info->max_rts, + &srv_rt_dataset->rt_datas[0].handle, + &srv_rt_dataset->rt_datas[1].handle); + if (result != VK_SUCCESS) + goto err_vk_free_srv_rt_dataset; + + for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) { + srv_rt_dataset->rt_datas[i].sync_prim = pvr_srv_sync_prim_alloc(srv_ws); + if (!srv_rt_dataset->rt_datas[i].sync_prim) + goto err_srv_sync_prim_free; + } + + srv_rt_dataset->base.ws = ws; + + *rt_dataset_out = &srv_rt_dataset->base; + + return VK_SUCCESS; + +err_srv_sync_prim_free: + for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) { + pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[i].sync_prim); + + if (srv_rt_dataset->rt_datas[i].handle) { + pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd, + srv_rt_dataset->rt_datas[i].handle); + } + } + +err_vk_free_srv_rt_dataset: + vk_free(srv_ws->alloc, srv_rt_dataset); + + return result; +} + +void pvr_srv_render_target_dataset_destroy( + struct pvr_winsys_rt_dataset *rt_dataset) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(rt_dataset->ws); + struct pvr_srv_winsys_rt_dataset *srv_rt_dataset = + to_pvr_srv_winsys_rt_dataset(rt_dataset); + + for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) { + pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[i].sync_prim); + + if (srv_rt_dataset->rt_datas[i].handle) { + pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd, + srv_rt_dataset->rt_datas[i].handle); + } + } + + vk_free(srv_ws->alloc, srv_rt_dataset); +} + +static void pvr_srv_render_ctx_fw_static_state_init( + struct pvr_winsys_render_ctx_create_info *create_info, + struct rogue_fwif_static_rendercontext_state *static_state) +{ + struct pvr_winsys_render_ctx_static_state *ws_static_state = + &create_info->static_state; + struct rogue_fwif_ta_regs_cswitch *regs = &static_state->ctx_switch_regs; + + memset(static_state, 0, sizeof(*static_state)); + + regs->vdm_context_state_base_addr = ws_static_state->vdm_ctx_state_base_addr; + regs->ta_context_state_base_addr = ws_static_state->geom_ctx_state_base_addr; + + STATIC_ASSERT(ARRAY_SIZE(regs->ta_state) == + ARRAY_SIZE(ws_static_state->geom_state)); + for (uint32_t i = 0; i < ARRAY_SIZE(ws_static_state->geom_state); i++) { + regs->ta_state[i].vdm_context_store_task0 = + ws_static_state->geom_state[i].vdm_ctx_store_task0; + regs->ta_state[i].vdm_context_store_task1 = + ws_static_state->geom_state[i].vdm_ctx_store_task1; + regs->ta_state[i].vdm_context_store_task2 = + ws_static_state->geom_state[i].vdm_ctx_store_task2; + + regs->ta_state[i].vdm_context_resume_task0 = + ws_static_state->geom_state[i].vdm_ctx_resume_task0; + regs->ta_state[i].vdm_context_resume_task1 = + ws_static_state->geom_state[i].vdm_ctx_resume_task1; + regs->ta_state[i].vdm_context_resume_task2 = + ws_static_state->geom_state[i].vdm_ctx_resume_task2; + } +} + +VkResult pvr_srv_winsys_render_ctx_create( + struct pvr_winsys *ws, + struct pvr_winsys_render_ctx_create_info *create_info, + struct pvr_winsys_render_ctx **const ctx_out) +{ + struct 
pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); + struct rogue_fwif_rf_cmd reset_cmd = { + .flags = 0, + }; + + struct rogue_fwif_static_rendercontext_state static_state; + struct pvr_srv_winsys_render_ctx *srv_ctx; + VkResult result; + + srv_ctx = vk_zalloc(srv_ws->alloc, + sizeof(*srv_ctx), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!srv_ctx) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + srv_ctx->timeline_geom = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR); + if (srv_ctx->timeline_geom < 0) + goto err_free_srv_ctx; + + srv_ctx->timeline_frag = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR); + if (srv_ctx->timeline_frag < 0) + goto err_close_timeline_geom; + + pvr_srv_render_ctx_fw_static_state_init(create_info, &static_state); + + result = pvr_srv_rgx_create_render_context( + srv_ws->render_fd, + pvr_srv_from_winsys_priority(create_info->priority), + create_info->vdm_callstack_addr, + sizeof(reset_cmd) - sizeof(reset_cmd.regs), + (uint8_t *)&reset_cmd, + srv_ws->server_memctx_data, + sizeof(static_state), + (uint8_t *)&static_state, + 0, + RGX_CONTEXT_FLAG_DISABLESLR, + 0, + UINT_MAX, + UINT_MAX, + &srv_ctx->handle); + if (result != VK_SUCCESS) + goto err_close_timeline_frag; + + srv_ctx->base.ws = ws; + + *ctx_out = &srv_ctx->base; + + return VK_SUCCESS; + +err_close_timeline_frag: + close(srv_ctx->timeline_frag); + +err_close_timeline_geom: + close(srv_ctx->timeline_geom); + +err_free_srv_ctx: + vk_free(srv_ws->alloc, srv_ctx); + + return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); +} + +void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws); + struct pvr_srv_winsys_render_ctx *srv_ctx = + to_pvr_srv_winsys_render_ctx(ctx); + + pvr_srv_rgx_destroy_render_context(srv_ws->render_fd, srv_ctx->handle); + close(srv_ctx->timeline_frag); + close(srv_ctx->timeline_geom); + vk_free(srv_ws->alloc, srv_ctx); +} + +static void pvr_srv_geometry_cmd_init( + const struct pvr_winsys_render_submit_info *submit_info, + const struct pvr_srv_sync_prim *sync_prim, + struct rogue_fwif_cmd_ta *cmd) +{ + const struct pvr_winsys_geometry_state *state = &submit_info->geometry; + struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs; + + memset(cmd, 0, sizeof(*cmd)); + + cmd->cmd_shared.cmn.frame_num = submit_info->frame_num; + + fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base; + fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table; + fw_regs->ppp_ctrl = state->regs.ppp_ctrl; + fw_regs->te_psg = state->regs.te_psg; + fw_regs->tpu = state->regs.tpu; + fw_regs->vdm_context_resume_task0_size = + state->regs.vdm_ctx_resume_task0_size; + fw_regs->pds_ctrl = state->regs.pds_ctrl; + + if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY) + cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK; + + if (state->flags & PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY) + cmd->flags |= ROGUE_FWIF_TAFLAGS_LASTKICK; + + if (state->flags & PVR_WINSYS_GEOM_FLAG_SINGLE_CORE) + cmd->flags |= ROGUE_FWIF_TAFLAGS_SINGLE_CORE; + + cmd->partial_render_ta_3d_fence.ufo_addr.addr = + pvr_srv_sync_prim_get_fw_addr(sync_prim); + cmd->partial_render_ta_3d_fence.value = sync_prim->value; +} + +static void pvr_srv_fragment_cmd_init( + const struct pvr_winsys_render_submit_info *submit_info, + struct rogue_fwif_cmd_3d *cmd) +{ + const struct pvr_winsys_fragment_state *state = &submit_info->fragment; + struct rogue_fwif_3d_regs *fw_regs = &cmd->regs; + + memset(cmd, 0, sizeof(*cmd)); + + cmd->cmd_shared.cmn.frame_num = 
submit_info->frame_num; + + fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl; + fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth; + fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals; + fw_regs->isp_aa = state->regs.isp_aa; + fw_regs->isp_ctl = state->regs.isp_ctl; + fw_regs->tpu = state->regs.tpu; + fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info; + fw_regs->pixel_phantom = state->regs.pixel_phantom; + fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data; + fw_regs->isp_scissor_base = state->regs.isp_scissor_base; + fw_regs->isp_dbias_base = state->regs.isp_dbias_base; + fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base; + fw_regs->isp_zlsctl = state->regs.isp_zlsctl; + fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base; + fw_regs->isp_stencil_load_store_base = + state->regs.isp_stencil_load_store_base; + fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels; + + STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) == + ARRAY_SIZE(state->regs.pbe_word)); + + STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <= + ARRAY_SIZE(state->regs.pbe_word[0])); + +#if !defined(NDEBUG) + /* Depending on the hardware we might have more PBE words than the firmware + * accepts so check that the extra words are 0. + */ + if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) { + /* For each color attachment. */ + for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) { + /* For each extra PBE word not used by the firmware. */ + for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]); + j < ARRAY_SIZE(state->regs.pbe_word[0]); + j++) { + assert(state->regs.pbe_word[i][j] == 0); + } + } + } +#endif + + memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word)); + + fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table; + + STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) == + ARRAY_SIZE(state->regs.pds_bgnd)); + typed_memcpy(fw_regs->pds_bgnd, + state->regs.pds_bgnd, + ARRAY_SIZE(fw_regs->pds_bgnd)); + + STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) == + ARRAY_SIZE(state->regs.pds_pr_bgnd)); + typed_memcpy(fw_regs->pds_pr_bgnd, + state->regs.pds_pr_bgnd, + ARRAY_SIZE(fw_regs->pds_pr_bgnd)); + + if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT) + cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER; + + if (state->flags & PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT) + cmd->flags |= ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER; + + if (state->flags & PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP) + cmd->flags |= ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP; + + if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE) + cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE; + + cmd->zls_stride = state->zls_stride; + cmd->sls_stride = state->sls_stride; +} + +VkResult pvr_srv_winsys_render_submit( + const struct pvr_winsys_render_ctx *ctx, + const struct pvr_winsys_render_submit_info *submit_info, + struct pvr_winsys_syncobj **const syncobj_geom_out, + struct pvr_winsys_syncobj **const syncobj_frag_out) +{ + const struct pvr_srv_winsys_rt_dataset *srv_rt_dataset = + to_pvr_srv_winsys_rt_dataset(submit_info->rt_dataset); + struct pvr_srv_sync_prim *sync_prim = + srv_rt_dataset->rt_datas[submit_info->rt_data_idx].sync_prim; + void *rt_data_handle = + srv_rt_dataset->rt_datas[submit_info->rt_data_idx].handle; + const struct pvr_srv_winsys_render_ctx *srv_ctx = + to_pvr_srv_winsys_render_ctx(ctx); + const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws); + + uint32_t sync_pmr_flags[PVR_SRV_SYNC_MAX] = { 0U }; + 
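+   /* Parallel arrays for buffer synchronization: at most PVR_SRV_SYNC_MAX
+    * PMRs can be attached to a kick, each tagged with PVR_BUFFER_FLAG_WRITE
+    * or PVR_BUFFER_FLAG_READ from the job BO flags below.
+    */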
void *sync_pmrs[PVR_SRV_SYNC_MAX] = { NULL }; + uint32_t sync_pmr_count; + + struct pvr_winsys_syncobj *geom_signal_syncobj = NULL; + struct pvr_winsys_syncobj *frag_signal_syncobj = NULL; + struct pvr_winsys_syncobj *geom_wait_syncobj = NULL; + struct pvr_winsys_syncobj *frag_wait_syncobj = NULL; + struct pvr_srv_winsys_syncobj *srv_geom_syncobj; + struct pvr_srv_winsys_syncobj *srv_frag_syncobj; + + struct rogue_fwif_cmd_ta geom_cmd; + struct rogue_fwif_cmd_3d frag_cmd; + + int fence_frag; + int fence_geom; + + VkResult result; + + pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd); + pvr_srv_fragment_cmd_init(submit_info, &frag_cmd); + + for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) { + PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]); + + if (!sem->syncobj) + continue; + + if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_GEOM_BIT) { + result = pvr_srv_winsys_syncobjs_merge(sem->syncobj, + geom_wait_syncobj, + &geom_wait_syncobj); + if (result != VK_SUCCESS) + goto err_destroy_wait_syncobjs; + + submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_GEOM_BIT; + } + + if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_FRAG_BIT) { + result = pvr_srv_winsys_syncobjs_merge(sem->syncobj, + frag_wait_syncobj, + &frag_wait_syncobj); + if (result != VK_SUCCESS) + goto err_destroy_wait_syncobjs; + + submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_FRAG_BIT; + } + + if (submit_info->stage_flags[i] == 0U) { + pvr_srv_winsys_syncobj_destroy(sem->syncobj); + sem->syncobj = NULL; + } + } + + srv_geom_syncobj = to_pvr_srv_winsys_syncobj(geom_wait_syncobj); + srv_frag_syncobj = to_pvr_srv_winsys_syncobj(frag_wait_syncobj); + + if (submit_info->bo_count <= ARRAY_SIZE(sync_pmrs)) { + sync_pmr_count = submit_info->bo_count; + } else { + mesa_logw("Too many bos to synchronize access to (ignoring %zu bos)\n", + submit_info->bo_count - ARRAY_SIZE(sync_pmrs)); + sync_pmr_count = ARRAY_SIZE(sync_pmrs); + } + + STATIC_ASSERT(ARRAY_SIZE(sync_pmrs) == ARRAY_SIZE(sync_pmr_flags)); + assert(sync_pmr_count <= ARRAY_SIZE(sync_pmrs)); + for (uint32_t i = 0; i < sync_pmr_count; i++) { + const struct pvr_winsys_job_bo *job_bo = &submit_info->bos[i]; + const struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(job_bo->bo); + + sync_pmrs[i] = srv_bo->pmr; + + if (job_bo->flags & PVR_WINSYS_JOB_BO_FLAG_WRITE) + sync_pmr_flags[i] = PVR_BUFFER_FLAG_WRITE; + else + sync_pmr_flags[i] = PVR_BUFFER_FLAG_READ; + } + + /* The 1.14 PowerVR Services KM driver doesn't add a sync dependency to the + * fragment phase on the geometry phase for us. This makes it + * necessary to use a sync prim for this purpose. This requires that we pass + * in the same sync prim information for the geometry phase update and the + * PR fence. We update the sync prim value here as this is the value the + * sync prim will get updated to once the geometry phase has completed and + * the value the PR or fragment phase will be fenced on. + */ + sync_prim->value++; + + do { + result = + pvr_srv_rgx_kick_render2(srv_ws->render_fd, + srv_ctx->handle, + /* Currently no support for cache operations. + */ + 0, + 0, + NULL, + NULL, + NULL, + 1, + &sync_prim->srv_ws->sync_block_handle, + &sync_prim->offset, + &sync_prim->value, + 0, + NULL, + NULL, + NULL, + sync_prim->srv_ws->sync_block_handle, + sync_prim->offset, + sync_prim->value, + geom_wait_syncobj ? srv_geom_syncobj->fd : -1, + srv_ctx->timeline_geom, + &fence_geom, + "GEOM", + frag_wait_syncobj ? 
+                                  srv_ctx->timeline_frag,
+                                  &fence_frag,
+                                  "FRAG",
+                                  sizeof(geom_cmd),
+                                  (uint8_t *)&geom_cmd,
+                                  /* Currently no support for PRs. */
+                                  0,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sizeof(frag_cmd),
+                                  (uint8_t *)&frag_cmd,
+                                  submit_info->job_num,
+                                  true, /* Always kick the TA. */
+                                  true, /* Always kick a PR. */
+                                  submit_info->run_frag,
+                                  false,
+                                  0,
+                                  rt_data_handle,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sync_pmr_count,
+                                  sync_pmr_count ? sync_pmr_flags : NULL,
+                                  sync_pmr_count ? sync_pmrs : NULL,
+                                  0,
+                                  0,
+                                  0,
+                                  0,
+                                  0);
+   } while (result == VK_NOT_READY);
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_wait_syncobjs;
+
+   /* Since job submission succeeded, we don't need to close the wait fences;
+    * they should be consumed by the render job itself.
+    */
+   if (geom_wait_syncobj)
+      srv_geom_syncobj->fd = -1;
+
+   if (frag_wait_syncobj)
+      srv_frag_syncobj->fd = -1;
+
+   if (fence_geom != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &geom_signal_syncobj);
+      if (result != VK_SUCCESS)
+         goto err_destroy_wait_syncobjs;
+
+      pvr_srv_set_syncobj_payload(geom_signal_syncobj, fence_geom);
+   }
+
+   if (fence_frag != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &frag_signal_syncobj);
+      if (result != VK_SUCCESS) {
+         if (geom_signal_syncobj)
+            pvr_srv_winsys_syncobj_destroy(geom_signal_syncobj);
+         goto err_destroy_wait_syncobjs;
+      }
+
+      pvr_srv_set_syncobj_payload(frag_signal_syncobj, fence_frag);
+   }
+
+   *syncobj_geom_out = geom_signal_syncobj;
+   *syncobj_frag_out = frag_signal_syncobj;
+
+err_destroy_wait_syncobjs:
+   if (geom_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(geom_wait_syncobj);
+
+   if (frag_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(frag_wait_syncobj);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h
new file mode 100644
index 00000000000..bae71f65d6a
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_RENDER_H
+#define PVR_SRV_JOB_RENDER_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_free_list;
+struct pvr_winsys_render_ctx;
+struct pvr_winsys_render_ctx_create_info;
+struct pvr_winsys_render_submit_info;
+struct pvr_winsys_rt_dataset;
+struct pvr_winsys_rt_dataset_create_info;
+struct pvr_winsys_syncobj;
+struct pvr_winsys_vma;
+
+/*******************************************
+   Function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_free_list_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_vma *free_list_vma,
+   uint32_t initial_num_pages,
+   uint32_t max_num_pages,
+   uint32_t grow_num_pages,
+   uint32_t grow_threshold,
+   struct pvr_winsys_free_list *parent_free_list,
+   struct pvr_winsys_free_list **const free_list_out);
+void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list);
+
+VkResult pvr_srv_render_target_dataset_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_rt_dataset_create_info *create_info,
+   struct pvr_winsys_rt_dataset **const rt_dataset_out);
+void pvr_srv_render_target_dataset_destroy(
+   struct pvr_winsys_rt_dataset *rt_dataset);
+
+VkResult pvr_srv_winsys_render_ctx_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct pvr_winsys_render_ctx **const ctx_out);
+void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx);
+
+VkResult pvr_srv_winsys_render_submit(
+   const struct pvr_winsys_render_ctx *ctx,
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_geom_out,
+   struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_SRV_JOB_RENDER_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h
new file mode 100644
index 00000000000..22c088e3052
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_PUBLIC_H
+#define PVR_SRV_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_srv_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_SRV_PUBLIC_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c
new file mode 100644
index 00000000000..a25601a8e35
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/libsync.h"
+#include "util/macros.h"
+#include "util/timespec.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+   srv_syncobj = vk_alloc(srv_ws->alloc,
+                          sizeof(*srv_syncobj),
+                          8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_syncobj)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_syncobj->base.ws = ws;
+   srv_syncobj->signaled = signaled;
+   srv_syncobj->fd = -1;
+
+   *syncobj_out = &srv_syncobj->base;
+
+   return VK_SUCCESS;
+}
+
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj)
+{
+   struct pvr_srv_winsys_syncobj *srv_syncobj;
+   struct pvr_srv_winsys *srv_ws;
+
+   assert(syncobj);
+
+   srv_ws = to_pvr_srv_winsys(syncobj->ws);
+   srv_syncobj = to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   vk_free(srv_ws->alloc, srv_syncobj);
+}
+
+/* Note: this function closes the fd. */
+static void pvr_set_syncobj_state(struct pvr_srv_winsys_syncobj *srv_syncobj,
+                                  bool signaled)
+{
+   if (srv_syncobj->fd != -1) {
+      close(srv_syncobj->fd);
+      srv_syncobj->fd = -1;
+   }
+
+   srv_syncobj->signaled = signaled;
+}
+
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+                                 int payload)
+{
+   struct pvr_srv_winsys_syncobj *srv_syncobj =
+      to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   srv_syncobj->fd = payload;
+   /* FIXME: Is this valid?
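+    * It is assumed here that a payload of -1 means there is no fence fd
+    * to wait on, so the syncobj is treated as already signaled.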
+    */
+   srv_syncobj->signaled = (payload == -1);
+}
+
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count)
+{
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+      if (!syncobjs[i])
+         continue;
+
+      srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+      pvr_set_syncobj_state(srv_syncobj, false);
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count)
+{
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+      if (!syncobjs[i])
+         continue;
+
+      srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+      pvr_set_syncobj_state(srv_syncobj, true);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Careful, the timeout might overflow. */
+static inline void pvr_start_timeout(struct timespec *timeout,
+                                     uint64_t timeout_ns)
+{
+   clock_gettime(CLOCK_MONOTONIC, timeout);
+   timespec_add_nsec(timeout, timeout, timeout_ns);
+}
+
+/* Careful, a negative value might be returned. */
+static inline struct timespec
+pvr_get_remaining_time(const struct timespec *timeout)
+{
+   struct timespec time;
+
+   clock_gettime(CLOCK_MONOTONIC, &time);
+   timespec_sub(&time, timeout, &time);
+
+   return time;
+}
+
+/* timeout == 0 -> Get status without waiting.
+ * timeout == ~0 -> Wait indefinitely.
+ * else -> Wait for the given timeout in nanoseconds.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout)
+{
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   uint32_t unsignaled_count = 0U;
+   struct timespec end_time;
+   struct pollfd *poll_fds;
+   VkResult result;
+   int ppoll_ret;
+
+   if (timeout != 0U && timeout != ~0U) {
+      /* We don't worry about overflow since ppoll() returns EINVAL on a
+       * negative timeout.
+       */
+      pvr_start_timeout(&end_time, timeout);
+   }
+
+   poll_fds = vk_alloc(srv_ws->alloc,
+                       sizeof(*poll_fds) * count,
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!poll_fds)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj =
+         to_pvr_srv_winsys_syncobj(syncobjs[i]);
+
+      /* Use -1 if the fence is signaled or uninitialized; ppoll will then
+       * skip over it.
+       */
+      if (!srv_syncobj || srv_syncobj->signaled || srv_syncobj->fd == -1) {
+         poll_fds[i].fd = -1;
+      } else {
+         poll_fds[i].fd = srv_syncobj->fd;
+         unsignaled_count++;
+      }
+
+      poll_fds[i].events = POLLIN;
+      poll_fds[i].revents = 0U;
+   }
+
+   if (unsignaled_count == 0U) {
+      result = VK_SUCCESS;
+      goto end_wait_for_fences;
+   }
+
+   /* TODO: Implement device loss handling like anvil: report the loss, save
+    * the reported status, maybe abort() on an env flag, etc.
+    */
+
+   do {
+      if (timeout == ~0U) {
+         ppoll_ret = ppoll(poll_fds, count, NULL, NULL);
+      } else {
+         struct timespec remaining_time;
+
+         if (timeout == 0U) {
+            remaining_time = (struct timespec){ 0UL, 0UL };
+         } else {
+            /* ppoll() returns EINVAL on a negative timeout, so there's
+             * nothing to worry about here.
+             */
+            remaining_time = pvr_get_remaining_time(&end_time);
+         }
+
+         ppoll_ret = ppoll(poll_fds, count, &remaining_time, NULL);
+      }
+
+      if (ppoll_ret > 0U) {
+         /* ppoll_ret contains the number of structs updated by ppoll(). */
+         unsignaled_count -= ppoll_ret;
+
+         /* ppoll_ret > 0 is for early loop termination.
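+          * It is decremented as each updated entry is handled below, so
+          * the scan stops as soon as all of them have been processed.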
+          */
+         for (uint32_t i = 0; ppoll_ret > 0 && i < count; i++) {
+            struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+            if (poll_fds[i].revents == 0)
+               continue;
+
+            if (poll_fds[i].revents & (POLLNVAL | POLLERR)) {
+               result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+               goto end_wait_for_fences;
+            }
+
+            srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+            pvr_set_syncobj_state(srv_syncobj, true);
+
+            if (!wait_all) {
+               result = VK_SUCCESS;
+               goto end_wait_for_fences;
+            }
+
+            /* -1 makes ppoll ignore it and set revents to 0. */
+            poll_fds[i].fd = -1;
+            ppoll_ret--;
+         }
+
+         /* For a zero timeout, just return even if we still have unsignaled
+          * fences.
+          */
+         if (timeout == 0U && unsignaled_count != 0U) {
+            result = VK_TIMEOUT;
+            goto end_wait_for_fences;
+         }
+      } else if (ppoll_ret == 0) {
+         result = VK_TIMEOUT;
+         goto end_wait_for_fences;
+      }
+
+      /* Careful, as we might have decremented ppoll_ret to 0. */
+   } while ((ppoll_ret != -1 && unsignaled_count != 0) ||
+            (ppoll_ret == -1 && (errno == EINTR || errno == EAGAIN)));
+
+   /* We assume device loss in case of an unknown error or invalid fd. */
+   if (ppoll_ret != -1)
+      result = VK_SUCCESS;
+   else if (errno == EINVAL)
+      result = VK_TIMEOUT;
+   else if (errno == ENOMEM)
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+   else
+      result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+
+end_wait_for_fences:
+   vk_free(srv_ws->alloc, poll_fds);
+
+   return result;
+}
+
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+                                       struct pvr_winsys_syncobj *target,
+                                       struct pvr_winsys_syncobj **syncobj_out)
+{
+   struct pvr_srv_winsys_syncobj *srv_target =
+      to_pvr_srv_winsys_syncobj(target);
+   struct pvr_srv_winsys_syncobj *srv_src = to_pvr_srv_winsys_syncobj(src);
+   struct pvr_srv_winsys_syncobj *srv_output;
+   struct pvr_winsys_syncobj *output = NULL;
+   VkResult result;
+
+   if (!srv_src || srv_src->fd == -1) {
+      *syncobj_out = target;
+      return VK_SUCCESS;
+   }
+
+   result = pvr_srv_winsys_syncobj_create(src->ws, false, &output);
+   if (result != VK_SUCCESS)
+      return result;
+
+   srv_output = to_pvr_srv_winsys_syncobj(output);
+
+   if (!srv_target || srv_target->fd == -1) {
+      int fd = dup(srv_src->fd);
+      if (fd < 0) {
+         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+         goto err_syncobj_destroy;
+      }
+
+      pvr_srv_set_syncobj_payload(output, fd);
+      if (target)
+         pvr_srv_winsys_syncobj_destroy(target);
+      *syncobj_out = output;
+      return VK_SUCCESS;
+   }
+
+   srv_output->fd = sync_merge("", srv_src->fd, srv_target->fd);
+   if (srv_output->fd < 0) {
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_syncobj_destroy;
+   }
+
+   pvr_srv_winsys_syncobj_destroy(target);
+
+   *syncobj_out = output;
+
+   return VK_SUCCESS;
+
+err_syncobj_destroy:
+   pvr_srv_winsys_syncobj_destroy(output);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h
new file mode 100644
index 00000000000..857d7ef18b9
--- /dev/null
+++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_SYNCOBJ_H
+#define PVR_SRV_SYNCOBJ_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+struct pvr_srv_winsys_syncobj {
+   struct pvr_winsys_syncobj base;
+
+   /* Cached completion state. */
+   bool signaled;
+
+   int fd;
+};
+
+#define to_pvr_srv_winsys_syncobj(syncobj) \
+   container_of(syncobj, struct pvr_srv_winsys_syncobj, base)
+
+/*******************************************
+   function prototypes
+ *******************************************/
+
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out);
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj);
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count);
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count);
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout);
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+                                       struct pvr_winsys_syncobj *target,
+                                       struct pvr_winsys_syncobj **out);
+
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+                                 int payload);
+
+#endif /* PVR_SRV_SYNCOBJ_H */
diff --git a/src/meson.build b/src/meson.build
index eac75a3304b..5104e18764f 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -94,6 +94,9 @@ endif
 if with_gallium_freedreno or with_freedreno_vk or with_tools.contains('freedreno')
   subdir('freedreno')
 endif
+if with_imagination_vk
+  subdir('imagination')
+endif
 if with_gallium_panfrost or with_gallium_lima or with_panfrost_vk or with_tools.contains('panfrost')
   subdir('panfrost')
 endif