diff --git a/src/intel/compiler/brw_private.h b/src/intel/compiler/brw_private.h new file mode 100644 index 00000000000..d166a29e0d5 --- /dev/null +++ b/src/intel/compiler/brw_private.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_PRIVATE_H +#define BRW_PRIVATE_H + +#include "brw_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned brw_required_dispatch_width(const struct shader_info *info, + enum brw_subgroup_size_type subgroup_size_type); + +bool brw_simd_should_compile(void *mem_ctx, + unsigned simd, + const struct intel_device_info *devinfo, + struct brw_cs_prog_data *prog_data, + unsigned required_dispatch_width, + const char **error); + +void brw_simd_mark_compiled(unsigned simd, + struct brw_cs_prog_data *prog_data, + bool spilled); + +int brw_simd_select(const struct brw_cs_prog_data *prog_data); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif // BRW_PRIVATE_H diff --git a/src/intel/compiler/brw_simd_selection.c b/src/intel/compiler/brw_simd_selection.c new file mode 100644 index 00000000000..551e882e1a8 --- /dev/null +++ b/src/intel/compiler/brw_simd_selection.c @@ -0,0 +1,163 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_private.h" +#include "compiler/shader_info.h" +#include "intel/dev/intel_debug.h" +#include "intel/dev/intel_device_info.h" +#include "util/ralloc.h" + +unsigned +brw_required_dispatch_width(const struct shader_info *info, + enum brw_subgroup_size_type subgroup_size_type) +{ + unsigned required = 0; + + if ((int)subgroup_size_type >= (int)BRW_SUBGROUP_SIZE_REQUIRE_8) { + assert(gl_shader_stage_uses_workgroup(info->stage)); + /* These enum values are expressly chosen to be equal to the subgroup + * size that they require. + */ + required = (unsigned)subgroup_size_type; + } + + if (gl_shader_stage_is_compute(info->stage) && info->cs.subgroup_size > 0) { + assert(required == 0 || required == info->cs.subgroup_size); + required = info->cs.subgroup_size; + } + + return required; +} + +static inline bool +test_bit(unsigned mask, unsigned bit) { + return mask & (1u << bit); +} + +bool +brw_simd_should_compile(void *mem_ctx, + unsigned simd, + const struct intel_device_info *devinfo, + struct brw_cs_prog_data *prog_data, + unsigned required, + const char **error) + +{ + assert(!test_bit(prog_data->prog_mask, simd)); + assert(error); + + const unsigned width = 8u << simd; + + /* For shaders with variable size workgroup, we will always compile all the + * variants, since the choice will happen only at dispatch time. + */ + const bool workgroup_size_variable = prog_data->local_size[0] == 0; + + if (!workgroup_size_variable) { + if (test_bit(prog_data->prog_spilled, simd)) { + *error = ralloc_asprintf( + mem_ctx, "SIMD%u skipped because would spill", width); + return false; + } + + const unsigned workgroup_size = prog_data->local_size[0] * + prog_data->local_size[1] * + prog_data->local_size[2]; + + unsigned max_threads = devinfo->max_cs_workgroup_threads; + + if (required && required != width) { + *error = ralloc_asprintf( + mem_ctx, "SIMD%u skipped because required dispatch width is %u", + width, required); + return false; + } + + /* TODO: Ignore SIMD larger than workgroup if previous SIMD already passed. */ + + if (DIV_ROUND_UP(workgroup_size, width) > max_threads) { + *error = ralloc_asprintf( + mem_ctx, "SIMD%u can't fit all %u invocations in %u threads", + width, workgroup_size, max_threads); + return false; + } + + /* The SIMD32 is only enabled for cases it is needed unless forced. + * + * TODO: Use performance_analysis and drop this rule. + */ + if (width == 32) { + if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) { + *error = ralloc_strdup( + mem_ctx, "SIMD32 skipped because not required"); + return false; + } + } + } + + const bool env_skip[3] = { + INTEL_DEBUG(DEBUG_NO8), + INTEL_DEBUG(DEBUG_NO16), + INTEL_DEBUG(DEBUG_NO32), + }; + + if (unlikely(env_skip[simd])) { + *error = ralloc_asprintf( + mem_ctx, "SIMD%u skipped because INTEL_DEBUG=no%u", + width, width); + return false; + } + + return true; +} + +void +brw_simd_mark_compiled(unsigned simd, struct brw_cs_prog_data *prog_data, bool spilled) +{ + assert(!test_bit(prog_data->prog_mask, simd)); + + prog_data->prog_mask |= 1u << simd; + + /* If a SIMD spilled, all the larger ones would spill too. */ + if (spilled) { + for (unsigned i = simd; i < 3; i++) + prog_data->prog_spilled |= 1u << i; + } +} + +int +brw_simd_select(const struct brw_cs_prog_data *prog_data) +{ + assert((prog_data->prog_mask & ~0x7u) == 0); + const unsigned not_spilled_mask = + prog_data->prog_mask & ~prog_data->prog_spilled; + + /* Util functions index bits from 1 instead of 0, adjust before return. */ + + if (not_spilled_mask) + return util_last_bit(not_spilled_mask) - 1; + else if (prog_data->prog_mask) + return ffs(prog_data->prog_mask) - 1; + else + return -1; +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 10c9cff703c..dadb75d43e5 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -98,6 +98,7 @@ libintel_compiler_files = files( 'brw_nir_clamp_image_1d_2d_array_sizes.c', 'brw_packed_float.c', 'brw_predicated_break.cpp', + 'brw_private.h', 'brw_reg.h', 'brw_reg_type.c', 'brw_reg_type.h', @@ -105,6 +106,7 @@ libintel_compiler_files = files( 'brw_schedule_instructions.cpp', 'brw_shader.cpp', 'brw_shader.h', + 'brw_simd_selection.c', 'brw_vec4_builder.h', 'brw_vec4_cmod_propagation.cpp', 'brw_vec4_copy_propagation.cpp', @@ -169,6 +171,7 @@ if with_tests 'test_fs_copy_propagation.cpp', 'test_fs_saturate_propagation.cpp', 'test_fs_scoreboard.cpp', + 'test_simd_selection.cpp', 'test_vec4_cmod_propagation.cpp', 'test_vec4_copy_propagation.cpp', 'test_vec4_dead_code_eliminate.cpp', diff --git a/src/intel/compiler/test_simd_selection.cpp b/src/intel/compiler/test_simd_selection.cpp new file mode 100644 index 00000000000..f1be0bf185f --- /dev/null +++ b/src/intel/compiler/test_simd_selection.cpp @@ -0,0 +1,282 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include "brw_private.h" +#include "compiler/shader_info.h" +#include "intel/dev/intel_debug.h" +#include "intel/dev/intel_device_info.h" +#include "util/ralloc.h" + +#include + +enum { + SIMD8 = 0, + SIMD16 = 1, + SIMD32 = 2, +}; + +const bool spilled = true; +const bool not_spilled = false; + +class SIMDSelectionTest : public ::testing::Test { +protected: + SIMDSelectionTest() { + mem_ctx = ralloc_context(NULL); + devinfo = rzalloc(mem_ctx, intel_device_info); + prog_data = rzalloc(mem_ctx, struct brw_cs_prog_data); + required_dispatch_width = 0; + } + + ~SIMDSelectionTest() { + ralloc_free(mem_ctx); + }; + + bool should_compile(unsigned simd) { + return brw_simd_should_compile(mem_ctx, simd, devinfo, prog_data, + required_dispatch_width, &error[simd]); + } + + void *mem_ctx; + intel_device_info *devinfo; + struct brw_cs_prog_data *prog_data; + const char *error[3]; + unsigned required_dispatch_width; +}; + +class SIMDSelectionCS : public SIMDSelectionTest { +protected: + SIMDSelectionCS() { + prog_data->base.stage = MESA_SHADER_COMPUTE; + prog_data->local_size[0] = 32; + prog_data->local_size[1] = 1; + prog_data->local_size[2] = 1; + + devinfo->max_cs_workgroup_threads = 64; + } +}; + +TEST_F(SIMDSelectionCS, DefaultsToSIMD16) +{ + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD16); +} + +TEST_F(SIMDSelectionCS, TooBigFor16) +{ + prog_data->local_size[0] = devinfo->max_cs_workgroup_threads; + prog_data->local_size[1] = 32; + prog_data->local_size[2] = 1; + + ASSERT_FALSE(should_compile(SIMD8)); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, spilled); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD32); +} + +TEST_F(SIMDSelectionCS, WorkgroupSize1) +{ + prog_data->local_size[0] = 1; + prog_data->local_size[1] = 1; + prog_data->local_size[2] = 1; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD16); +} + +TEST_F(SIMDSelectionCS, WorkgroupSize8) +{ + prog_data->local_size[0] = 8; + prog_data->local_size[1] = 1; + prog_data->local_size[2] = 1; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD16); +} + +TEST_F(SIMDSelectionCS, WorkgroupSizeVariable) +{ + prog_data->local_size[0] = 0; + prog_data->local_size[1] = 0; + prog_data->local_size[2] = 0; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, not_spilled); + + ASSERT_EQ(prog_data->prog_mask, 1u << SIMD8 | 1u << SIMD16 | 1u << SIMD32); +} + +TEST_F(SIMDSelectionCS, WorkgroupSizeVariableSpilled) +{ + prog_data->local_size[0] = 0; + prog_data->local_size[1] = 0; + prog_data->local_size[2] = 0; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, spilled); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, spilled); + + ASSERT_EQ(prog_data->prog_mask, 1u << SIMD8 | 1u << SIMD16 | 1u << SIMD32); +} + +TEST_F(SIMDSelectionCS, SpillAtSIMD8) +{ + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, spilled); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD8); +} + +TEST_F(SIMDSelectionCS, SpillAtSIMD16) +{ + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, spilled); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD8); +} + +TEST_F(SIMDSelectionCS, EnvironmentVariable32) +{ + intel_debug |= DEBUG_DO32; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, not_spilled); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD32); +} + +TEST_F(SIMDSelectionCS, EnvironmentVariable32ButSpills) +{ + intel_debug |= DEBUG_DO32; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, spilled); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD16); +} + +TEST_F(SIMDSelectionCS, Require8) +{ + required_dispatch_width = 8; + + ASSERT_TRUE(should_compile(SIMD8)); + brw_simd_mark_compiled(SIMD8, prog_data, not_spilled); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD8); +} + +TEST_F(SIMDSelectionCS, Require8ErrorWhenNotCompile) +{ + required_dispatch_width = 8; + + ASSERT_TRUE(should_compile(SIMD8)); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), -1); +} + +TEST_F(SIMDSelectionCS, Require16) +{ + required_dispatch_width = 16; + + ASSERT_FALSE(should_compile(SIMD8)); + ASSERT_TRUE(should_compile(SIMD16)); + brw_simd_mark_compiled(SIMD16, prog_data, not_spilled); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD16); +} + +TEST_F(SIMDSelectionCS, Require16ErrorWhenNotCompile) +{ + required_dispatch_width = 16; + + ASSERT_FALSE(should_compile(SIMD8)); + ASSERT_TRUE(should_compile(SIMD16)); + ASSERT_FALSE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), -1); +} + +TEST_F(SIMDSelectionCS, Require32) +{ + required_dispatch_width = 32; + + ASSERT_FALSE(should_compile(SIMD8)); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_TRUE(should_compile(SIMD32)); + brw_simd_mark_compiled(SIMD32, prog_data, not_spilled); + + ASSERT_EQ(brw_simd_select(prog_data), SIMD32); +} + +TEST_F(SIMDSelectionCS, Require32ErrorWhenNotCompile) +{ + required_dispatch_width = 32; + + ASSERT_FALSE(should_compile(SIMD8)); + ASSERT_FALSE(should_compile(SIMD16)); + ASSERT_TRUE(should_compile(SIMD32)); + + ASSERT_EQ(brw_simd_select(prog_data), -1); +}