util: Remove util_cpu_detect
util_cpu_detect is an anti-pattern: it relies on callers high up in the call chain initializing a local implementation detail. As a real example, I added: ...a Mali compiler unit test ...that called bi_imm_f16() to construct an FP16 immediate ...that calls _mesa_float_to_half internally ...that calls util_get_cpu_caps internally, but only on x86_64! ...that relies on util_cpu_detect having been called before. As a consequence, this unit test: ...crashes on x86_64 with USE_X86_64_ASM set ...passes on every other architecture ...works on my local arm64 workstation and on my test board ...failed CI which runs on x86_64 ...needed to have a random util_cpu_detect() call sprinkled in. This is a bad design decision. It pollutes the tree with magic, it causes mysterious CI failures especially for non-x86_64 developers, and it is not justified by a micro-optimization. Instead, let's call util_cpu_detect directly from util_get_cpu_caps, avoiding the footgun where it fails to be called. This cleans up Mesa's design, simplifies the tree, and avoids a class of a (possibly platform-specific) failures. To mitigate the added overhead, wrap it all in a (fast) atomic load check and declare the whole thing as ATTRIBUTE_CONST so the compiler will CSE calls to util_cpu_detect. Co-authored-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Marek Olšák <maraeo@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15580>
This commit is contained in:
parent
90a0675989
commit
1b8a43a0ba
|
@ -701,8 +701,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
/* Add some margin of error, though this shouldn't be needed in theory. */
|
||||
info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
/* Set chip identification. */
|
||||
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
|
||||
info->pci_rev_id = amdinfo->pci_rev_id;
|
||||
|
|
|
@ -34,8 +34,6 @@
|
|||
#include "aco_ir.h"
|
||||
#include "framework.h"
|
||||
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
static const char *help_message =
|
||||
"Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
|
||||
"\n"
|
||||
|
@ -243,8 +241,6 @@ int main(int argc, char **argv)
|
|||
return 99;
|
||||
}
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
if (do_list) {
|
||||
for (auto test : tests)
|
||||
printf("%s\n", test.first.c_str());
|
||||
|
|
|
@ -51,7 +51,6 @@
|
|||
|
||||
#include "util/build_id.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
#ifdef VK_USE_PLATFORM_XCB_KHR
|
||||
#include <xcb/xcb.h>
|
||||
|
@ -253,8 +252,6 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
|||
}
|
||||
}
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||
|
||||
*pInstance = v3dv_instance_to_handle(instance);
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include "compiler/glsl/glsl_parser_extras.h"
|
||||
#include "glsl_types.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
|
||||
|
@ -520,11 +519,6 @@ hash_free_type_function(struct hash_entry *entry)
|
|||
void
|
||||
glsl_type_singleton_init_or_ref()
|
||||
{
|
||||
/* This is required for _mesa_half_to_float() which is
|
||||
* required for constant-folding 16-bit float ops.
|
||||
*/
|
||||
util_cpu_detect();
|
||||
|
||||
mtx_lock(&glsl_type::hash_mutex);
|
||||
glsl_type_users++;
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
|
|
@ -773,8 +773,6 @@ isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *option
|
|||
if (!options)
|
||||
options = &default_options;
|
||||
|
||||
util_cpu_detect(); /* needed for _mesa_half_to_float() */
|
||||
|
||||
state = rzalloc_size(NULL, sizeof(*state));
|
||||
state->options = options;
|
||||
state->num_instr = sz / (BITMASK_WORDS * sizeof(BITSET_WORD));
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
#include "pipe/p_context.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "util/u_prim.h"
|
||||
|
@ -85,9 +84,6 @@ draw_create_context(struct pipe_context *pipe, void *context,
|
|||
if (!draw)
|
||||
goto err_out;
|
||||
|
||||
/* we need correct cpu caps for disabling denorms in draw_vbo() */
|
||||
util_cpu_detect();
|
||||
|
||||
#ifdef DRAW_LLVM_AVAILABLE
|
||||
if (try_llvm && draw_get_option_use_llvm()) {
|
||||
draw->llvm = draw_llvm_create(draw, (LLVMContextRef)context);
|
||||
|
|
|
@ -430,8 +430,6 @@ lp_build_init(void)
|
|||
|
||||
lp_set_target_options();
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
/* For simulating less capable machines */
|
||||
#ifdef DEBUG
|
||||
if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
|
||||
#include "pipe_loader_priv.h"
|
||||
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
|
@ -165,7 +164,6 @@ pipe_loader_create_screen_vk(struct pipe_loader_device *dev, bool sw_vk)
|
|||
{
|
||||
struct pipe_screen_config config;
|
||||
|
||||
util_cpu_detect();
|
||||
pipe_loader_load_options(dev);
|
||||
config.options_info = &dev->option_info;
|
||||
config.options = &dev->option_cache;
|
||||
|
|
|
@ -37,7 +37,6 @@ DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false);
|
|||
|
||||
static const struct util_cpu_caps_t *get_cpu_caps(void)
|
||||
{
|
||||
util_cpu_detect();
|
||||
return util_get_cpu_caps();
|
||||
}
|
||||
|
||||
|
|
|
@ -2159,7 +2159,6 @@ struct x86_reg x86_fn_arg( struct x86_function *p,
|
|||
|
||||
static void x86_init_func_common( struct x86_function *p )
|
||||
{
|
||||
util_cpu_detect();
|
||||
p->caps = 0;
|
||||
if(util_get_cpu_caps()->has_mmx)
|
||||
p->caps |= X86_MMX;
|
||||
|
|
|
@ -4299,8 +4299,6 @@ threaded_context_create(struct pipe_context *pipe,
|
|||
if (!pipe)
|
||||
return NULL;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
|
||||
return pipe;
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
|
@ -176,9 +175,6 @@ main(int argc, char **argv)
|
|||
return -1;
|
||||
}
|
||||
|
||||
/* Needed by _mesa_half_to_float() */
|
||||
util_cpu_detect();
|
||||
|
||||
if (is_frag) {
|
||||
assert((size & 0x3) == 0);
|
||||
size >>= 2;
|
||||
|
|
|
@ -921,8 +921,8 @@ static void update_cache_sha1_cpu(struct mesa_sha1 *ctx)
|
|||
* Don't need the cpu cache affinity stuff. The rest
|
||||
* is contained in first 5 dwords.
|
||||
*/
|
||||
STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t));
|
||||
_mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t));
|
||||
STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 6 * sizeof(uint32_t));
|
||||
_mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
static void lp_disk_cache_create(struct llvmpipe_screen *screen)
|
||||
|
@ -1024,8 +1024,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
|
|||
{
|
||||
struct llvmpipe_screen *screen;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
#ifdef DEBUG
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "gallivm/lp_bld_const.h"
|
||||
|
@ -381,7 +380,6 @@ int main(int argc, char **argv)
|
|||
boolean single = FALSE;
|
||||
unsigned fpstate;
|
||||
|
||||
util_cpu_detect();
|
||||
fpstate = util_fpstate_get();
|
||||
util_fpstate_set_denorms_to_zero(fpstate);
|
||||
|
||||
|
|
|
@ -994,8 +994,6 @@ static void si_init_renderer_string(struct si_screen *sscreen)
|
|||
|
||||
void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
{
|
||||
util_cpu_detect();
|
||||
|
||||
sscreen->b.get_name = si_get_name;
|
||||
sscreen->b.get_vendor = si_get_vendor;
|
||||
sscreen->b.get_device_vendor = si_get_device_vendor;
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
#include "pipe/p_screen.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/format/u_format.h"
|
||||
|
@ -591,8 +590,6 @@ vc4_screen_create(int fd, struct renderonly *ro)
|
|||
if (!vc4_get_chip_info(screen))
|
||||
goto fail;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16);
|
||||
|
||||
vc4_fence_screen_init(screen);
|
||||
|
|
|
@ -2035,7 +2035,6 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
|
|||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
util_cpu_detect();
|
||||
screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1);
|
||||
|
||||
zink_debug = debug_get_option_zink_debug();
|
||||
|
|
|
@ -69,8 +69,6 @@ int main(int argc, char** argv)
|
|||
|
||||
create_fn = 0;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
if (argc <= 1 ||
|
||||
!strcmp(argv[1], "default") )
|
||||
create_fn = translate_create;
|
||||
|
|
|
@ -36,7 +36,6 @@ test(void)
|
|||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
util_cpu_detect();
|
||||
test();
|
||||
|
||||
/* Test non-f16c. */
|
||||
|
|
|
@ -35,15 +35,9 @@
|
|||
#include "main/glformats.h"
|
||||
#include "main/format_unpack.h"
|
||||
#include "main/format_pack.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
// Test fixture for Format tests.
|
||||
// Currently just ensures that util_cpu_detect() has been called
|
||||
class MesaFormatsTest : public ::testing::Test {
|
||||
protected:
|
||||
MesaFormatsTest() {
|
||||
util_cpu_detect();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -480,8 +480,6 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
|
|||
uint i;
|
||||
struct st_context *st = CALLOC_STRUCT( st_context);
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
st->options = *options;
|
||||
|
||||
ctx->st_opts = &st->options;
|
||||
|
@ -862,8 +860,6 @@ st_create_context(gl_api api, struct pipe_context *pipe,
|
|||
struct dd_function_table funcs;
|
||||
struct st_context *st;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
memset(&funcs, 0, sizeof(funcs));
|
||||
st_init_driver_functions(pipe->screen, &funcs, has_egl_image_validate);
|
||||
|
||||
|
|
|
@ -869,8 +869,6 @@ int main(int argc, char **argv)
|
|||
{
|
||||
boolean success;
|
||||
|
||||
util_cpu_detect();
|
||||
|
||||
success = test_all();
|
||||
|
||||
return success ? 0 : 1;
|
||||
|
|
|
@ -860,6 +860,9 @@ util_cpu_detect_once(void)
|
|||
printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches);
|
||||
printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits);
|
||||
}
|
||||
|
||||
/* This must happen at the end as it's used to guard everything else */
|
||||
p_atomic_set(&util_cpu_caps.detect_done, 1);
|
||||
}
|
||||
|
||||
static once_flag cpu_once_flag = ONCE_FLAG_INIT;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include <stdbool.h>
|
||||
|
||||
#include "pipe/p_config.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_thread.h"
|
||||
|
||||
|
||||
|
@ -60,6 +61,12 @@ enum cpu_family {
|
|||
typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
|
||||
|
||||
struct util_cpu_caps_t {
|
||||
/**
|
||||
* Initialized to 0 and set to non-zero with an atomic after the entire
|
||||
* struct has been initialized.
|
||||
*/
|
||||
uint32_t detect_done;
|
||||
|
||||
/**
|
||||
* Number of CPUs available to the process.
|
||||
*
|
||||
|
@ -127,21 +134,41 @@ struct util_cpu_caps_t {
|
|||
|
||||
#define U_CPU_INVALID_L3 0xffff
|
||||
|
||||
static inline const struct util_cpu_caps_t *
|
||||
util_get_cpu_caps(void)
|
||||
{
|
||||
extern struct util_cpu_caps_t util_cpu_caps;
|
||||
|
||||
/* If you hit this assert, it means that something is using the
|
||||
* cpu-caps without having first called util_cpu_detect()
|
||||
*/
|
||||
assert(util_cpu_caps.nr_cpus >= 1);
|
||||
|
||||
return &util_cpu_caps;
|
||||
}
|
||||
|
||||
void util_cpu_detect(void);
|
||||
|
||||
static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
|
||||
util_get_cpu_caps(void)
|
||||
{
|
||||
extern struct util_cpu_caps_t util_cpu_caps;
|
||||
|
||||
/* On most CPU architectures, an atomic read is simply a regular memory
|
||||
* load instruction with some extra compiler magic to prevent code
|
||||
* re-ordering around it. The perf impact of doing this check should be
|
||||
* negligible in most cases.
|
||||
*
|
||||
* Also, even though it looks like a bit of a lie, we've declared this
|
||||
* function with ATTRIBUTE_CONST. The GCC docs say:
|
||||
*
|
||||
* "Calls to functions whose return value is not affected by changes to
|
||||
* the observable state of the program and that have no observable
|
||||
* effects on such state other than to return a value may lend
|
||||
* themselves to optimizations such as common subexpression elimination.
|
||||
* Declaring such functions with the const attribute allows GCC to avoid
|
||||
* emitting some calls in repeated invocations of the function with the
|
||||
* same argument values."
|
||||
*
|
||||
* The word "observable" is important here. With the exception of a
|
||||
* llvmpipe debug flag behind an environment variable and a few unit tests,
|
||||
* all of which emulate worse CPUs, this function neither affects nor is
|
||||
* affected by any "observable" state. It has its own internal state for
|
||||
* sure, but that state is such that it appears to return exactly the same
|
||||
* value with the same internal data every time.
|
||||
*/
|
||||
if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done)))
|
||||
util_cpu_detect();
|
||||
|
||||
return &util_cpu_caps;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -263,9 +263,6 @@ util_queue_thread_func(void *input)
|
|||
|
||||
memset(mask, 0xff, sizeof(mask));
|
||||
|
||||
/* Ensure util_cpu_caps.num_cpu_mask_bits is initialized: */
|
||||
util_cpu_detect();
|
||||
|
||||
util_set_current_thread_affinity(mask, NULL,
|
||||
util_get_cpu_caps()->num_cpu_mask_bits);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue