From f831ba238f659abe6b9738a54d91b8d0567e5b5d Mon Sep 17 00:00:00 2001
From: Emma Anholt
Date: Wed, 16 Mar 2022 15:50:00 -0700
Subject: [PATCH] ci/turnip: Increase the hangcheck timer to 2 seconds.

We get a lot of useful coverage from running graphicsfuzz with spilling
enabled, but it's also pretty slow and can cause intermittent hangcheck
failures. I thought I'd categorized them all when merging !14839 (device
loss on reset), but it looks like I missed some, and a single hangcheck
flake is now more likely to take out the whole test run by making the
rest of the caselist flake as well.

This is a little unfortunate in that it means our test environment is not
the same as a stock system you would want to run deqp on to submit
conformance, but I think it's a net improvement in test maintenance work
compared to needing to fix things up later.

We have some other tests besides turnip that can trigger hangchecks which
we might also like this increase for (some disabled traces, for example).
However, freedreno GL has a 5-second timeout waiting for idle when
mapping, and a couple of 2-second timeouts in a row can result in
spurious failures in other tests, so for now the increase is applied only
to the turnip jobs.

Fixes: #6163 Part-of: --- .gitlab-ci/common/generate-env.sh | 1 + .gitlab-ci/common/init-stage1.sh | 1 + .gitlab-ci/common/init-stage2.sh | 6 ++++++ src/freedreno/ci/gitlab-ci.yml | 6 ++++++ 4 files changed, 14 insertions(+) diff --git a/.gitlab-ci/common/generate-env.sh b/.gitlab-ci/common/generate-env.sh index dfbc84b2b15..937ab8ea6d0 100755 --- a/.gitlab-ci/common/generate-env.sh +++ b/.gitlab-ci/common/generate-env.sh @@ -49,6 +49,7 @@ for var in \ FDO_UPSTREAM_REPO \ FD_MESA_DEBUG \ FLAKES_CHANNEL \ + FREEDRENO_HANGCHECK_MS \ GALLIUM_DRIVER \ GALLIVM_PERF \ GPU_VERSION \ diff --git a/.gitlab-ci/common/init-stage1.sh b/.gitlab-ci/common/init-stage1.sh index 648c37a2f90..3b3317a2c02 100755 --- a/.gitlab-ci/common/init-stage1.sh +++ b/.gitlab-ci/common/init-stage1.sh @@ -9,6 +9,7 @@ cd / mount -t proc none /proc mount -t sysfs none /sys +mount -t debugfs none /sys/kernel/debug mount -t devtmpfs none /dev || echo possibly already mounted mkdir -p /dev/pts mount -t devpts devpts /dev/pts diff --git a/.gitlab-ci/common/init-stage2.sh b/.gitlab-ci/common/init-stage2.sh index c0669c7f4cf..fe46338b78a 100755 --- a/.gitlab-ci/common/init-stage2.sh +++ b/.gitlab-ci/common/init-stage2.sh @@ -38,6 +38,12 @@ if [ "$HWCI_FREQ_MAX" = "true" ]; then test -z "$GPU_AUTOSUSPEND" || echo -1 > $GPU_AUTOSUSPEND || true fi +# Increase freedreno hangcheck timer because it's right at the edge of the +# spilling tests timing out (and some traces, too) +if [ -n "$FREEDRENO_HANGCHECK_MS" ]; then + echo $FREEDRENO_HANGCHECK_MS | tee -a /sys/kernel/debug/dri/128/hangcheck_period_ms +fi + # Start a little daemon to capture the first devcoredump we encounter. (They # expire after 5 minutes, so we poll for them). 
./capture-devcoredump.sh & diff --git a/src/freedreno/ci/gitlab-ci.yml b/src/freedreno/ci/gitlab-ci.yml index 75af8285437..b8c0aa220df 100644 --- a/src/freedreno/ci/gitlab-ci.yml +++ b/src/freedreno/ci/gitlab-ci.yml @@ -22,6 +22,9 @@ variables: DEQP_VER: vk VK_DRIVER: freedreno + # Increase the hangcheck timer for our spilling tests which bump up against + # the .5s default. + FREEDRENO_HANGCHECK_MS: 2000 .freedreno-test-traces: extends: @@ -150,6 +153,9 @@ a618_vk: BOOT_METHOD: depthcharge KERNEL_IMAGE_TYPE: "" RUNNER_TAG: mesa-ci-x86-64-lava-sc7180-trogdor-lazor-limozeen + # Increase the hangcheck timer for our spilling tests which bump up against + # the .5s default. + FREEDRENO_HANGCHECK_MS: 2000 a618_vk_full: extends: