diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d77f03b0139..b0a1c67d961 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1240,7 +1240,7 @@ debian-mingw32-x86_64:
     - debian/x86_test-gl
     - debian-testing
 
-.test-vk:
+.use-debian/x86_test-vk:
   extends:
     - .test
     - .set-image-base-tag
@@ -1248,6 +1248,12 @@ debian-mingw32-x86_64:
     MESA_BASE_TAG: *debian-x86_test-base
     MESA_IMAGE_PATH: "debian/x86_test-vk"
     MESA_IMAGE_TAG: *debian-x86_test-vk
+  needs:
+    - debian/x86_test-vk
+
+.test-vk:
+  extends:
+    - .use-debian/x86_test-vk
   needs:
     - debian-testing
     - debian/x86_test-vk
@@ -1415,3 +1421,152 @@ debian-mingw32-x86_64:
   variables:
     HWCI_START_XORG: 1
     HWCI_TEST_SCRIPT: "/install/skqp-runner.sh"
+
+# For Valve's bare-metal testing farm jobs.
+.b2c-test:
+  # It would be nice to use ci-templates within Mesa CI for this job's
+  # image:, but the integration is not possible for the current
+  # use-case. Within this job, two containers are managed: 1) the
+  # gitlab runner container from which the job is submitted to the
+  # DUT, and 2) the test container (e.g. debian/x86_test-vk) within
+  # which the test cases will run on the DUT. Since ci-templates and
+  # the associated image setting macros in this file rely on variables
+  # like FDO_DISTRIBUTION_TAG for *the* image, there is no way to
+  # depend on more than one image per job. So, the job container is
+  # built as part of the CI in the boot2container project.
+  image: registry.freedesktop.org/mupuf/valve-infra/mesa-trigger:2022-02-17.1
+  extends:
+    # Only pull in what is needed to build up the MESA_IMAGE (called
+    # IMAGE_UNDER_TEST here for clarity). This is distinct from the
+    # image within which the job runs on the runner machines. The
+    # IMAGE_UNDER_TEST is deployed to the DUTs.
+    - .incorporate-base-tag+templates-commit
+  variables:
+    # No need by default to pull the whole repo
+    GIT_STRATEGY: none
+    # boot2container initrd configuration parameters.
+    B2C_KERNEL_URL: 'https://gitlab.freedesktop.org/mupuf/valve-infra/-/package_files/117/download'  # 5.16-for-mesa-ci
+    B2C_INITRAMFS_URL: 'https://gitlab.freedesktop.org/mupuf/boot2container/-/releases/v0.9.4/downloads/initramfs.linux_amd64.cpio.xz'
+    B2C_JOB_SUCCESS_REGEX: '\[.*\]: Execution is over, pipeline status: 0\r$'
+    B2C_JOB_WARN_REGEX: 'null'
+    B2C_LOG_LEVEL: 6
+    B2C_POWEROFF_DELAY: 15
+    B2C_SESSION_END_REGEX: '^.*It''s now safe to turn off your computer\r$'
+    B2C_SESSION_REBOOT_REGEX: 'GPU hang detected!'
+    B2C_TIMEOUT_BOOT_MINUTES: 240
+    B2C_TIMEOUT_BOOT_RETRIES: 2
+    B2C_TIMEOUT_FIRST_MINUTES: 5
+    B2C_TIMEOUT_FIRST_RETRIES: 3
+    B2C_TIMEOUT_MINUTES: 2
+    B2C_TIMEOUT_OVERALL_MINUTES: 240
+    B2C_TIMEOUT_RETRIES: 0
+
+    MESA_BASE_TAG: *debian-x86_test-base
+    MESA_IMAGE_PATH: "debian/x86_test-vk"
+    MESA_IMAGE_TAG: *debian-x86_test-vk
+    IMAGE_UNDER_TEST: "$CI_REGISTRY_IMAGE/${MESA_IMAGE_PATH}:${FDO_DISTRIBUTION_TAG}"
+
+    INSTALL_TARBALL: "./artifacts/install.tar"
+    CI_VALVE_ARTIFACTS: "./artifacts/valve"
+    CI_COMMON_SCRIPTS: "./artifacts/ci-common"
+    GENERATE_ENV_SCRIPT: "${CI_COMMON_SCRIPTS}/generate-env.sh"
+    B2C_JOB_TEMPLATE: "${CI_VALVE_ARTIFACTS}/b2c.yml.jinja2.jinja2"
+    JOB_FOLDER: "job_folder"
+  before_script:
+    # We don't want the tarball unpacking of .test, but will take the JWT bits.
+    - !reference [default, before_script]
+    - |
+      set -x
+
+      # Hook point for runner admins: bind-mount an executable
+      # /runner-before-script.sh via the Gitlab runner's config.toml to
+      # iterate without resubmitting pipelines (and waiting on a debug
+      # build of Mesa). The hook's status is saved right away because
+      # any later command (e.g. echo) overwrites $?; a failing hook
+      # aborts the job with that same status.
+      if [ -x /runner-before-script.sh ]; then
+        echo "Executing runner before-script hook..."
+        hook_status=0; sh /runner-before-script.sh || hook_status=$?
+        if [ "$hook_status" -ne 0 ]; then
+          echo "Runner hook failed, goodbye"
+          exit "$hook_status"
+        fi
+      fi
+
+      [ -s "$INSTALL_TARBALL" ] || exit 1
+      [ -d "$CI_VALVE_ARTIFACTS" ] || exit 1
+      [ -d "$CI_COMMON_SCRIPTS" ] || exit 1
+
+
+      B2C_TEST_SCRIPT="bash -c 'source ./set-job-env-vars.sh ; ${B2C_TEST_SCRIPT}'"
+
+      # The Valve CI gateway receives jobs in a YAML format. Create a
+      # job description from the CI environment.
+      python3 "$CI_VALVE_ARTIFACTS"/generate_b2c.py \
+        --ci-job-id "${CI_JOB_ID}" \
+        --container-cmd "${B2C_TEST_SCRIPT}" \
+        --initramfs-url "${B2C_INITRAMFS_URL}" \
+        --job-success-regex "${B2C_JOB_SUCCESS_REGEX}" \
+        --job-warn-regex "${B2C_JOB_WARN_REGEX}" \
+        --kernel-url "${B2C_KERNEL_URL}" \
+        --log-level "${B2C_LOG_LEVEL}" \
+        --poweroff-delay "${B2C_POWEROFF_DELAY}" \
+        --session-end-regex "${B2C_SESSION_END_REGEX}" \
+        --session-reboot-regex "${B2C_SESSION_REBOOT_REGEX}" \
+        --tags "${CI_RUNNER_TAGS}" \
+        --template "${B2C_JOB_TEMPLATE}" \
+        --timeout-boot-minutes "${B2C_TIMEOUT_BOOT_MINUTES}" \
+        --timeout-boot-retries "${B2C_TIMEOUT_BOOT_RETRIES}" \
+        --timeout-first-minutes "${B2C_TIMEOUT_FIRST_MINUTES}" \
+        --timeout-first-retries "${B2C_TIMEOUT_FIRST_RETRIES}" \
+        --timeout-minutes "${B2C_TIMEOUT_MINUTES}" \
+        --timeout-overall-minutes "${B2C_TIMEOUT_OVERALL_MINUTES}" \
+        --timeout-retries "${B2C_TIMEOUT_RETRIES}" \
+        --job-volume-exclusions "${B2C_JOB_VOLUME_EXCLUSIONS}" \
+        --local-container "${IMAGE_UNDER_TEST}" \
+        ${B2C_EXTRA_VOLUME_ARGS} \
+        --working-dir "$CI_PROJECT_DIR"
+
+      cat b2c.yml.jinja2
+
+      rm -rf ${JOB_FOLDER} || true
+      mkdir -v ${JOB_FOLDER}
+      # Create a script to regenerate the CI environment when this job
+      # begins running on the remote DUT.
+      set +x
+      "$CI_COMMON_SCRIPTS"/generate-env.sh > ${JOB_FOLDER}/set-job-env-vars.sh
+      chmod +x ${JOB_FOLDER}/set-job-env-vars.sh
+      echo "Variables passed through:"
+      cat ${JOB_FOLDER}/set-job-env-vars.sh
+      echo "export CI_JOB_JWT=${CI_JOB_JWT}" >> ${JOB_FOLDER}/set-job-env-vars.sh
+      set -x
+
+      # Extract the Mesa distribution into the location expected by
+      # the Mesa CI deqp-runner scripts.
+      tar x -C ${JOB_FOLDER} -f $INSTALL_TARBALL
+
+  script: |
+    slugify () {
+        echo "$1" | sed -r s/[~\^]+//g | sed -r s/[^a-zA-Z0-9]+/-/g | sed -r s/^-+\|-+$//g | tr A-Z a-z
+    }
+
+    # Submit the job to Valve's CI gateway service with the CI
+    # provisioned job_folder.
+    env PYTHONUNBUFFERED=1 executorctl \
+        run -w b2c.yml.jinja2 -j $(slugify "$CI_JOB_NAME") -s ${JOB_FOLDER}
+
+    ls -l
+    # Anything our job places in results/ will be collected by the
+    # Gitlab coordinator for status presentation. results/junit.xml
+    # will be parsed by the UI for more detailed explanations of
+    # test execution.
+  needs:
+    - debian/x86_test-vk
+    - debian-testing
+  artifacts:
+    when: always
+    name: "mesa_${CI_JOB_NAME}"
+    paths:
+      - ${JOB_FOLDER}/results
+    reports:
+      junit: ${JOB_FOLDER}/results/junit.xml
diff --git a/src/amd/ci/gitlab-ci.yml b/src/amd/ci/gitlab-ci.yml
index b50ff36e57d..7d4998245ca 100644
--- a/src/amd/ci/gitlab-ci.yml
+++ b/src/amd/ci/gitlab-ci.yml
@@ -1,19 +1,3 @@
-.test-radv:
-  extends:
-    - .test-vk
-    - .radv-rules
-  variables:
-    VK_DRIVER: radeon
-    DRIVER_NAME: radv
-    # The SPIRV -> NIR code generator generates so many harmless
-    # warnings on the shaders in VK-CTS that jobs fail due to the log
-    # size getting too big. Since it doesn't seem feasible right now
-    # to fix these shaders in the CTS, instead, ignore warnings from
-    # the code generator.
-    MESA_SPIRV_LOG_LEVEL: error
-    ACO_DEBUG: validateir,validatera
-    MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
-
 ############### LAVA
 # Run three jobs in parallel each running 1/30th of the test suite
 radv_stoney_vkcts:amd64:
@@ -37,6 +21,137 @@ radv_stoney_vkcts:amd64:
     DRIVER_NAME: radv
     RUNNER_TAG: mesa-ci-x86-64-lava-hp-11A-G6-EE-grunt
 
+############### Valve Infra
+.test-radv:
+  extends:
+    - .radv-rules
+  variables:
+    VK_DRIVER: radeon
+    DRIVER_NAME: radv
+    # The SPIRV -> NIR code generator generates so many harmless
+    # warnings on the shaders in VK-CTS that jobs fail due to the log
+    # size getting too big. Since it's not feasible right now
+    # to fix these shaders in the CTS, ignore warnings from
+    # the code generator instead.
+    MESA_SPIRV_LOG_LEVEL: error
+    ACO_DEBUG: validateir,validatera
+    MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
+
+.b2c-test-radv:
+  extends:
+    - .test-radv
+    - .b2c-test
+
+############### dEQP tests ###############
+
+# A note on the variables to control fractional dEQP runs:
+# parallel: N means to split the test list into chunks of 1/N lines, and
+# DEQP_FRACTION: M means to run every Mth test from the resulting lines.
+# So parallel: 2 and DEQP_FRACTION: 5 would imply running 1/10th of the suite.
+.deqp-test-valve:
+  extends:
+    - .b2c-test-radv
+    # Keep all the dEQP jobs as manual trigger jobs for now, until we
+    # have enough resources to meet the throughput requirements for
+    # automatic pre-merge in the Mesa CI.
+    - .test-manual-mr
+  variables:
+    B2C_JOB_SUCCESS_REGEX: '^\+ DEQP_EXITCODE=0\r$'
+    B2C_TEST_SCRIPT: ./install/deqp-runner.sh
+    B2C_JOB_VOLUME_EXCLUSIONS: "*.shader_cache"
+    DEQP_VER: vk
+
+# dEQP never finishes on gfx7 due to all the GPU resets and hangs.
+# Hence, disable it for now.
+.deqp-kabini-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 6h 10m
+  variables:
+    # FIXME: the baseline for KABINI still has to be created
+    GPU_VERSION: radv-kabini-aco
+    B2C_TIMEOUT_OVERALL_MINUTES: 360
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
+  tags:
+    - amdgpu:codename:KABINI
+
+# Disabled for now because of Igalia farm issues.
+.deqp-stoney-valve:
+  extends:
+    - .deqp-test-valve
+  parallel: 2
+  # A full run currently takes the thin clients more than an hour.
+  # Extrapolating, four more thin clients (6 in total) would bring
+  # the runtime under 15 minutes.
+  timeout: 4h 10m
+  variables:
+    GPU_VERSION: radv-stoney-aco
+    # Note: this machine only has a small 32g disk!
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
+  tags:
+    - amdgpu:codename:STONEY
+
+deqp-polaris10-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 3h 10m
+  variables:
+    GPU_VERSION: radv-polaris10-aco
+    FDO_CI_CONCURRENT: 16
+    # (2022-01) tsc=unstable was added in response to this kmsg message:
+    # "TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'."
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g tsc=unstable'
+  tags:
+    - amdgpu:codename:POLARIS10
+
+deqp-vega10-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 3h 10m
+  variables:
+    GPU_VERSION: radv-vega10-aco
+    FDO_CI_CONCURRENT: 16
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
+  tags:
+    - amdgpu:codename:VEGA10
+
+deqp-renoir-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 2h 10m
+  variables:
+    GPU_VERSION: radv-renoir-aco
+    FDO_CI_CONCURRENT: 24
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
+  tags:
+    - amdgpu:codename:RENOIR
+
+deqp-navi10-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 2h 10m
+  variables:
+    GPU_VERSION: radv-navi10-aco
+    FDO_CI_CONCURRENT: 24
+    # (2022-01) noapic is set because this machine's serial adapter
+    # uses non-MSI IRQs and generates a lot of them. That tends to
+    # confuse the interrupt controller and break MSI IRQs, which
+    # leads to GPU hangs on amdgpu.
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g noapic'
+  tags:
+    - amdgpu:codename:NAVI10
+
+deqp-navi21-valve:
+  extends:
+    - .deqp-test-valve
+  timeout: 2h 10m
+  variables:
+    GPU_VERSION: radv-sienna_cichlid-aco
+    FDO_CI_CONCURRENT: 16
+    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
+  tags:
+    - amdgpu:codename:NAVI21
+
 ############### Fossilize
 .radv-fossils:
   extends: