ci, valve: Add the dEQP runners for Valve CI

v2.

  - Build the runner image as part of the CI for the boot2container
  project, rather than as a manual step using the build instructions
  in valve-trigger.dockerfile.

  - Depend on a non-default kernel build hosted in the valve-infra
  package repository. This does reduce the current caching feature of
  local artifacts, but makes it easier to chop and change kernels on a
  per-project or even per-test basis.

v3.

  - Depend on a kernel built and stored in the valve-infra generic
  package repo.

  - Build the runner container using ci-templates as part of the CI in
  valve-infra.

  - Now that the runner container is built in the valve-infra CI, I
  dropped the source import of client.py and message.py. They are
  built in the runner container.

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Martin Roukala <martin.roukala@mupuf.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14660>
This commit is contained in:
Charlie Turner 2022-02-21 12:38:25 +00:00
parent f0aee991bf
commit 16b417b8d6
2 changed files with 287 additions and 17 deletions

View File

@ -1240,7 +1240,7 @@ debian-mingw32-x86_64:
- debian/x86_test-gl
- debian-testing
.test-vk:
.use-debian/x86_test-vk:
extends:
- .test
- .set-image-base-tag
@ -1248,6 +1248,12 @@ debian-mingw32-x86_64:
MESA_BASE_TAG: *debian-x86_test-base
MESA_IMAGE_PATH: "debian/x86_test-vk"
MESA_IMAGE_TAG: *debian-x86_test-vk
needs:
- debian/x86_test-vk
.test-vk:
extends:
- .use-debian/x86_test-vk
needs:
- debian-testing
- debian/x86_test-vk
@ -1415,3 +1421,152 @@ debian-mingw32-x86_64:
variables:
HWCI_START_XORG: 1
HWCI_TEST_SCRIPT: "/install/skqp-runner.sh"
# For Valve's bare-metal testing farm jobs.
.b2c-test:
  # Base template for jobs that are handed over to a device under test
  # (DUT) with executorctl. The before_script generates the job
  # description and stages ${JOB_FOLDER}; the script submits both.
  #
  # It would be nice to use ci-templates within Mesa CI for this job's
  # image:, but the integration is not possible for the current
  # use-case. Within this job, two containers are managed. 1) the
  # gitlab runner container from which the job is submitted to the
  # DUT, and 2) the test container (e.g. debian/x86_test-vk) within
  # which the test cases will run on the DUT. Since ci-templates and
  # the associated image setting macros in this file rely on variables
  # like FDO_DISTRIBUTION_TAG for *the* image, there is no way to
  # depend on more than one image per job. So, the job container is
  # built as part of the CI in the boot2container project.
  image: registry.freedesktop.org/mupuf/valve-infra/mesa-trigger:2022-02-17.1
  extends:
    # Only pull in what is needed to build up the MESA_IMAGE (which is
    # called for clarity IMAGE_UNDER_TEST). This is in distinction to
    # the image within which the job runs on the runner machines. The
    # IMAGE_UNDER_TEST is deployed to the DUTs.
    - .incorporate-base-tag+templates-commit
  variables:
    # No need by default to pull the whole repo
    GIT_STRATEGY: none
    # boot2container initrd configuration parameters.
    B2C_KERNEL_URL: 'https://gitlab.freedesktop.org/mupuf/valve-infra/-/package_files/117/download'  # 5.16-for-mesa-ci
    B2C_INITRAMFS_URL: 'https://gitlab.freedesktop.org/mupuf/boot2container/-/releases/v0.9.4/downloads/initramfs.linux_amd64.cpio.xz'
    B2C_JOB_SUCCESS_REGEX: '\[.*\]: Execution is over, pipeline status: 0\r$'
    B2C_JOB_WARN_REGEX: 'null'
    B2C_LOG_LEVEL: 6
    B2C_POWEROFF_DELAY: 15
    B2C_SESSION_END_REGEX: '^.*It''s now safe to turn off your computer\r$'
    B2C_SESSION_REBOOT_REGEX: 'GPU hang detected!'
    B2C_TIMEOUT_BOOT_MINUTES: 240
    B2C_TIMEOUT_BOOT_RETRIES: 2
    B2C_TIMEOUT_FIRST_MINUTES: 5
    B2C_TIMEOUT_FIRST_RETRIES: 3
    B2C_TIMEOUT_MINUTES: 2
    B2C_TIMEOUT_OVERALL_MINUTES: 240
    B2C_TIMEOUT_RETRIES: 0
    # Image that is deployed to the DUT (see IMAGE_UNDER_TEST below).
    MESA_BASE_TAG: *debian-x86_test-base
    MESA_IMAGE_PATH: "debian/x86_test-vk"
    MESA_IMAGE_TAG: *debian-x86_test-vk
    IMAGE_UNDER_TEST: "$CI_REGISTRY_IMAGE/${MESA_IMAGE_PATH}:${FDO_DISTRIBUTION_TAG}"
    # Locations inside the downloaded artifacts that before_script
    # checks for and reads from.
    INSTALL_TARBALL: "./artifacts/install.tar"
    CI_VALVE_ARTIFACTS: "./artifacts/valve"
    CI_COMMON_SCRIPTS: "./artifacts/ci-common"
    GENERATE_ENV_SCRIPT: "${CI_COMMON_SCRIPTS}/generate-env.sh"
    B2C_JOB_TEMPLATE: "${CI_VALVE_ARTIFACTS}/b2c.yml.jinja2.jinja2"
    JOB_FOLDER: "job_folder"
  before_script:
    # We don't want the tarball unpacking of .test, but will take the JWT bits.
    - !reference [default, before_script]
    - |
      set -x

      # Useful as a hook point for runner admins. You may edit the
      # config.toml for the Gitlab runner and use a bind-mount to
      # populate the hook script with some executable commands. This
      # allows quicker feedback than resubmitting pipelines and
      # potentially having to wait for a debug build of Mesa to
      # complete.
      if [ -x /runner-before-script.sh ]; then
        echo "Executing runner before-script hook..."
        # Record the hook's exit status right away: any later command
        # (including the echo below) overwrites $?, and the job must
        # fail with the hook's own status rather than with 0.
        hook_status=0
        sh /runner-before-script.sh || hook_status=$?
        if [ "$hook_status" -ne 0 ]; then
          echo "Runner hook failed, goodbye"
          exit "$hook_status"
        fi
      fi

      # Bail out early when the artifacts this job relies on are missing.
      [ -s "$INSTALL_TARBALL" ] || exit 1
      [ -d "$CI_VALVE_ARTIFACTS" ] || exit 1
      [ -d "$CI_COMMON_SCRIPTS" ] || exit 1

      # Wrap the test command so that it first sources the environment
      # script written into ${JOB_FOLDER} further down.
      B2C_TEST_SCRIPT="bash -c 'source ./set-job-env-vars.sh ; ${B2C_TEST_SCRIPT}'"

      # The Valve CI gateway receives jobs in a YAML format. Create a
      # job description from the CI environment.
      python3 "$CI_VALVE_ARTIFACTS"/generate_b2c.py \
        --ci-job-id "${CI_JOB_ID}" \
        --container-cmd "${B2C_TEST_SCRIPT}" \
        --initramfs-url "${B2C_INITRAMFS_URL}" \
        --job-success-regex "${B2C_JOB_SUCCESS_REGEX}" \
        --job-warn-regex "${B2C_JOB_WARN_REGEX}" \
        --kernel-url "${B2C_KERNEL_URL}" \
        --log-level "${B2C_LOG_LEVEL}" \
        --poweroff-delay "${B2C_POWEROFF_DELAY}" \
        --session-end-regex "${B2C_SESSION_END_REGEX}" \
        --session-reboot-regex "${B2C_SESSION_REBOOT_REGEX}" \
        --tags "${CI_RUNNER_TAGS}" \
        --template "${B2C_JOB_TEMPLATE}" \
        --timeout-boot-minutes "${B2C_TIMEOUT_BOOT_MINUTES}" \
        --timeout-boot-retries "${B2C_TIMEOUT_BOOT_RETRIES}" \
        --timeout-first-minutes "${B2C_TIMEOUT_FIRST_MINUTES}" \
        --timeout-first-retries "${B2C_TIMEOUT_FIRST_RETRIES}" \
        --timeout-minutes "${B2C_TIMEOUT_MINUTES}" \
        --timeout-overall-minutes "${B2C_TIMEOUT_OVERALL_MINUTES}" \
        --timeout-retries "${B2C_TIMEOUT_RETRIES}" \
        --job-volume-exclusions "${B2C_JOB_VOLUME_EXCLUSIONS}" \
        --local-container "${IMAGE_UNDER_TEST}" \
        ${B2C_EXTRA_VOLUME_ARGS} \
        --working-dir "$CI_PROJECT_DIR"

      cat b2c.yml.jinja2

      rm -rf ${JOB_FOLDER} || true
      mkdir -v ${JOB_FOLDER}

      # Create a script to regenerate the CI environment when this job
      # begins running on the remote DUT.
      # Tracing is switched off while the environment is written out and
      # the JWT is appended only after the file has been printed.
      set +x
      "$CI_COMMON_SCRIPTS"/generate-env.sh > ${JOB_FOLDER}/set-job-env-vars.sh
      chmod +x ${JOB_FOLDER}/set-job-env-vars.sh
      echo "Variables passed through:"
      cat ${JOB_FOLDER}/set-job-env-vars.sh
      echo "export CI_JOB_JWT=${CI_JOB_JWT}" >> ${JOB_FOLDER}/set-job-env-vars.sh
      set -x

      # Extract the Mesa distribution into the location expected by
      # the Mesa CI deqp-runner scripts.
      tar x -C ${JOB_FOLDER} -f $INSTALL_TARBALL
  script: |
    # Turn a job name into a lowercase, dash-separated identifier. The
    # sed programs are quoted so the shell never treats them as glob
    # patterns; sed receives the same expressions as before.
    slugify () {
      echo "$1" | sed -r 's/[~^]+//g' | sed -r 's/[^a-zA-Z0-9]+/-/g' | sed -r 's/^-+|-+$//g' | tr A-Z a-z
    }

    # Submit the job to Valve's CI gateway service with the CI
    # provisioned job_folder.
    env PYTHONUNBUFFERED=1 executorctl \
      run -w b2c.yml.jinja2 -j "$(slugify "$CI_JOB_NAME")" -s ${JOB_FOLDER}

    ls -l
    # Anything our job places in results/ will be collected by the
    # Gitlab coordinator for status presentation. results/junit.xml
    # will be parsed by the UI for more detailed explanations of
    # test execution.
  needs:
    - debian/x86_test-vk
    - debian-testing
  artifacts:
    when: always
    name: "mesa_${CI_JOB_NAME}"
    paths:
      - ${JOB_FOLDER}/results
    reports:
      junit: ${JOB_FOLDER}/results/junit.xml

View File

@ -1,19 +1,3 @@
.test-radv:
  # Shared RADV settings, layered on top of the generic Vulkan test
  # template and the RADV rules.
  extends:
    - .test-vk
    - .radv-rules
  variables:
    ACO_DEBUG: validateir,validatera
    DRIVER_NAME: radv
    # The shaders in VK-CTS make the SPIRV -> NIR code generator emit so
    # many harmless warnings that the log grows too big and jobs fail.
    # Fixing those shaders in the CTS doesn't seem feasible right now,
    # so warnings from the code generator are ignored instead.
    MESA_SPIRV_LOG_LEVEL: error
    MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
    VK_DRIVER: radeon
############### LAVA
# Run three jobs in parallel each running 1/30th of the test suite
radv_stoney_vkcts:amd64:
@ -37,6 +21,137 @@ radv_stoney_vkcts:amd64:
DRIVER_NAME: radv
RUNNER_TAG: mesa-ci-x86-64-lava-hp-11A-G6-EE-grunt
############### Valve Infra
.test-radv:
  # Shared RADV settings: the RADV rules plus the driver environment.
  extends:
    - .radv-rules
  variables:
    ACO_DEBUG: validateir,validatera
    DRIVER_NAME: radv
    # The shaders in VK-CTS make the SPIRV -> NIR code generator emit so
    # many harmless warnings that the log grows too big and jobs fail.
    # Fixing those shaders in the CTS is not feasible right now, so
    # warnings from the code generator are ignored instead.
    MESA_SPIRV_LOG_LEVEL: error
    MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
    VK_DRIVER: radeon
.b2c-test-radv:
  # RADV settings combined with the boot2container job template.
  # The order of the entries is kept as-is.
  extends:
    - .test-radv
    - .b2c-test
############### dEQP tests ###############
# A note on the variables that control fractional dEQP runs:
# parallel: N splits the test list into N chunks, each holding 1/N of the
# lines, and DEQP_FRACTION: M runs every Mth test from the resulting lines.
# So parallel: 2 and DEQP_FRACTION: 5 would imply running 1/10th of the suite.
.deqp-test-valve:
  # Common base for the dEQP jobs below: runs deqp-runner.sh as the
  # test script and overrides the job success pattern.
  extends:
    - .b2c-test-radv
    # All dEQP jobs stay manually triggered for now, until there are
    # enough resources to meet the throughput requirements for
    # automatic pre-merge testing in the Mesa CI.
    - .test-manual-mr
  variables:
    B2C_JOB_SUCCESS_REGEX: '^\+ DEQP_EXITCODE=0\r$'
    B2C_JOB_VOLUME_EXCLUSIONS: '*.shader_cache'
    B2C_TEST_SCRIPT: ./install/deqp-runner.sh
    DEQP_VER: vk
# dEQP never finishes on gfx7 due to all the GPU resets and hangs.
# Hence, disable it for now.
.deqp-kabini-valve:
  # Hidden (leading dot) dEQP job template for KABINI machines.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:KABINI
  # The GitLab timeout is ten minutes longer than the 360 minute B2C
  # overall timeout set below.
  timeout: 6h 10m
  variables:
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
    B2C_TIMEOUT_OVERALL_MINUTES: 360
    # FIXME: Create this baseline for KABINI
    GPU_VERSION: radv-kabini-aco
# Disabled for now due to Igalia farm issues.
.deqp-stoney-valve:
  # Hidden (leading dot) dEQP job template for STONEY machines.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:STONEY
  parallel: 2
  # Currently the thin clients can't manage a full run in less than an
  # hour. Interpolation suggests four more thin clients (6 total)
  # would get the runtime under 15 minutes.
  timeout: 4h 10m
  variables:
    GPU_VERSION: radv-stoney-aco
    # Note, it only has a wee 32g disk!
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
deqp-polaris10-valve:
  # dEQP job for machines tagged POLARIS10.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:POLARIS10
  timeout: 3h 10m
  variables:
    # (2022-01) tsc=unstable was added in response to this message in kmsg,
    # TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g tsc=unstable'
    FDO_CI_CONCURRENT: 16
    GPU_VERSION: radv-polaris10-aco
deqp-vega10-valve:
  # dEQP job for machines tagged VEGA10.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:VEGA10
  timeout: 3h 10m
  variables:
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
    FDO_CI_CONCURRENT: 16
    GPU_VERSION: radv-vega10-aco
deqp-renoir-valve:
  # dEQP job for machines tagged RENOIR.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:RENOIR
  timeout: 2h 10m
  variables:
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
    FDO_CI_CONCURRENT: 24
    GPU_VERSION: radv-renoir-aco
deqp-navi10-valve:
  # dEQP job for machines tagged NAVI10.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:NAVI10
  timeout: 2h 10m
  variables:
    # (2022-01) noapic is set because the serial adapter on this
    # machine uses non-MSI IRQs and generates a lot of them. This
    # tends to confuse the interrupt controller and breaks MSI IRQs,
    # leading to GPU hangs on amdgpu.
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g noapic'
    FDO_CI_CONCURRENT: 24
    GPU_VERSION: radv-navi10-aco
deqp-navi21-valve:
  # dEQP job for machines tagged NAVI21; the GPU_VERSION name used for
  # it is radv-sienna_cichlid-aco.
  extends:
    - .deqp-test-valve
  tags:
    - amdgpu:codename:NAVI21
  timeout: 2h 10m
  variables:
    B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
    FDO_CI_CONCURRENT: 16
    GPU_VERSION: radv-sienna_cichlid-aco
############### Fossilize
.radv-fossils:
extends: