ci: Use cts_runner for our dEQP runs.

This runner is a little project by Bas, written in C++, that spawns threads that then loop grabbing chunks of the (randomly shuffled but consistently so) test list and hand them to a dEQP instance. As the remaining list gets shorter, so do the chunks, so hopefully the threads all complete effectively at once. It also handles restarting after crashes automatically. I've extended the runner a bit to do what I was doing in the bash scripts before, like the skip list and expected failures handling. This project should also be a good baseline for extending to handle retesting of intermittent failures. By switching to it, we can have the swrast tests just take up one job slot on the shared runners and keep their allotment of CPUs busy, instead of taking up job slots with single-threaded dEQP jobs. It will also let us (eventually, once I reprovision) switch the freedreno runners over to threading within the job instead of running concurrent jobs, so that memory scribbles in one pipeline don't affect unrelated pipelines, and I can experiment with their parallelism (particularly on a306 where we are frequently backed up) without trashing other people's jobs. What we lose in this process is per-test output in the log (not a big loss, I think, since we summarize fails at the end and reducing log length keeps Chrome from choking on our logs so badly). We also drop the renderer sanity checking, since it's not saving qpa files for us to go poke through. Given that all the drivers involved have fail lists, if we got the wrong renderer somehow, we'd get a job failure anyway. v2: Rebase on dropping of the autoscale cluster and the arm64 build/test split. Use a script to deduplicate the cts-runner build. v3: Rebase on the amd64 build/test container split. Acked-by: Daniel Stone <daniels@collabora.com> (v1) Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
2019-11-04 10:54:41 -08:00 · 2019-11-04 10:54:41 -08:00 · f08c810028
parent 7f52df7fc9
commit f08c810028
6 changed files with 56 additions and 87 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -15,9 +15,9 @@
 variables:
  UPSTREAM_REPO: mesa/mesa
  DEBIAN_TAG: "amd64-2019-11-13-2"
-  DEBIAN_TEST_TAG: "amd64-test-2019-11-12"
+  DEBIAN_TEST_TAG: "amd64-test-2019-11-12-2"
  DEBIAN_ARM64_TAG: "arm64v8-2019-11-06"
-  DEBIAN_ARM64_TEST_TAG: "arm64v8-test-2019-11-12"
+  DEBIAN_ARM64_TEST_TAG: "arm64v8-test-2019-11-12-2"
  STRETCH_TAG: "2019-09-18"
  DEBIAN_VERSION: buster-slim
  STRETCH_VERSION: stretch-slim
@ -520,19 +520,21 @@ piglit-glslparser+quick_shader:
    - ./artifacts/deqp-runner.sh
 test-llvmpipe-gles2:
  parallel: 4
  variables:
    DEQP_VER: gles2
    DEQP_PARALLEL: 4
    # Don't use threads inside llvmpipe, we've already got all 4 cores
    # busy with DEQP_PARALLEL.
    LP_NUM_THREADS: 0
    DEQP_EXPECTED_FAILS: deqp-llvmpipe-fails.txt
    LIBGL_ALWAYS_SOFTWARE: "true"
    DEQP_RENDERER_MATCH: "llvmpipe"
  extends: .deqp-test
 test-softpipe-gles2:
  extends: test-llvmpipe-gles2
  variables:
    DEQP_EXPECTED_FAILS: deqp-softpipe-fails.txt
-    DEQP_RENDERER_MATCH: "softpipe"
+    DEQP_SKIPS: deqp-softpipe-skips.txt
    GALLIUM_DRIVER: "softpipe"
 # The GLES2 CTS run takes about 8 minutes of CPU time, while GLES3 is
@ -541,9 +543,9 @@ test-softpipe-gles2:
 test-softpipe-gles3-limited:
  variables:
    DEQP_VER: gles3
    DEQP_PARALLEL: 4
    DEQP_EXPECTED_FAILS: deqp-softpipe-fails.txt
    LIBGL_ALWAYS_SOFTWARE: "true"
    DEQP_RENDERER_MATCH: "softpipe"
    GALLIUM_DRIVER: "softpipe"
    CI_NODE_INDEX: 1
    CI_NODE_TOTAL: 10
@ -554,7 +556,6 @@ arm64_a630_gles2:
  image: $DEBIAN_ARM64_TEST_IMAGE
  variables:
    DEQP_VER: gles2
    DEQP_RENDERER_MATCH: "FD630"
    DEQP_EXPECTED_FAILS: deqp-freedreno-a630-fails.txt
    DEQP_SKIPS: deqp-freedreno-a630-skips.txt
    NIR_VALIDATE: 0
@ -584,6 +585,5 @@ arm64_a306_gles2:
  variables:
    DEQP_EXPECTED_FAILS: deqp-freedreno-a307-fails.txt
    DEQP_SKIPS: deqp-default-skips.txt
    DEQP_RENDERER_MATCH: "FD307"
  tags:
    - db410c
--- a/.gitlab-ci/build-cts-runner.sh
+++ b/.gitlab-ci/build-cts-runner.sh
@ -0,0 +1,10 @@
 #!/bin/bash
 # Builds and installs the cts_runner dEQP test runner, then deletes its
 # source checkout so nothing but the installed result is left behind.
 # The test-image install scripts pull this file in with `.` (source), so it
 # runs in the caller's shell: the `set -ex` below stays in effect afterwards,
 # and the final `cd ..` puts the caller back in the directory it started in.
 # Abort on the first failing command and echo each command as it is run.
 set -ex
 # Shallow clone (latest commit only) of the anholt-mesa-ci-2 branch.
 git clone https://github.com/anholt/cts_runner.git --depth 1 -b anholt-mesa-ci-2
 cd cts_runner
 # Configure a Meson build directory named build/ with default options.
 meson build/
 # Build and install with at most 4 parallel jobs.
 # NOTE(review): presumably this is what provides the `vulkan-cts-runner`
 # command on PATH -- confirm the installed binary name and prefix.
 ninja -C build -j4 install
 # Step back out and drop the checkout, including its build/ directory.
 cd ..
 rm -rf cts_runner
--- a/.gitlab-ci/debian-arm64-test-install.sh
+++ b/.gitlab-ci/debian-arm64-test-install.sh
@ -23,13 +23,20 @@ apt-get -y install \
 	libllvm8 \
 	libpng16-16 \
 	libpng-dev \
 	libvulkan-dev \
 	libvulkan1 \
 	meson \
 	pkg-config \
 	procps \
 	python \
 	waffle-utils \
 	wget \
 	zlib1g
 ############### Build dEQP runner
 . .gitlab-ci/build-cts-runner.sh
 ############### Build dEQP
 . .gitlab-ci/build-deqp.sh
@ -47,6 +54,7 @@ apt-get purge -y \
        libgbm-dev \
        libgles2-mesa-dev \
        libpng-dev \
        libvulkan-dev \
        meson \
        pkg-config \
        python \
--- a/.gitlab-ci/debian-test-install.sh
+++ b/.gitlab-ci/debian-test-install.sh
@ -32,6 +32,7 @@ apt-get install -y --no-remove \
      libpng16-16 \
      libpng-dev \
      libvulkan1 \
      libvulkan-dev \
      libwaffle-dev \
      libwayland-server0 \
      libxcb-xfixes0 \
@ -65,6 +66,10 @@ rm -rf target_api
 popd
 ############### Build dEQP runner
 . .gitlab-ci/build-cts-runner.sh
 ############### Build dEQP
 . .gitlab-ci/build-deqp.sh
--- a/.gitlab-ci/deqp-freedreno-a630-skips.txt
+++ b/.gitlab-ci/deqp-freedreno-a630-skips.txt
@ -27,3 +27,6 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_pbo.r16ui_2d
 # Layered rendering is sysmem only and needs working clears
 dEQP-GLES31.functional.geometry_shading.layered.*
 dEQP-GLES31.functional.geometry_shading.instanced.*layer.*
 # Intermittent timeout
 dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23
--- a/.gitlab-ci/deqp-runner.sh
+++ b/.gitlab-ci/deqp-runner.sh
@ -6,8 +6,6 @@ DEQP_OPTIONS=(--deqp-surface-width=256 --deqp-surface-height=256)
 DEQP_OPTIONS+=(--deqp-surface-type=pbuffer)
 DEQP_OPTIONS+=(--deqp-gl-config-name=rgba8888d24s8ms0)
 DEQP_OPTIONS+=(--deqp-visibility=hidden)
 DEQP_OPTIONS+=(--deqp-log-images=disable)
 DEQP_OPTIONS+=(--deqp-crashhandler=enable)
 # It would be nice to be able to enable the watchdog, so that hangs in a test
 # don't need to wait the full hour for the run to time out.  However, some
@ -26,20 +24,7 @@ if [ -z "$DEQP_SKIPS" ]; then
   exit 1
 fi
-# Prep the expected failure list
+ARTIFACTS=`pwd`/artifacts
 if [ -n "$DEQP_EXPECTED_FAILS" ]; then
   export DEQP_EXPECTED_FAILS=`pwd`/artifacts/$DEQP_EXPECTED_FAILS
 else
   export DEQP_EXPECTED_FAILS=/tmp/expect-no-failures.txt
   touch $DEQP_EXPECTED_FAILS
 fi
 sort < $DEQP_EXPECTED_FAILS > /tmp/expected-fails.txt
 # Fix relative paths on inputs.
 export DEQP_SKIPS=`pwd`/artifacts/$DEQP_SKIPS
 # Be a good citizen on the shared runners.
 export LP_NUM_THREADS=4
 # Set up the driver environment.
 export LD_LIBRARY_PATH=`pwd`/install/lib/
@ -52,19 +37,9 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
 RESULTS=`pwd`/results
 mkdir -p $RESULTS
 cd /deqp/modules/$DEQP_VER
 # Generate test case list file
 cp /deqp/mustpass/$DEQP_VER-master.txt /tmp/case-list.txt
 # Note: not using sorted input and comm, because I want to run the tests in
 # the same order that dEQP would.
 while read -r line; do
   if echo "$line" | grep -q '^[^#]'; then
       sed -i "/$line/d" /tmp/case-list.txt
   fi
 done < $DEQP_SKIPS
 # If the job is parallel, take the corresponding fraction of the caselist.
 # Note: N~M is a gnu sed extension to match every nth line (first line is #1).
 if [ -n "$CI_NODE_INDEX" ]; then
@ -76,61 +51,29 @@ if [ ! -s /tmp/case-list.txt ]; then
    exit 1
 fi
-# Cannot use tee because dash doesn't have pipefail
+if [ -n "$DEQP_EXPECTED_FAILS" ]; then
-touch /tmp/result.txt
+    XFAIL="--xfail-list $ARTIFACTS/$DEQP_EXPECTED_FAILS"
-tail -f /tmp/result.txt &
+fi
-./deqp-$DEQP_VER "${DEQP_OPTIONS[@]}" --deqp-log-filename=$RESULTS/results.qpa --deqp-caselist-file=/tmp/case-list.txt >> /tmp/result.txt
+set +e
 vulkan-cts-runner \
    --deqp /deqp/modules/$DEQP_VER/deqp-$DEQP_VER \
    --output $RESULTS/cts-runner-results.txt \
    --caselist /tmp/case-list.txt \
    --exclude-list $ARTIFACTS/$DEQP_SKIPS \
    $XFAIL \
    --job ${DEQP_PARALLEL:-1} \
    -- \
    "${DEQP_OPTIONS[@]}"
 DEQP_EXITCODE=$?
 sed -ne \
    '/StatusCode="Fail"/{x;p}; s/#beginTestCaseResult //; T; h' \
    $RESULTS/results.qpa \
    > /tmp/unsorted-fails.txt
 # Scrape out the renderer that the test run used, so we can validate that the
 # right driver was used.
 if grep -q "dEQP-.*.info.renderer" /tmp/case-list.txt; then
    # This is an ugly dependency on the .qpa format: Print 3 lines after the
    # match, which happens to contain the result.
    RENDERER=`sed -n '/#beginTestCaseResult dEQP-.*.info.renderer/{n;n;n;p}' $RESULTS/results.qpa | sed -n -E "s|<Text>(.*)</Text>|\1|p"`
    echo "GL_RENDERER for this test run: $RENDERER"
    if [ -n "$DEQP_RENDERER_MATCH" ]; then
        echo $RENDERER | grep -q $DEQP_RENDERER_MATCH > /dev/null
    fi
 fi
 if grep -q "dEQP-.*.info.version" /tmp/case-list.txt; then
    # This is an ugly dependency on the .qpa format: Print 3 lines after the
    # match, which happens to contain the result.
    VERSION=`sed -n '/#beginTestCaseResult dEQP-.*.info.version/{n;n;n;p}' $RESULTS/results.qpa | sed -n -E "s|<Text>(.*)</Text>|\1|p"`
    echo "Driver version tested: $VERSION"
 fi
 if [ $DEQP_EXITCODE -ne 0 ]; then
-   exit $DEQP_EXITCODE
+    echo "Some unexpected results found (see cts-runner-results.txt in artifacts for full results):"
-fi
+    cat $RESULTS/cts-runner-results.txt | \
-
+        grep -v ",Pass" | \
-sort < /tmp/unsorted-fails.txt > $RESULTS/fails.txt
+        grep -v ",Skip" | \
-
+        grep -v ",ExpectedFail" | \
-comm -23 $RESULTS/fails.txt /tmp/expected-fails.txt > /tmp/new-fails.txt
+        head -n 50
-if [ -s /tmp/new-fails.txt ]; then
+    exit $DEQP_EXITCODE
    echo "Unexpected failures:"
    cat /tmp/new-fails.txt
    exit 1
 else
    echo "No new failures"
 fi
 sort /tmp/case-list.txt > /tmp/sorted-case-list.txt
 comm -12 /tmp/sorted-case-list.txt /tmp/expected-fails.txt > /tmp/expected-fails-in-caselist.txt
 comm -13 $RESULTS/fails.txt /tmp/expected-fails-in-caselist.txt > /tmp/new-passes.txt
 if [ -s /tmp/new-passes.txt ]; then
    echo "Unexpected passes, please update $DEQP_EXPECTED_FAILS (or add flaky tests to $DEQP_SKIPS):"
    cat /tmp/new-passes.txt
    exit 1
 else
    echo "No new passes"
 fi