ci/bare-metal: Add test phase timeouts to all boards.

This should help with the "marge got stuck for an hour and all I got was
this failed job with no results/" problem that occurs when a system
intermittently wedges.

This replaces the BM_POE_TIMEOUT check ("did we get something on serial in
the last 3 minutes?") that the rpi boards had with a check that the whole
test phase completes within a time limit (20 minutes by default,
overridable per job via TEST_PHASE_TIMEOUT).

Acked-by: Juan A. Suarez <jasuarez@igalia.com>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17096>
This commit is contained in:
Emma Anholt 2022-06-16 14:38:50 -07:00 committed by Marge Bot
parent cd3d9a7a92
commit 5f09b1ebe9
9 changed files with 30 additions and 27 deletions

View File

@ -90,7 +90,8 @@ echo "$BM_CMDLINE" > /tftp/cmdline
set +e
python3 $BM/cros_servo_run.py \
--cpu $BM_SERIAL \
--ec $BM_SERIAL_EC
--ec $BM_SERIAL_EC \
--test-timeout ${TEST_PHASE_TIMEOUT:-20}
ret=$?
set -e

View File

@ -31,13 +31,14 @@ import threading
class CrosServoRun:
def __init__(self, cpu, ec):
def __init__(self, cpu, ec, test_timeout):
self.cpu_ser = SerialBuffer(
cpu, "results/serial.txt", "R SERIAL-CPU> ")
# Merge the EC serial into the cpu_ser's line stream so that we can
# effectively poll on both at the same time and not have to worry about
self.ec_ser = SerialBuffer(
ec, "results/serial-ec.txt", "R SERIAL-EC> ", line_queue=self.cpu_ser.line_queue)
self.test_timeout = test_timeout
def close(self):
self.ec_ser.close()
@ -90,7 +91,7 @@ class CrosServoRun:
return 2
tftp_failures = 0
for line in self.cpu_ser.lines(timeout=120 * 60, phase="test"):
for line in self.cpu_ser.lines(timeout=self.test_timeout, phase="test"):
if re.search("---. end Kernel panic", line):
return 1
@ -150,7 +151,7 @@ class CrosServoRun:
self.print_error(
"Reached the end of the CPU serial log without finding a result")
return 1
return 2
def main():
@ -159,9 +160,11 @@ def main():
help='CPU Serial device', required=True)
parser.add_argument(
'--ec', type=str, help='EC Serial device', required=True)
parser.add_argument(
'--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
servo = CrosServoRun(args.cpu, args.ec)
servo = CrosServoRun(args.cpu, args.ec, args.test_timeout * 60)
while True:
retval = servo.run()

View File

@ -133,6 +133,7 @@ fi
set +e
$BM/fastboot_run.py \
--dev="$BM_SERIAL" \
--test-timeout ${TEST_PHASE_TIMEOUT:-20} \
--fbserial="$BM_FASTBOOT_SERIAL" \
--powerup="$BM_POWERUP" \
--powerdown="$BM_POWERDOWN"

View File

@ -30,14 +30,13 @@ import threading
class FastbootRun:
def __init__(self, args):
def __init__(self, args, test_timeout):
self.powerup = args.powerup
# We would like something like a 1 minute timeout, but the piglit traces
# jobs stall out for long periods of time.
self.ser = SerialBuffer(
args.dev, "results/serial-output.txt", "R SERIAL> ", timeout=600)
args.dev, "results/serial-output.txt", "R SERIAL> ")
self.fastboot = "fastboot boot -s {ser} artifacts/fastboot.img".format(
ser=args.fbserial)
self.test_timeout = test_timeout
def close(self):
self.ser.close()
@ -76,7 +75,7 @@ class FastbootRun:
return 1
print_more_lines = -1
for line in self.ser.lines(timeout=20 * 60, phase="test"):
for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
if print_more_lines == 0:
return 2
if print_more_lines > 0:
@ -138,9 +137,11 @@ def main():
help='shell command for powering off', required=True)
parser.add_argument('--fbserial', type=str,
help='fastboot serial number of the board', required=True)
parser.add_argument('--test-timeout', type=int,
help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
fastboot = FastbootRun(args)
fastboot = FastbootRun(args, args.test_timeout * 60)
while True:
retval = fastboot.run()
@ -148,7 +149,7 @@ def main():
if retval != 2:
break
fastboot = FastbootRun(args)
fastboot = FastbootRun(args, args.test_timeout * 60)
fastboot.logged_system(args.powerdown)

View File

@ -131,7 +131,7 @@ while [ $((ATTEMPTS--)) -gt 0 ]; do
--dev="$BM_SERIAL" \
--powerup="$BM_POWERUP" \
--powerdown="$BM_POWERDOWN" \
--timeout="${BM_POE_TIMEOUT:-60}"
--test-timeout ${TEST_PHASE_TIMEOUT:-20}
ret=$?
if [ $ret -eq 2 ]; then

View File

@ -30,11 +30,12 @@ import threading
class PoERun:
def __init__(self, args):
def __init__(self, args, test_timeout):
self.powerup = args.powerup
self.powerdown = args.powerdown
self.ser = SerialBuffer(
args.dev, "results/serial-output.txt", "", args.timeout)
args.dev, "results/serial-output.txt", "")
self.test_timeout = test_timeout
def print_error(self, message):
RED = '\033[0;31m'
@ -60,7 +61,7 @@ class PoERun:
"Something wrong; couldn't detect the boot start up sequence")
return 2
for line in self.ser.lines(timeout=20 * 60, phase="test"):
for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
if re.search("---. end Kernel panic", line):
return 1
@ -93,11 +94,11 @@ def main():
help='shell command for rebooting', required=True)
parser.add_argument('--powerdown', type=str,
help='shell command for powering off', required=True)
parser.add_argument('--timeout', type=int, default=60,
help='time in seconds to wait for activity', required=False)
parser.add_argument(
'--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
poe = PoERun(args)
poe = PoERun(args, args.test_timeout * 60)
retval = poe.run()
poe.logged_system(args.powerdown)

View File

@ -40,7 +40,6 @@ vc4-rpi3-egl:armhf:
- .piglit-test
- .vc4-rpi3-test:armhf
variables:
BM_POE_TIMEOUT: 180
HWCI_START_XORG: 1
PIGLIT_PLATFORM: mixed_glx_egl
@ -68,7 +67,6 @@ vc4-rpi3-piglit-quick_shader:armhf:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_armhf
BM_POE_TIMEOUT: 300
FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
@ -113,7 +111,6 @@ v3d-rpi4-piglit:armhf:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_armhf
BM_POE_TIMEOUT: 300
FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
@ -134,7 +131,6 @@ v3d-rpi4-traces:arm64:
- .test-manual-mr
variables:
BM_BOOTFS: /boot/raspberrypi_arm64
BM_POE_TIMEOUT: 300
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
HWCI_START_XORG: 1
@ -164,7 +160,6 @@ v3dv-rpi4-vk:arm64:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_arm64
BM_POE_TIMEOUT: 300
DEQP_EXPECTED_RENDERER: "V3D.4.2"
DEQP_FRACTION: 10
DEQP_VER: vk

View File

@ -231,8 +231,9 @@ a630_vk:
DEQP_SUITE: freedreno-a630-vk
a630_vk_full:
# We use a longer timeout to keep the parallel down so that we don't lock up
# too many runners for a long time when a dev is trying out at full VK status.
# We use a longer timeout (2 hour job, 90 minute deqp) to keep the parallel
# down so that we don't lock up too many runners for a long time when a dev is
# testing full VK status.
timeout: 2h
extends:
- a630_vk
@ -240,6 +241,7 @@ a630_vk_full:
parallel: 2
variables:
DEQP_SUITE: freedreno-a630-vk-full
TEST_PHASE_TIMEOUT: 90
a630_vk_asan:
extends:

View File

@ -6,7 +6,6 @@
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /baremetal-files/jetson-nano/
BM_POE_TIMEOUT: 300
BM_CMDLINE: "console=ttyS0,115200n8 rw nfsrootdebug init=/init"
FLAKES_CHANNEL: "#nouveau-ci"
GPU_VERSION: nouveau-gm20b