virgl/ci: Setup virtio-vsock based IPC

The mechanism currently used to pass data from the dEQP child process
executed in a crosvm guest environment towards the deqp-runner wrapper
script that starts the crosvm instance is based on creating, writing
and reading regular files.

In addition to the main drawback of using the storage, this approach
is potentially unreliable because the data cannot be transferred in
real-time and there is no control on ending the transmission. It also
requires a forced sleep for syncing the content, while the minimum
amount of time necessary to wait cannot be easily and safely
determined.

Replace this with an IPC based on the virtio transport for virtual
sockets (virtio-vsock).

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14995>
This commit is contained in:
Cristian Ciocaltea 2022-02-08 22:48:39 +02:00 committed by Marge Bot
parent fcce90a095
commit 9ef8af357d
2 changed files with 110 additions and 51 deletions

View File

@ -2,7 +2,9 @@
set -e
export DEQP_TEMP_DIR="$1"
VSOCK_STDOUT=$1
VSOCK_STDERR=$2
VSOCK_TEMP_DIR=$3
mount -t proc none /proc
mount -t sysfs none /sys
@ -10,29 +12,31 @@ mkdir -p /dev/pts
mount -t devpts devpts /dev/pts
mount -t tmpfs tmpfs /tmp
. $DEQP_TEMP_DIR/crosvm-env.sh
. ${VSOCK_TEMP_DIR}/crosvm-env.sh
# .gitlab-ci.yml script variable is using relative paths to install directory,
# so change to that dir before running `crosvm-script`
cd "${CI_PROJECT_DIR}"
# The exception is the dEQP binary, since it needs to run from the directory
# it's in
if [ -d "${DEQP_BIN_DIR}" ]
then
cd "${DEQP_BIN_DIR}"
fi
# The exception is the dEQP binary, as it needs to run from its own directory
[ -z "${DEQP_BIN_DIR}" ] || cd "${DEQP_BIN_DIR}"
dmesg --level crit,err,warn -w >> $DEQP_TEMP_DIR/stderr &
# Use a FIFO to collect relevant error messages
STDERR_FIFO=/tmp/crosvm-stderr.fifo
mkfifo -m 600 ${STDERR_FIFO}
set +e
stdbuf -oL sh $DEQP_TEMP_DIR/crosvm-script.sh 2>> $DEQP_TEMP_DIR/stderr >> $DEQP_TEMP_DIR/stdout
echo $? > $DEQP_TEMP_DIR/exit_code
set -e
dmesg --level crit,err,warn -w > ${STDERR_FIFO} &
DMESG_PID=$!
# Transfer the errors and crosvm-script output via a pair of virtio-vsocks
socat -d -u pipe:${STDERR_FIFO} vsock-listen:${VSOCK_STDERR} &
socat -d -U vsock-listen:${VSOCK_STDOUT} \
system:"stdbuf -eL sh ${VSOCK_TEMP_DIR}/crosvm-script.sh 2> ${STDERR_FIFO}; echo \$? > ${VSOCK_TEMP_DIR}/exit_code",nofork
kill ${DMESG_PID}
wait
sync
sleep 1
poweroff -d -n -f || true
sleep 1 # Just in case init would exit before the kernel shuts down the VM

View File

@ -1,58 +1,113 @@
#!/bin/sh
set -ex
set -e
# This script can be called concurrently, pass arguments and env in a
# per-instance tmp dir
DEQP_TEMP_DIR=$(mktemp -d /tmp.XXXXXXXXXX)
export DEQP_TEMP_DIR
#
# Helper to generate CIDs for virtio-vsock based communication with processes
# running inside crosvm guests.
#
# A CID is a 32-bit Context Identifier to be assigned to a crosvm instance
# and must be unique across the host system. For this purpose, let's take
# the least significant 26 bits from CI_JOB_ID as a base and generate a 6-bit
# prefix number to handle up to 64 concurrent crosvm instances per job runner.
#
# As a result, the following variables are set:
# - VSOCK_CID: the crosvm unique CID to be passed as a run argument
#
# - VSOCK_STDOUT, VSOCK_STDERR: the port numbers the guest should accept
# vsock connections on in order to transfer output messages
#
# - VSOCK_TEMP_DIR: the temporary directory path used to pass additional
# context data towards the guest
#
set_vsock_context() {
[ -n "${CI_JOB_ID}" ] || {
echo "Missing or unset CI_JOB_ID env variable" >&2
exit 1
}
local dir_prefix="/tmp-vsock."
local cid_prefix=0
unset VSOCK_TEMP_DIR
while [ ${cid_prefix} -lt 64 ]; do
VSOCK_TEMP_DIR=${dir_prefix}${cid_prefix}
mkdir "${VSOCK_TEMP_DIR}" >/dev/null 2>&1 && break || unset VSOCK_TEMP_DIR
cid_prefix=$((cid_prefix + 1))
done
[ -n "${VSOCK_TEMP_DIR}" ] || return 1
VSOCK_CID=$(((CI_JOB_ID & 0x3ffffff) | ((cid_prefix & 0x3f) << 26)))
VSOCK_STDOUT=5001
VSOCK_STDERR=5002
return 0
}
# The dEQP binary needs to run from the directory it's in
if [ -n "${1##*.sh}" ] && [ -z "${1##*"deqp"*}" ]; then
DEQP_BIN_DIR=$(dirname "$1")
export DEQP_BIN_DIR
DEQP_BIN_DIR=$(dirname "$1")
export DEQP_BIN_DIR
fi
set_vsock_context || { echo "Could not generate crosvm vsock CID" >&2; exit 1; }
# Ensure cleanup on script exit
trap 'exit ${exit_code}' INT TERM
trap 'exit_code=$?; [ -z "${SOCAT_PIDS}" ] || kill ${SOCAT_PIDS} >/dev/null 2>&1 || true; rm -rf ${VSOCK_TEMP_DIR}' EXIT
# Securely pass the current variables to the crosvm environment
CI_COMMON="$CI_PROJECT_DIR"/install/common
CI_COMMON="${CI_PROJECT_DIR}"/install/common
echo "Variables passed through:"
"${CI_COMMON}"/generate-env.sh | tee ${DEQP_TEMP_DIR}/crosvm-env.sh
"${CI_COMMON}"/generate-env.sh | tee ${VSOCK_TEMP_DIR}/crosvm-env.sh
CROSVM_KERNEL_ARGS="quiet console=null root=my_root rw rootfstype=virtiofs init=$CI_PROJECT_DIR/install/crosvm-init.sh ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0 -- $DEQP_TEMP_DIR"
# Set the crosvm-script as the arguments of the current script
echo "$@" > ${VSOCK_TEMP_DIR}/crosvm-script.sh
# Set the crosvm-script as the arguments of the current script.
echo "$@" > $DEQP_TEMP_DIR/crosvm-script.sh
unset DISPLAY
unset XDG_RUNTIME_DIR
# Start background processes to receive output from guest
socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDERR},retry=200,interval=0.1 stderr &
SOCAT_PIDS=$!
socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDOUT},retry=200,interval=0.1 stdout &
SOCAT_PIDS="${SOCAT_PIDS} $!"
# Setup networking
/usr/sbin/iptables-legacy -w -t nat -A POSTROUTING -o eth0 -j MASQUERADE
echo 1 > /proc/sys/net/ipv4/ip_forward
# Send output from guest to host
touch $DEQP_TEMP_DIR/stderr $DEQP_TEMP_DIR/stdout
tail -f $DEQP_TEMP_DIR/stderr >> /dev/stderr &
ERR_TAIL_PID=$!
tail -f $DEQP_TEMP_DIR/stdout >> /dev/stdout &
OUT_TAIL_PID=$!
# Prepare to start crosvm
unset DISPLAY
unset XDG_RUNTIME_DIR
trap "exit \$exit_code" INT TERM
trap "exit_code=\$?; kill $ERR_TAIL_PID $OUT_TAIL_PID; rm -rf $DEQP_TEMP_DIR" EXIT
CROSVM_KERN_ARGS="quiet console=null root=my_root rw rootfstype=virtiofs ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0"
CROSVM_KERN_ARGS="${CROSVM_KERN_ARGS} init=${CI_PROJECT_DIR}/install/crosvm-init.sh -- ${VSOCK_STDOUT} ${VSOCK_STDERR} ${VSOCK_TEMP_DIR}"
set +e -x
# We aren't testing LLVMPipe here, so we don't need to validate NIR on the host
NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER="$CROSVM_GALLIUM_DRIVER" crosvm run \
--gpu "$CROSVM_GPU_ARGS" \
-m 4096 \
-c 2 \
--disable-sandbox \
--shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \
--host_ip=192.168.30.1 --netmask=255.255.255.0 --mac "AA:BB:CC:00:00:12" \
-p "$CROSVM_KERNEL_ARGS" \
/lava-files/bzImage > $DEQP_TEMP_DIR/crosvm 2>&1
NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER=${CROSVM_GALLIUM_DRIVER} \
crosvm run \
--gpu "${CROSVM_GPU_ARGS}" -m 4096 -c 2 --disable-sandbox \
--shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \
--host_ip "192.168.30.1" --netmask "255.255.255.0" --mac "AA:BB:CC:00:00:12" \
--cid ${VSOCK_CID} -p "${CROSVM_KERN_ARGS}" \
/lava-files/bzImage > ${VSOCK_TEMP_DIR}/crosvm 2>&1
RET=$(cat $DEQP_TEMP_DIR/exit_code || true)
CROSVM_RET=$?
[ ${CROSVM_RET} -eq 0 ] && {
# socat bg processes should terminate as soon as the remote peers exit
wait
# The actual return code is the crosvm guest script's exit code
CROSVM_RET=$(cat ${VSOCK_TEMP_DIR}/exit_code 2>/dev/null)
# Force error when the guest script's exit code is not available
CROSVM_RET=${CROSVM_RET:-1}
}
# Got no exit code from the script, show crosvm output to help with debugging
[ -n "$RET" ] || cat $DEQP_TEMP_DIR/crosvm || true
# Show crosvm output on error to help with debugging
[ ${CROSVM_RET} -eq 0 ] || {
set +x
echo "Dumping crosvm output.." >&2
cat ${VSOCK_TEMP_DIR}/crosvm >&2
set -x
}
exit ${RET:-1}
exit ${CROSVM_RET}