ci/lava: Sleep before, not after, API calls

We rate-limit LAVA API calls as they are standard polling calls rather than calls that block waiting for changes. However, when we sleep after making the calls rather than before, we can block when we want to exit - e.g. after getting the final logs, we will still sleep even though we could drop out immediately. Fix this by moving the sleeps to before the API calls, rather than after. This means that the first calls (when we're waiting to be scheduled, or haven't got our first log lines yet) will be delayed compared to previously, but that's not going to slow us down, as even in the best case we won't be executing on a device within the first 15 seconds. Signed-off-by: Daniel Stone <daniels@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15938>
2022-02-16 17:52:23 +00:00 · 2022-02-16 17:52:23 +00:00 · b3ba448ba5
parent d3ef3657b2
commit b3ba448ba5
1 changed files with 6 additions and 7 deletions
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ b/.gitlab-ci/lava/lava_job_submitter.py
@ -242,10 +242,10 @@ def wait_until_job_is_started(proxy, job_id):
    current_state = "Submitted"
    waiting_states = ["Submitted", "Scheduling", "Scheduled"]
    while current_state in waiting_states:
+        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
        job_state = _call_proxy(proxy.scheduler.job_state, job_id)
        current_state = job_state["job_state"]

-        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
    print_log(f"Job {job_id} started.")

 def follow_job_execution(proxy, job_id):
@ -253,6 +253,11 @@ def follow_job_execution(proxy, job_id):
    finished = False
    last_time_logs = datetime.now()
    while not finished:
+        # `proxy.scheduler.jobs.logs` does not block, even when there is no
+        # new log to be fetched. To avoid dosing the LAVA dispatcher
+        # machine, let's add a sleep to save them some stamina.
+        time.sleep(LOG_POLLING_TIME_SEC)
+
        (finished, data) = _call_proxy(proxy.scheduler.jobs.logs, job_id, line_count)
        if logs := yaml.load(str(data), Loader=loader(False)):
            # Reset the timeout
@ -261,18 +266,12 @@ def follow_job_execution(proxy, job_id):
                print("{} {}".format(line["dt"], line["msg"]))

            line_count += len(logs)
-
        else:
            time_limit = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
            if datetime.now() - last_time_logs > time_limit:
                print_log("LAVA job {} doesn't advance (machine got hung?). Retry.".format(job_id))
                return False

-        # `proxy.scheduler.jobs.logs` does not block, even when there is no
-        # new log to be fetched. To avoid dosing the LAVA dispatcher
-        # machine, let's add a sleep to save them some stamina.
-        time.sleep(LOG_POLLING_TIME_SEC)
-
    return True

 def show_job_data(proxy, job_id):