From b3ba448ba529caa9d88f180794d479cc441d5f2f Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Wed, 16 Feb 2022 17:52:23 +0000 Subject: [PATCH] ci/lava: Sleep before, not after, API calls We rate-limit LAVA API calls as they are standard polling calls rather than blocking for changes. However, when we sleep after making the calls rather than before, we can block when we want to exit - e.g. after getting the final logs, we will still sleep even though we can drop out. Fix this by moving the sleeps to before the API calls, rather than after. This means that the first calls (when we're waiting to be scheduled, or haven't got our first log lines yet), will be delayed compared to previously, but that's not going to slow us down as even in the best case we won't be executing in a device within the first 15 seconds. Signed-off-by: Daniel Stone Part-of: --- .gitlab-ci/lava/lava_job_submitter.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index 0975a3a0d8d..1f9eb8bffcf 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -242,10 +242,10 @@ def wait_until_job_is_started(proxy, job_id): current_state = "Submitted" waiting_states = ["Submitted", "Scheduling", "Scheduled"] while current_state in waiting_states: + time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC) job_state = _call_proxy(proxy.scheduler.job_state, job_id) current_state = job_state["job_state"] - time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC) print_log(f"Job {job_id} started.") def follow_job_execution(proxy, job_id): @@ -253,6 +253,11 @@ def follow_job_execution(proxy, job_id): finished = False last_time_logs = datetime.now() while not finished: + # `proxy.scheduler.jobs.logs` does not block, even when there is no + # new log to be fetched. To avoid dosing the LAVA dispatcher
+ time.sleep(LOG_POLLING_TIME_SEC) + (finished, data) = _call_proxy(proxy.scheduler.jobs.logs, job_id, line_count) if logs := yaml.load(str(data), Loader=loader(False)): # Reset the timeout @@ -261,18 +266,12 @@ def follow_job_execution(proxy, job_id): print("{} {}".format(line["dt"], line["msg"])) line_count += len(logs) - else: time_limit = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC) if datetime.now() - last_time_logs > time_limit: print_log("LAVA job {} doesn't advance (machine got hung?). Retry.".format(job_id)) return False - # `proxy.scheduler.jobs.logs` does not block, even when there is no - # new log to be fetched. To avoid dosing the LAVA dispatcher - # machine, let's add a sleep to save them some stamina. - time.sleep(LOG_POLLING_TIME_SEC) - return True def show_job_data(proxy, job_id):