From 02cac49721510ee98390bdb8cc0701759e94a7bc Mon Sep 17 00:00:00 2001 From: Guilherme Gallo Date: Thu, 30 Jun 2022 17:58:17 -0300 Subject: [PATCH] ci/lava: heartbeat: don't consider kernel message logs Currently, the submitter consider that every new log that comes from the DUT console is a signal that the device is healthy, but maybe that is not the case, since in some kernel hangs/failures, no output is presented except from some kernel messages. This commit bypass the heartbeat when the LogFollower detect a kernel message. Any log line that does follow the kmsg pattern will make the job labeled as healthy again. Signed-off-by: Guilherme Gallo Part-of: --- .gitlab-ci/lava/lava_job_submitter.py | 6 ++--- .gitlab-ci/lava/utils/lava_log.py | 34 ++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index 40d1ae2a983..07087e8a985 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -278,8 +278,6 @@ class LAVAJob: # When there is no new log data, the YAML is empty if loaded_lines := yaml.load(str(data), Loader=loader(False)): lines = loaded_lines - # If we had non-empty log data, we can assure that the device is alive. - self.heartbeat() self.last_log_line += len(lines) return lines @@ -391,7 +389,9 @@ def fetch_logs(job, max_idle_time, log_follower) -> None: else: raise MesaCIParseException - log_follower.feed(new_log_lines) + if log_follower.feed(new_log_lines): + # If we had non-empty log data, we can assure that the device is alive. + job.heartbeat() parsed_lines = log_follower.flush() for line in parsed_lines: diff --git a/.gitlab-ci/lava/utils/lava_log.py b/.gitlab-ci/lava/utils/lava_log.py index 49b98a8943f..e2eb4ef6589 100644 --- a/.gitlab-ci/lava/utils/lava_log.py +++ b/.gitlab-ci/lava/utils/lava_log.py @@ -253,13 +253,45 @@ class LogFollower: if new_section := log_section.from_log_line_to_section(line): self.update_section(new_section) - def feed(self, new_lines: list[dict[str, str]]) -> None: + def detect_kernel_dump_line(self, line: dict[str, Union[str, list]]) -> bool: + # line["msg"] can be a list[str] when there is a kernel dump + if isinstance(line["msg"], list): + return line["lvl"] == "debug" + + # result level has dict line["msg"] + if not isinstance(line["msg"], str): + return False + + # we have a line, check if it is a kernel message + if re.search(r"\[[\d\s]{5}\.[\d\s]{6}\] +\S{2,}", line["msg"]): + print_log(f"{line['msg']}") + return True + + return False + + def feed(self, new_lines: list[dict[str, str]]) -> bool: + """Input data to be processed by LogFollower instance + Returns true if the DUT (device under test) seems to be alive. + """ + self.watchdog() + + # No signal of job health in the log + is_job_healthy = False + for line in new_lines: + if self.detect_kernel_dump_line(line): + continue + + # At least we are fed with a non-kernel dump log, it seems that the + # job is progressing + is_job_healthy = True self.manage_gl_sections(line) if parsed_line := parse_lava_line(line): self._buffer.append(parsed_line) + return is_job_healthy + def flush(self) -> list[str]: buffer = self._buffer self._buffer = []