From 862b77e40900792ae93a22dcedab2f7d1289d643 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 4 Nov 2021 09:36:56 +0100 Subject: [PATCH 1/3] Fixed check crashing when RF merged results contained HTML, refactored HTML stripping (closes #176) --- CHANGELOG.md | 6 ++++ checks/v1/robotmk | 78 ++++++++++---------------------------------- checks/v2/robotmk.py | 75 +++++++++--------------------------------- 3 files changed, 40 insertions(+), 119 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76a5efc9..c3b4c74c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +* Check: Crashed when Robot Framework merged results contained HTML; refactored HTML stripping (#176) + ## 1.2.1.2 - 2021-09-30 ### Changed diff --git a/checks/v1/robotmk b/checks/v1/robotmk index 197d239b..8608c962 100644 --- a/checks/v1/robotmk +++ b/checks/v1/robotmk @@ -480,13 +480,19 @@ class RobotItem(object): @property def text(self): + text = self._text + + # Remove nasty Playwright log header lines and log messages + text = re.sub('Note: use DEBUG=pw:api environment variable to capture Playwright logs.', '', text) + text = re.sub('={60}|={27} logs ={27}', '', text) + # Return back plain text if the text has a HTML prefix. # This is the necessary for test messages created by rebot after merging test results. - # Change when this https://github.com/robotframework/robotframework/issues/4068 has been solved. - if self._text.startswith('*HTML* '): - return html_to_text(self._text).replace('*HTML* ', '') + # Change when this https://github.com/robotframework/robotframework/issues/4068 has been solved. + if text.startswith('*HTML* '): + return html_to_text(text) else: - return self._text + return text @text.setter def text(self, text): @@ -1228,66 +1234,18 @@ def roundup(number, ndigits=0, return_type=None): return return_type(result) -""" -HTML <-> text conversions. -http://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python -""" - def html_to_text(html): - - class _HTMLToText(HTMLParser): - def __init__(self): - HTMLParser.__init__(self) - self._buf = [] - self.hide_output = False - - def handle_starttag(self, tag, attrs): - if tag in ('p', 'br') and not self.hide_output: - self._buf.append('\n') - elif tag in ('script', 'style'): - self.hide_output = True - - def handle_startendtag(self, tag, attrs): - if tag == 'br': - self._buf.append('\n') - - def handle_endtag(self, tag): - if tag == 'p': - self._buf.append('\n') - elif tag in ('script', 'style'): - self.hide_output = False - - def handle_data(self, text): - if text and not self.hide_output: - self._buf.append(re.sub(r'\s+', ' ', text)) - - def handle_entityref(self, name): - if name in name2codepoint and not self.hide_output: - c = chr(name2codepoint[name]) - self._buf.append(c) - - def handle_charref(self, name): - if not self.hide_output: - n = int(name[1:], 16) if name.startswith('x') else int(name) - self._buf.append(chr(n)) - - def get_text(self): - return re.sub(r' +', ' ', ''.join(self._buf)) - """ Given a piece of HTML, return the plain text it contains. - This handles entities and char refs, but not javascript and stylesheets. """ - try: - from html.parser import HTMLParser - from html.entities import name2codepoint - parser = _HTMLToText() - parser.feed(html) - parser.close() - return parser.get_text() - except: - # on Checkmk1 there is no HTML.Parser; :-/ - return html + # Remove Prefix + html = re.sub('\*HTML\* ', '', html) + html = re.sub('Test has been re-executed and results merged.', 'Test has been re-executed and the results were merged: ', html) + + html = re.sub('|||