diff --git a/run_tests.py b/run_tests.py
index a857df6..539faba 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -9,6 +9,7 @@
 import sys
 
 from playwright.sync_api import sync_playwright, Playwright
+import bs4
 
 RED = "\u001b[31m";
 GREEN = "\u001b[32m";
@@ -55,6 +56,68 @@ def matches(test_matcher, test_name):
 			return True
 	return False
 
+def diff_HTML(output, expected_output):
+	"""Return a list of structural mismatches between two HTML documents (empty if none).
+
+	Compares the <body> subtrees (the whole parse tree when a document has no <body>),
+	ignoring comments, whitespace around text nodes and the order of `class` values.
+	"""
+	errors = []
+
+	def canonical_attrs(element):
+		# `class` is sorted so that a different ordering of class names is not a mismatch.
+		attrs = dict(element.attrs)
+		if 'class' in attrs:
+			attrs['class'] = sorted(attrs['class'])
+		return attrs
+
+	def significant_children(element):
+		# Keep tags and non-blank stripped text; drop comments and whitespace-only text.
+		children = []
+		for child in element.contents:
+			if isinstance(child, bs4.element.Tag):
+				children.append(child)
+			elif not isinstance(child, str):
+				# Explicit raise: an `assert` would be stripped under `python -O`.
+				raise TypeError(f'Unknown element type: {type(child)}')
+			elif not isinstance(child, bs4.element.Comment) and child.strip():
+				children.append(child.strip())
+		return children
+
+	def shorten(element, limit=100):
+		# Truncated one-line repr (named to avoid shadowing the builtin `format`).
+		text = repr(element)
+		if len(text) > limit:
+			text = text[:limit] + '...'
+		return text.replace('\n', r'\n')
+
+	def shorten_all(elements):
+		return f'{len(elements)} elements: ' + ", ".join(shorten(x) for x in elements)
+
+	def rec_cmp(found, expected):
+		if found.name != expected.name:
+			errors.append(f'Mismatched tag type, expected: {expected.name}, found {found.name} at {shorten(found)}')
+		found_attrs, expected_attrs = canonical_attrs(found), canonical_attrs(expected)
+		if found_attrs != expected_attrs:
+			errors.append(f'Mismatched attributes, expected: {expected_attrs}, found: {found_attrs}')
+		found_children, expected_children = significant_children(found), significant_children(expected)
+		if len(found_children) != len(expected_children):
+			errors.append(f'Mismatched element length: expected: {shorten_all(expected_children)}, found: {shorten_all(found_children)}')
+			return
+		for found_child, expected_child in zip(found_children, expected_children):
+			if isinstance(found_child, str) != isinstance(expected_child, str):
+				# Text vs. tag: report once; recursing would raise AttributeError on the str.
+				errors.append(f'Mismatched types of subcontent, expected: {shorten(expected_child)}, found: {shorten(found_child)}')
+			elif not isinstance(found_child, str):
+				rec_cmp(found_child, expected_child)
+			elif found_child != expected_child:
+				errors.append(f'Mismatched content, expected: {expected_child}, found {found_child}')
+
+	soups = [bs4.BeautifulSoup(markup, 'html.parser') for markup in (output, expected_output)]
+	# `.body` is None for body-less markup; compare from the parse root instead of raising IndexError.
+	rec_cmp(*(soup if soup.body is None else soup.body for soup in soups))
+	return errors
+
 def main():
 	parser = argparse.ArgumentParser()
 	parser.add_argument('files', nargs='*')
@@ -89,7 +152,7 @@ def main():
 			os.mkdir(output)
 		except FileExistsError:
 			pass
-		possible_outputs = sorted(os.listdir(output))
+		possible_outputs = sorted([x for x in os.listdir(output) if x.endswith('.html')])
 		if possible_outputs and args.generate_missing_only:
 			continue
 		if args.files and not any([matches(matcher, test_name) for matcher in args.files]):
@@ -128,14 +191,20 @@ def main():
 				test_log += f'Succesfully saved new output as file://{os.getcwd()}/{output}/{output_name}.html\n'
 				color_code = YELLOW
 			else:
+				all_mismatches = []
 				found = False
 				for possible_output in possible_outputs:
+					print(os.path.join(output, possible_output))
 					with open(os.path.join(output, possible_output), 'r') as f:
-						if f.read() == test_output:
-							found = True;
-							test_log += f'\tMATCHING: file://{os.getcwd()}/{output}/{possible_output}\n'
-							color_code = GREEN
-							break;
+						found_content = f.read()
+					errors = diff_HTML(test_output, found_content)
+					if errors:
+						all_mismatches.append(errors)
+					else:
+						found = True;
+						test_log += f'\tMATCHING: file://{os.getcwd()}/{output}/{possible_output}\n'
+						color_code = GREEN
+						break;
 				if not found:
 					if args.add_alternate_outputs:
 						for i in itertools.count(1):
@@ -151,10 +220,10 @@ def main():
 					else:
 						failed_tests.append(test_name)
 						if len(possible_outputs) == 1:
-							test_log += f'EXPECTED OUTPUT: file://{os.getcwd()}/{output}/{possible_outputs[0]}\n'
+							test_log += f'EXPECTED OUTPUT: file://{os.getcwd()}/{output}/{possible_outputs[0]}\n\t' + '\n\t'.join(all_mismatches[0]) + '\n'
 						else:
-							outputs = " ".join([f"file://{os.getcwd()}/{output}/{x}" for x in possible_outputs])
-							test_log += f'VALID OUTPUTS: {outputs}\n'
+							outputs = "\n".join([f"\tfile://{os.getcwd()}/{output}/{x}\n\t\t" + '\n\t\t'.join(errors) for x, errors in zip(possible_outputs, all_mismatches)])
+							test_log += f'VALID OUTPUTS:\n{outputs}\n'
 						test_log += 'FAILED\n'
 						color_code = RED
 		print(color_code + test_log + RESET)