Skip to content

Commit

Permalink
Merge branch 'master' into pfitzinger-xmlformatter-replacing
Browse files Browse the repository at this point in the history
  • Loading branch information
regebro authored Jan 4, 2024
2 parents b9198a8 + 681b4f9 commit 17e8f54
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 33 deletions.
9 changes: 2 additions & 7 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.7', '3.11', 'pypy-3.7']
python-version: ['3.8', '3.12', 'pypy-3.10']
exclude:
- os: windows-latest
python-version: '3.7'
- os: windows-latest
python-version: 'pypy-3.7'
include:
- os: windows-latest
python-version: '3.8'
python-version: 'pypy-3.10'
steps:
- name: Checkout
uses: actions/checkout@v3
Expand Down
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ Changes
2.6.4 (unreleased)
------------------

- Nothing changed yet.
- Changed the comparison to make accurate and standard more accurate,
although fast gets less accurate as a result.

- Replaced usage of the deprecated ``pkg_resources`` package with ``importlib.metadata``.


2.6.3 (2023-05-21)
Expand Down
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,7 @@ Contributors

* Thomas Pfitzinger, [email protected]

The diff algorithm is based on "`Change Detection in Hierarchically Structured Information <http://ilpubs.stanford.edu/115/1/1995-46.pdf>`_",
* Jacek Chałupka, [email protected]

The diff algorithm is based on "`Change Detection in Hierarchically Structured Information <http://infolab.stanford.edu/c3/papers/html/tdiff3-8/tdiff3-8.html>`_",
and the text diff is using Google's ``diff_match_patch`` algorithm.
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ classifiers =
Operating System :: OS Independent
Programming Language :: Python
Programming Language :: Python :: 3
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: Implementation :: PyPy
keywords = xml, html, diff
Expand All @@ -28,7 +28,7 @@ project_urls =
Source Code = https://github.com/Shoobx/xmldiff

[options]
python_requires = >=3.7
python_requires = >=3.8
zip_safe = True
include_package_data = True
packages = find:
Expand Down
38 changes: 19 additions & 19 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,12 @@ def test_compare_different_leafs(self):
left = lefttree.xpath("/document/story/section[2]/para")[0]
right = righttree.xpath("/document/story/section[2]/para")[0]

self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.75)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.80952380)

# These nodes should not be very similar
left = lefttree.xpath("/document/story/section[1]/para")[0]
right = righttree.xpath("/document/story/section[1]/para")[0]
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45614035087719)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.53731343)

def test_compare_different_nodes(self):
left = """<document>
Expand Down Expand Up @@ -293,7 +293,7 @@ def test_compare_with_xmlid(self):
right = differ.right.xpath("/document/story/section[1]")[0]

# These are very similar
self.assertEqual(differ.leaf_ratio(left, right), 0.9)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.9210526)
# And one out of two children in common
self.assertEqual(differ.child_ratio(left, right), 0.5)
# But different id's, hence 0 as match
Expand All @@ -312,7 +312,7 @@ def test_compare_with_xmlid(self):
# has an xml:id, so they do not match.
left = differ.left.xpath("/document/story/section[3]")[0]
right = differ.right.xpath("/document/story/section[3]")[0]
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.81818181818)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.8666667)
self.assertEqual(differ.child_ratio(left, right), 1.0)
self.assertEqual(differ.node_ratio(left, right), 0)

Expand Down Expand Up @@ -376,7 +376,7 @@ def test_compare_with_uniqueattrs(self):
right = differ.right.xpath("/document/story/section[1]")[0]

# These are very similar
self.assertEqual(differ.leaf_ratio(left, right), 0.90625)
self.assertEqual(differ.leaf_ratio(left, right), 0.925)
# And one out of two children in common
self.assertEqual(differ.child_ratio(left, right), 0.5)
# But different names, hence 0 as match
Expand All @@ -395,17 +395,17 @@ def test_compare_with_uniqueattrs(self):
# has an name, so they do not match.
left = differ.left.xpath("/document/story/section[3]")[0]
right = differ.right.xpath("/document/story/section[3]")[0]
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.78260869565)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.8387097)
self.assertEqual(differ.child_ratio(left, right), 1.0)
self.assertEqual(differ.node_ratio(left, right), 0)

# Now these are structurally similar, have the same name, but
# one of them is not a section, so the uniqueattr does not match
left = differ.left.xpath("/document/story/section[1]")[0]
right = differ.right.xpath("/document/story/subsection[1]")[0]
self.assertAlmostEqual(differ.leaf_ratio(left, right), 1.0)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.9638554)
self.assertEqual(differ.child_ratio(left, right), 0.5)
self.assertAlmostEqual(differ.node_ratio(left, right), 0.7905694150420949)
self.assertAlmostEqual(differ.node_ratio(left, right), 0.7677947)

def test_compare_node_rename(self):
left = """<document>
Expand All @@ -430,24 +430,24 @@ def test_compare_node_rename(self):
left = differ.left.xpath("/document/para[1]")[0]
right = differ.right.xpath("/document/section[1]")[0]

# These have different tags, but should still match
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
# These have different tags, so don't match that great any more
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.7441860)

# These have different tags, and different attribute value,
# but still similar enough
left = differ.left.xpath("/document/para[2]")[0]
right = differ.right.xpath("/document/section[2]")[0]

# These have different tags, but should still match
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.76190476190476)
# These have different tags, so don't match that great any more
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.6578947)

# These have different tags, and a different attribute value,
# and are no longer similar enough
left = differ.left.xpath("/document/para[3]")[0]
right = differ.right.xpath("/document/section[3]")[0]

# These are too different
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45161290322580)
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.4)

def test_compare_namespaces(self):
left = """<document>
Expand All @@ -472,8 +472,8 @@ def test_compare_namespaces(self):
"/document/foo:para[1]", namespaces={"foo": "otheruri"}
)[0]

# These have different namespaces, but should still match
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
# These have different namespaces, but should still match OK
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.915254237)

def test_different_ratio_modes(self):
node1 = etree.Element("para")
Expand All @@ -485,19 +485,19 @@ def test_different_ratio_modes(self):

# These texts are very different
differ = Differ(ratio_mode="accurate")
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.24)
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.3666667)
# However, the quick_ratio doesn't catch that, and thinks they match
differ = Differ(ratio_mode="fast")
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.64)
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.7)
# It still realizes these sentences are different, though.
differ = Differ(ratio_mode="fast")
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.4561403508)
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.53731343)
# Faster thinks the first two are the same!
differ = Differ(ratio_mode="faster")
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 1.0)
# And that the third is almost the same
differ = Differ(ratio_mode="faster")
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.8771929824)
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.89552238)

# Invalid modes raise error:
with self.assertRaises(ValueError):
Expand Down
2 changes: 1 addition & 1 deletion xmldiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def node_text(self, node):
if node in self._text_cache:
return self._text_cache[node]
# Get the texts and the tag as a start
texts = node.xpath("text()")
texts = [node.tag] + node.xpath("text()")

# Then add attributes and values
for tag, value in sorted(self.node_attribs(node).items()):
Expand Down
4 changes: 2 additions & 2 deletions xmldiff/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""All major API points and command-line tools"""
import pkg_resources
from importlib import metadata

from argparse import ArgumentParser, ArgumentTypeError
from lxml import etree
from xmldiff import diff, formatting, patch

__version__ = pkg_resources.require("xmldiff")[0].version
__version__ = metadata.version("xmldiff")

FORMATTERS = {
"diff": formatting.DiffFormatter,
Expand Down

0 comments on commit 17e8f54

Please sign in to comment.