Skip to content

Commit

Permalink
fixes buggy extraction of XML attributes
Browse files: browse the repository at this point in the history
  • Loading branch information
huettenhain committed Dec 3, 2024
1 parent cbd2ca0 commit b095a81
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
9 changes: 4 additions & 5 deletions refinery/units/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,12 +342,11 @@ def rank_attribute(attribute: str):
def walk(node: XMLNodeBase):
candidates = [
candidate for candidate, count in Counter(
key
for child in node.children
for key, val in child.attributes.items()
if re.fullmatch(R'[-\s\w+,.;@(){}]{2,64}', nval(val))
key for child in node.children for key, val in child.attributes.items()
if len(val) in range(2, 65) and re.fullmatch(R'[-\s\w+,.;@()]+', nval(val))
).items()
if count == len(node.children)
if count == len(node.children) == len(
{child.attributes[candidate] for child in node.children})
]
if not candidates:
attr = None
Expand Down
14 changes: 12 additions & 2 deletions refinery/units/formats/xml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from refinery.lib.structures import MemoryFile
from refinery.lib.meta import metavars
from refinery.lib.meta import metavars, is_valid_variable_name
from refinery.lib import xml
from refinery.units.sinks.ppxml import ppxml
from refinery.units.formats import XMLToPathExtractorUnit, UnpackResult
Expand All @@ -23,7 +23,17 @@ def extract(node: xml.XMLNode = node):
with MemoryFile() as stream:
node.write(stream)
return bytes(stream.getbuffer() | ppxml)
yield UnpackResult('/'.join(parts), extract, **node.attributes)

attributes = {
self._normalize_key(k): self._normalize_val(v)
for k, v in node.attributes.items()
}

if not all(is_valid_variable_name(k) for k in attributes):
attributes = {F'_{k}': v for k, v in attributes.items()}

yield UnpackResult('/'.join(parts), extract, **attributes)

for child in node.children:
yield from walk(child, *parts, path(child))

Expand Down
10 changes: 10 additions & 0 deletions test/units/formats/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,13 @@ class data:
unit = self.load('WLANProfile/SSIDConfig')
test = str(data | unit)
self.assertIn('33344333', test)

def test_lure_document(self):
    """
    Regression test for XML attribute extraction on a downloaded sample.

    The sample is fetched by hash, passed through the unit loaded via
    ``self.ldu('xt', 'settings.xml')`` and converted to bytes. Two
    ``xtxml`` pipelines are then run against the result and their string
    output is compared to fixed expected values.
    """
    sample = self.download_sample('e6daa00e095948acfc176d71c5bf667a0403e5259653ea5ac8950aee13180ae0')
    settings = sample | self.ldu('xt', 'settings.xml') | bytes

    # First pipeline: emit the `val` variable of the w.rsidRoot node.
    rsid_pipeline = self.load_pipeline('xtxml w.settings/w.rsids/w.rsidRoot [| eat val ]')
    rsid_output = settings | rsid_pipeline | str
    self.assertEqual(rsid_output, '00BA5B2D')

    # Second pipeline: starts from the `val` variable of docVars/10* and runs
    # it through a longer decoding chain that ends in `xtp url`.
    url_pipeline = self.load_pipeline('xtxml docVars/10* [| eat val ]| hex | wshenc | carve -dn5 string [| dedup | pop k | swap k | hex | xor var:k ]| xtp url')
    url_output = settings | url_pipeline | str
    # NOTE(review): the expected URL is spelled as adjacent string literals,
    # presumably so that it does not appear verbatim in the source - confirm.
    self.assertEqual(url_output, 'http'':/''/trust-certificate''.net')

0 comments on commit b095a81

Please sign in to comment.