Skip to content

Commit

Permalink
allow case-insensitive queries when it does not matter (i.e. when no two result paths differ only by case)
Browse files (browse the repository at this point in the history)
  • Loading branch information
huettenhain committed Nov 18, 2024
1 parent 0713566 commit a012f8d
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions refinery/units/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,24 @@ def __init__(self):


class PathPattern:
def __init__(self, pp: Union[str, re.Pattern], regex=False, fuzzy=0):
if isinstance(pp, re.Pattern):
def __init__(self, query: Union[str, re.Pattern], regex=False, fuzzy=0):
self.query = query
self.regex = regex
self.fuzzy = fuzzy
self.compile()

def compile(self, **kw):
query = self.query
if isinstance(query, re.Pattern):
self.stops = []
self.pattern = pp
self.pattern = query
return
elif not regex:
self.stops = [stop for stop in re.split(R'(.*?[/*?])', pp) if stop]
pp, _, _ = fnmatch.translate(pp).partition(r'\Z')
p1 = re.compile(pp)
p2 = re.compile(F'.*?{pp}')
elif not self.regex:
self.stops = [stop for stop in re.split(R'(.*?[/*?])', query) if stop]
query, _, _ = fnmatch.translate(query).partition(r'\Z')
p1 = re.compile(query, **kw)
p2 = re.compile(F'.*?{query}')
self.matchers = [p1.fullmatch, p2.fullmatch, p1.search]
self.fuzzy = fuzzy

def reach(self, path):
if not any(self.stops):
Expand Down Expand Up @@ -246,6 +252,10 @@ def normalize(_path: str) -> str:
result.path = F'{base}.v{counter:0{width}d}{extension}'
self.log_warn(F'read chunk with duplicate path; deduplicating to {result.path}')

if len({r.path.lower() for r in results}) == len(results):
for p in patterns:
p.compile(flags=re.IGNORECASE)

for p in patterns:
for fuzzy in range(3):
done = self.args.exact
Expand Down

0 comments on commit a012f8d

Please sign in to comment.