From a012f8d18512df9bddd3b17e3d7b3a8be0c41db3 Mon Sep 17 00:00:00 2001 From: jesko Date: Mon, 18 Nov 2024 19:23:31 +0100 Subject: [PATCH] allow case insensitive queries when it does not matter --- refinery/units/formats/__init__.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/refinery/units/formats/__init__.py b/refinery/units/formats/__init__.py index 0d0365d52..aabba80cf 100644 --- a/refinery/units/formats/__init__.py +++ b/refinery/units/formats/__init__.py @@ -49,18 +49,24 @@ def __init__(self): class PathPattern: - def __init__(self, pp: Union[str, re.Pattern], regex=False, fuzzy=0): - if isinstance(pp, re.Pattern): + def __init__(self, query: Union[str, re.Pattern], regex=False, fuzzy=0): + self.query = query + self.regex = regex + self.fuzzy = fuzzy + self.compile() + + def compile(self, **kw): + query = self.query + if isinstance(query, re.Pattern): self.stops = [] - self.pattern = pp + self.pattern = query return - elif not regex: - self.stops = [stop for stop in re.split(R'(.*?[/*?])', pp) if stop] - pp, _, _ = fnmatch.translate(pp).partition(r'\Z') - p1 = re.compile(pp) - p2 = re.compile(F'.*?{pp}') + elif not self.regex: + self.stops = [stop for stop in re.split(R'(.*?[/*?])', query) if stop] + query, _, _ = fnmatch.translate(query).partition(r'\Z') + p1 = re.compile(query, **kw) + p2 = re.compile(F'.*?{query}') self.matchers = [p1.fullmatch, p2.fullmatch, p1.search] - self.fuzzy = fuzzy def reach(self, path): if not any(self.stops): @@ -246,6 +252,10 @@ def normalize(_path: str) -> str: result.path = F'{base}.v{counter:0{width}d}{extension}' self.log_warn(F'read chunk with duplicate path; deduplicating to {result.path}') + if len({r.path.lower() for r in results}) == len(results): + for p in patterns: + p.compile(flags=re.IGNORECASE) + for p in patterns: for fuzzy in range(3): done = self.args.exact