From db45c694b981e4edf9bc4e37462505add8a84cb1 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 12 Sep 2024 17:07:36 +0200 Subject: [PATCH] MongoDB: Avoid URL object <-> string conversions on a few spots --- cratedb_toolkit/api/main.py | 7 +++++-- cratedb_toolkit/io/mongodb/api.py | 22 +++++++++++++++------- cratedb_toolkit/io/mongodb/copy.py | 6 +++--- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/cratedb_toolkit/api/main.py b/cratedb_toolkit/api/main.py index c68a016..32bcece 100644 --- a/cratedb_toolkit/api/main.py +++ b/cratedb_toolkit/api/main.py @@ -146,7 +146,7 @@ def load_table(self, resource: InputOutputResource, target: TableAddress, transf return mongodb_relay_cdc(str(source_url_obj), target_url, progress=True) else: return mongodb_copy_generic( - str(source_url_obj), + source_url_obj, target_url, transformation=transformation, progress=True, @@ -157,7 +157,10 @@ def load_table(self, resource: InputOutputResource, target: TableAddress, transf def mongodb_copy_generic( - source_url: str, target_url: str, transformation: t.Union[Path, None] = None, progress: bool = False + source_url: t.Union[str, URL], + target_url: t.Union[str, URL], + transformation: t.Union[Path, None] = None, + progress: bool = False, ): from cratedb_toolkit.io.mongodb.api import mongodb_copy diff --git a/cratedb_toolkit/io/mongodb/api.py b/cratedb_toolkit/io/mongodb/api.py index de8c31d..b201731 100644 --- a/cratedb_toolkit/io/mongodb/api.py +++ b/cratedb_toolkit/io/mongodb/api.py @@ -90,7 +90,12 @@ def mongodb_copy_migr8(source_url, target_url, transformation: Path = None, limi return True -def mongodb_copy(source_url, target_url, transformation: t.Union[Path, None] = None, progress: bool = False): +def mongodb_copy( + source_url: t.Union[str, URL], + target_url: t.Union[str, URL], + transformation: t.Union[Path, None] = None, + progress: bool = False, +): """ Transfer MongoDB collection using translator component. @@ -102,6 +107,9 @@ def mongodb_copy(source_url, target_url, transformation: t.Union[Path, None] = N logger.info(f"mongodb_copy. source={source_url}, target={target_url}") + source_url = URL(source_url) + target_url = URL(target_url) + # Optionally configure transformations. tm = None if transformation: @@ -110,9 +118,9 @@ def mongodb_copy(source_url, target_url, transformation: t.Union[Path, None] = N tasks = [] has_table = True - if "*" in source_url: + if "*" in source_url.path: has_table = False - mongodb_address = DatabaseAddress.from_string(source_url) + mongodb_address = DatabaseAddress(source_url) mongodb_uri, mongodb_collection_address = mongodb_address.decode() if mongodb_collection_address.table is None: has_table = False @@ -129,8 +137,8 @@ def mongodb_copy(source_url, target_url, transformation: t.Union[Path, None] = N ) else: logger.info(f"Inquiring collections at {source_url}") - mongodb_uri = URL(source_url) - cratedb_uri = URL(target_url) + mongodb_uri = source_url + cratedb_uri = target_url # What the hack? if ( mongodb_uri.scheme.startswith("mongodb") @@ -151,8 +159,8 @@ def mongodb_copy(source_url, target_url, transformation: t.Union[Path, None] = N cratedb_uri_effective = cratedb_uri.navigate(Path(collection_path).stem) tasks.append( MongoDBFullLoad( - mongodb_url=str(mongodb_uri_effective), - cratedb_url=str(cratedb_uri_effective), + mongodb_url=mongodb_uri_effective, + cratedb_url=cratedb_uri_effective, tm=tm, progress=progress, ) diff --git a/cratedb_toolkit/io/mongodb/copy.py b/cratedb_toolkit/io/mongodb/copy.py index ec894c0..893a428 100644 --- a/cratedb_toolkit/io/mongodb/copy.py +++ b/cratedb_toolkit/io/mongodb/copy.py @@ -69,8 +69,8 @@ class MongoDBFullLoad: def __init__( self, - mongodb_url: str, - cratedb_url: str, + mongodb_url: t.Union[str, URL], + cratedb_url: t.Union[str, URL], tm: t.Union[TransformationManager, None], on_error: t.Literal["ignore", "raise"] = "raise", progress: bool = False, @@ -83,7 +83,7 @@ def __init__( self.mongodb_adapter = mongodb_adapter_factory(self.mongodb_uri) # Decode database URL: CrateDB. - self.cratedb_address = DatabaseAddress.from_string(cratedb_url) + self.cratedb_address = DatabaseAddress(self.cratedb_uri) self.cratedb_sqlalchemy_url, self.cratedb_table_address = self.cratedb_address.decode() cratedb_table = self.cratedb_table_address.fullname