Skip to content

Commit

Permalink
Implement new feature copy-links (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
drscholly authored Apr 9, 2024
1 parent 81d0274 commit 4116c63
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 12 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Toolbox allows you to perform admin operations on [DataGalaxy](https://www.datag
- **Copy dictionary** objects from one workspace to another.
- **Copy dataprocessings** from one workspace to another.
- **Copy usages** from one workspace to another.
- **Copy links** from one workspace to another.

## Installation

Expand Down Expand Up @@ -86,6 +87,13 @@ datagalaxy-toolbox.exe copy-usages [-h] --url-source URL_SOURCE --token-source T
```
`--url-target` and `--token-target` are optional if the copy is made on the same client space.

#### copy-links

```
datagalaxy-toolbox.exe copy-links [-h] --url-source URL_SOURCE --token-source TOKEN_SOURCE [--url-target URL_TARGET] [--token-target TOKEN_TARGET] --workspace-source WORKSPACE_SOURCE --workspace-target WORKSPACE_TARGET
```
`--url-target` and `--token-target` are optional if the copy is made on the same client space.


## Development

Expand Down
37 changes: 37 additions & 0 deletions tests/test_copy_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from toolbox.commands.copy_links import copy_links
from toolbox.api.datagalaxy_api_workspaces import DataGalaxyApiWorkspace
from toolbox.api.datagalaxy_api import DataGalaxyApiAuthentication, Token
import pytest as pytest


# Mocks

def mock_list_links_on_source_workspace(self, workspace_name):
    """Stand-in link listing: three fake links for 'workspace_source', nothing otherwise."""
    return ['link1', 'link2', 'link3'] if workspace_name == 'workspace_source' else []


# Scenarios

def test_copy_links_when_workspace_source_does_not_exist(mocker):
    """copy_links must raise when the source workspace lookup returns None."""
    # GIVEN: authentication is stubbed out and get_workspace finds nothing
    mocker.patch.object(Token, 'get_client_space_id', autospec=True, return_value='cid')
    mocker.patch.object(DataGalaxyApiAuthentication, 'authenticate', autospec=True, return_value='token')
    mocker.patch.object(DataGalaxyApiWorkspace, 'list_workspaces', autospec=True,
                        return_value=['workspace_source'])
    mocker.patch.object(DataGalaxyApiWorkspace, 'get_workspace', autospec=True, return_value=None)

    call_arguments = {
        'url_source': 'url_source',
        'token_source': 'token_source',
        'url_target': 'url_target',
        'token_target': 'token_target',
        'workspace_source_name': 'workspace_source',
        'workspace_target_name': 'workspace_target',
    }

    # ASSERT / VERIFY: the missing source workspace is reported by name
    with pytest.raises(Exception, match='workspace workspace_source does not exist'):
        copy_links(**call_arguments)
9 changes: 9 additions & 0 deletions toolbox/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from toolbox.commands.delete_attributes import delete_attributes_parse, delete_attributes
from toolbox.commands.copy_glossary import copy_glossary_parse, copy_glossary
from toolbox.commands.copy_dictionary import copy_dictionary, copy_dictionary_parse
from toolbox.commands.copy_links import copy_links, copy_links_parse


def run(args):
Expand All @@ -30,6 +31,7 @@ def run(args):
copy_usages_parse(subparsers)
copy_dictionary_parse(subparsers)
copy_dataprocessings_parse(subparsers)
copy_links_parse(subparsers)
# parse some argument lists
result = parser.parse_args(args)
if result.verbose:
Expand Down Expand Up @@ -92,6 +94,13 @@ def run(args):
logging.info("<<< copy_dataprocessings")
return 0

if result.subparsers_name == 'copy-links':
logging.info(">>> copy_links")
copy_links(result.url_source, result.url_target, result.token_source, result.token_target,
result.workspace_source, result.workspace_target)
logging.info("<<< copy_links")
return 0

parser.print_help(sys.stderr)
return 1

Expand Down
28 changes: 20 additions & 8 deletions toolbox/api/datagalaxy_api_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ def __init__(self, url: str, access_token: str, workspace: dict):
self.access_token = access_token
self.workspace = workspace

def list_sources(self, workspace_name: str) -> list:
def list_sources(self, workspace_name: str, include_links=False) -> list:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/sources", params=params, headers=headers)
code = response.status_code
Expand All @@ -38,9 +41,12 @@ def list_sources(self, workspace_name: str) -> list:
result = result + body_json['results']
return result

def list_containers(self, workspace_name: str) -> list:
def list_containers(self, workspace_name: str, include_links=False) -> list:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/containers", params=params, headers=headers)
code = response.status_code
Expand All @@ -65,9 +71,12 @@ def list_containers(self, workspace_name: str) -> list:
result = result + body_json['results']
return result

def list_structures(self, workspace_name: str) -> list:
def list_structures(self, workspace_name: str, include_links=False) -> list:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/structures", params=params, headers=headers)
code = response.status_code
Expand All @@ -92,9 +101,12 @@ def list_structures(self, workspace_name: str) -> list:
result = result + body_json['results']
return result

def list_fields(self, workspace_name: str) -> list:
def list_fields(self, workspace_name: str, include_links=False) -> list:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/fields", params=params, headers=headers)
code = response.status_code
Expand Down
7 changes: 5 additions & 2 deletions toolbox/api/datagalaxy_api_glossary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ def __init__(self, url: str, access_token: str, workspace: dict):
self.access_token = access_token
self.workspace = workspace

def list_properties(self, workspace_name: str) -> list:
def list_properties(self, workspace_name: str, include_links=False) -> list:
if not self.workspace["isVersioningEnabled"]:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/properties", params=params, headers=headers)
code = response.status_code
Expand Down
30 changes: 30 additions & 0 deletions toolbox/api/datagalaxy_api_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import requests as requests
from toolbox.api.datagalaxy_api import DataGalaxyBulkResult


class DataGalaxyApiLinks:
    """Client for the links endpoints, bound to one URL, access token and workspace."""

    def __init__(self, url: str, access_token: str, workspace: dict):
        self.url = url
        self.access_token = access_token
        self.workspace = workspace

    def bulk_create_links(self, workspace_name: str, links: list) -> DataGalaxyBulkResult:
        """Create links between entities, identified by their path, in one bulk call.

        :param workspace_name: name of the workspace (not used to build the request)
        :param links: link descriptions, sent as the JSON body of the request
        :return: the counters reported in the response body
        :raises Exception: if the workspace has versioning enabled, or if the
            response status code is not 201 (message taken from the body's 'error')
        """
        workspace = self.workspace
        if workspace["isVersioningEnabled"]:
            raise Exception('Workspace with versioning enabled are currently not supported.')

        # The bulk endpoint is addressed through the workspace default version
        endpoint = f"{self.url}/links/bulktree/{workspace['defaultVersionId']}"
        response = requests.post(
            endpoint,
            json=links,
            headers={'Authorization': f"Bearer {self.access_token}"},
        )
        payload = response.json()
        if response.status_code != 201:
            raise Exception(payload['error'])

        counters = ('total', 'created', 'deleted', 'unchanged', 'updated')
        return DataGalaxyBulkResult(**{name: payload[name] for name in counters})
7 changes: 5 additions & 2 deletions toolbox/api/datagalaxy_api_usages.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@ def __init__(self, url: str, access_token: str, workspace: dict):
self.access_token = access_token
self.workspace = workspace

def list_usages(self, workspace_name: str) -> list:
def list_usages(self, workspace_name: str, include_links=False) -> list:
if not self.workspace["isVersioningEnabled"]:
version_id = self.workspace['defaultVersionId']
params = {'versionId': version_id, 'includeAttributes': 'true'}
if include_links is True:
params = {'versionId': version_id, 'includeAttributes': 'false', 'includeLinks': 'true'}
else:
params = {'versionId': version_id, 'includeAttributes': 'true'}
headers = {'Authorization': f"Bearer {self.access_token}"}
response = requests.get(f"{self.url}/usages", params=params, headers=headers)
code = response.status_code
Expand Down
138 changes: 138 additions & 0 deletions toolbox/commands/copy_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import logging
from typing import Optional

from toolbox.api.datagalaxy_api import get_access_token, Token, DataGalaxyBulkResult
from toolbox.api.datagalaxy_api_usages import DataGalaxyApiUsages
from toolbox.api.datagalaxy_api_glossary import DataGalaxyApiGlossary
from toolbox.api.datagalaxy_api_dictionary import DataGalaxyApiDictionary
from toolbox.api.datagalaxy_api_links import DataGalaxyApiLinks
from toolbox.api.datagalaxy_api_workspaces import DataGalaxyApiWorkspace


def copy_links(url_source: str,
               url_target: Optional[str],
               token_source: str,
               token_target: Optional[str],
               workspace_source_name: str,
               workspace_target_name: str) -> DataGalaxyBulkResult:
    """Copy the links found in a source workspace into a target workspace.

    Usages, properties, sources, containers, structures and fields of the source
    workspace are listed with their links, the links are flattened with
    ``parse_links`` and then created in bulk in the target workspace.

    :param url_source: API url of the source environment
    :param url_target: API url of the target environment (``None``: use ``url_source``)
    :param token_source: integration token of the source environment
    :param token_target: integration token of the target environment
        (``None``: use ``token_source``)
    :param workspace_source_name: name of the workspace to read links from
    :param workspace_target_name: name of the workspace to create links in
    :return: the result of the bulk creation on the target workspace
    :raises Exception: if the source or the target workspace does not exist
    """
    # Without an explicit target, the copy happens with the source credentials / url
    token_target = token_source if token_target is None else token_target
    url_target = url_source if url_target is None else url_target

    source_token = Token(token_source)
    target_token = Token(token_target)
    source_access_token = get_access_token(url_source, source_token)
    target_access_token = get_access_token(url_target, target_token)

    source_workspaces_api = DataGalaxyApiWorkspace(url=url_source, access_token=source_access_token)
    source_workspace = source_workspaces_api.get_workspace(workspace_source_name)
    if source_workspace is None:
        raise Exception(f'workspace {workspace_source_name} does not exist')

    target_workspaces_api = DataGalaxyApiWorkspace(url=url_target, access_token=target_access_token)
    target_workspace = target_workspaces_api.get_workspace(workspace_target_name)
    if target_workspace is None:
        raise Exception(f'workspace {workspace_target_name} does not exist')

    # All three source-side clients share the same connection settings
    source_settings = {
        'url': url_source,
        'access_token': source_access_token,
        'workspace': source_workspace,
    }
    usages_api = DataGalaxyApiUsages(**source_settings)
    glossary_api = DataGalaxyApiGlossary(**source_settings)
    dictionary_api = DataGalaxyApiDictionary(**source_settings)

    # Find all links in source workspace: every listing is fetched before any parsing
    listings = [
        usages_api.list_usages(workspace_source_name, include_links=True),
        glossary_api.list_properties(workspace_source_name, include_links=True),
        dictionary_api.list_sources(workspace_source_name, include_links=True),
        dictionary_api.list_containers(workspace_source_name, include_links=True),
        dictionary_api.list_structures(workspace_source_name, include_links=True),
        dictionary_api.list_fields(workspace_source_name, include_links=True),
    ]

    # Collecting all links, in the same order as the listings above
    links = []
    for listing in listings:
        links.extend(parse_links(listing))
    logging.info(f'copy-links - {len(links)} links found')

    # Creating links in target workspace
    links_api = DataGalaxyApiLinks(
        url=url_target,
        access_token=target_access_token,
        workspace=target_workspace,
    )
    return links_api.bulk_create_links(workspace_name=workspace_target_name, links=links)


def parse_links(objs: list) -> list:
    """Flatten the links carried by listed objects into link descriptions.

    Each returned item is a dict with the keys ``fromPath``, ``fromType``,
    ``linkType``, ``toPath`` and ``toType``. Every kept link is also logged
    at INFO level.

    Objects whose ``typePath`` contains ``DataProcessingItem`` are skipped, and so
    are links pointing to such objects. Objects without a ``links`` entry (missing
    or ``None``), and link types mapped to ``None``, contribute no link instead of
    raising.

    :param objs: objects exposing ``path``, ``typePath`` and optionally ``links``
        (a mapping of link type to a list of destinations with ``path`` and
        ``typePath``)
    :return: the link descriptions, in iteration order
    """
    links = []
    for obj in objs:
        # DPI are ignored since they are handled differently
        if "DataProcessingItem" in obj["typePath"]:
            continue
        # Tolerate an absent or null "links" entry rather than failing the whole copy
        outgoing = obj.get("links") or {}
        for link_type, destinations in outgoing.items():
            for dest in destinations or []:
                if "DataProcessingItem" in dest["typePath"]:
                    continue
                logging.info('copy-links - %s %s %s', obj["path"], link_type, dest["path"])
                links.append({
                    'fromPath': obj["path"],
                    'fromType': obj["typePath"],
                    'linkType': link_type,
                    'toPath': dest["path"],
                    'toType': dest["typePath"],
                })
    return links


def copy_links_parse(subparsers):
    """Register the ``copy-links`` sub-command and its options on *subparsers*."""
    # create the parser for the "copy_links" command
    parser = subparsers.add_parser('copy-links', help='copy-links help')

    # (flag, help text, required), registered in this order
    options = (
        ('--url-source', 'url source environnement', True),
        ('--token-source', 'integration token source environnement', True),
        ('--url-target', 'url target environnement (if undefined, use url source)', False),
        ('--token-target', 'integration token target environnement (if undefined, use token source)', False),
        ('--workspace-source', 'workspace source name', True),
        ('--workspace-target', 'workspace target name', True),
    )
    for flag, description, mandatory in options:
        parser.add_argument(flag, type=str, help=description, required=mandatory)

0 comments on commit 4116c63

Please sign in to comment.