diff --git a/.gitignore b/.gitignore index bdaf4a664..72ed6e54d 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,13 @@ target/ /vagrant/README.html /.pytest_cache/ /.venv/ + +# asdf +.envrc +.tool-versions + +# vscode +.vscode/ + +# environment +.env \ No newline at end of file diff --git a/Makefile b/Makefile index ddf65fc4a..e51a1f681 100644 --- a/Makefile +++ b/Makefile @@ -117,12 +117,6 @@ lint: ## check style with pylint test: ## Run tests quickly with the default Python and generate code coverage report pytest -xv --cov-report term-missing --cov-report xml --cov=./twindb_backup tests/unit -test-including-azure-blob: ## Like 'make test' but includes tests for azure blob destination - coverage run --source=twindb_backup -m pytest -xv tests/unit - coverage run -a --source=twindb_backup -m unittest -cvf --locals tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py - coverage run -a --source=twindb_backup -m unittest -cvf --locals tests/unittests/azblob_testing/destination_tests/test_AzureBlob_functions.py - coverage report - test-integration: ## Run integration tests. Must be run in vagrant py.test -xsv tests/integration/ @@ -174,6 +168,9 @@ endif ifeq ($(OS_VERSION),7) PLATFORM = centos endif +ifeq ($(OS_VERSION),8) + PLATFORM = centos +endif package: ## Build package - OS_VERSION must be one of: jammy, focal. @docker run \ diff --git a/README.rst b/README.rst index 54b8cd7e2..10b8e53b0 100644 --- a/README.rst +++ b/README.rst @@ -21,8 +21,8 @@ TwinDB Backup :alt: Join the chat at https://gitter.im/twindb/backup TwinDB Backup is a multipurpose tool for backing up MySQL database and regular files/directories on the file system. -It can store backup copies on a remote SSH server, Amazon S3 or -Google Cloud Storage. +It can store backup copies on a remote SSH server, Amazon S3, Azure Blob Storage, +or Google Cloud Storage. TwinDB Backup accepts a backup copy stream from any of supported sources (MySQL Server, Percona Server, Percona XtraDB Cluster, or file system) @@ -49,6 +49,7 @@ After the stream passed all modifiers it is sent to one of the configured backup destination. It can be: - Amazon S3 bucket +- Azure Blob Storage account - Google Cloud Storage bucket - Any server with SSH demon @@ -88,6 +89,7 @@ Features **TwinDB Backup** storage options: - Amazon S3 +- Azure Blob Storage - Google Cloud Storage - Remote SSH server - Optional local copy @@ -143,6 +145,7 @@ Possible ``OS_VERSION`` values: * jammy * focal * 7 (for CentOS 7) + * 8 (for CentOS 8) .. code-block:: console @@ -178,6 +181,7 @@ Credits * `Arda Beyazoğlu `_ * `Egor Lyutov `_ * `fonthead `_ + * `James Salzman `_ * `Maksym Kryva `_ * `Manjot Singh `_ * `Michael Rikmas `_ diff --git a/docs/conf.py b/docs/conf.py index e8972b6a0..dfc38dd6e 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,8 +13,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os +import sys # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory is @@ -40,23 +40,23 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. 
-templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'TwinDB Backup' -copyright = u"2016-2019, TwinDB Development Team" +project = "TwinDB Backup" +copyright = "2016-2019, TwinDB Development Team" # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout @@ -79,7 +79,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -97,7 +97,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -111,7 +111,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = "default" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -131,18 +131,18 @@ # The name of an image file (relative to this directory) to place at the # top of the sidebar. -html_logo = '_static/logo.png' +html_logo = "_static/logo.png" # The name of an image file (within the static path) to use as favicon # of the docs. This file should be a Windows icon file (.ico) being # 16x16 or 32x32 pixels large. -html_favicon = '_static/favicon.png' +html_favicon = "_static/favicon.png" # Add any paths that contain custom static files (such as style sheets) # here, relative to this directory. They are copied after the builtin # static files, so a file named "default.css" will overwrite the builtin # "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. @@ -188,7 +188,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'twindb_backupdoc' +htmlhelp_basename = "twindb_backupdoc" # -- Options for LaTeX output ------------------------------------------ @@ -196,10 +196,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'preamble': '', } @@ -208,9 +206,7 @@ # (source start file, target name, title, author, documentclass # [howto/manual]). latex_documents = [ - ('index', 'twindb_backup.tex', - u'TwinDB Backup Documentation', - u'TwinDB Development Team', 'manual'), + ("index", "twindb_backup.tex", "TwinDB Backup Documentation", "TwinDB Development Team", "manual"), ] # The name of an image file (relative to this directory) to place at @@ -238,11 +234,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - ('index', 'twindb_backup', - u'TwinDB Backup Documentation', - [u'TwinDB Development Team'], 1) -] +man_pages = [("index", "twindb_backup", "TwinDB Backup Documentation", ["TwinDB Development Team"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -254,12 +246,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'twindb_backup', - u'TwinDB Backup Documentation', - u'TwinDB Development Team', - 'twindb_backup', - 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "twindb_backup", + "TwinDB Backup Documentation", + "TwinDB Development Team", + "twindb_backup", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. diff --git a/docs/twindb_backup.configuration.destinations.rst b/docs/twindb_backup.configuration.destinations.rst index c447eef82..0614c9cf2 100644 --- a/docs/twindb_backup.configuration.destinations.rst +++ b/docs/twindb_backup.configuration.destinations.rst @@ -4,6 +4,14 @@ twindb\_backup.configuration.destinations package Submodules ---------- +twindb\_backup.configuration.destinations.az module +---------------------------------------------------- + +.. automodule:: twindb_backup.configuration.destinations.az + :members: + :undoc-members: + :show-inheritance: + twindb\_backup.configuration.destinations.gcs module ---------------------------------------------------- diff --git a/docs/twindb_backup.destination.rst b/docs/twindb_backup.destination.rst index db387da4b..569ebdd94 100644 --- a/docs/twindb_backup.destination.rst +++ b/docs/twindb_backup.destination.rst @@ -4,14 +4,6 @@ twindb\_backup.destination package Submodules ---------- -twindb\_backup.destination.azblob module ----------------------------------------- - -.. automodule:: twindb_backup.destination.azblob - :members: - :undoc-members: - :show-inheritance: - twindb\_backup.destination.base\_destination module --------------------------------------------------- @@ -28,6 +20,14 @@ twindb\_backup.destination.exceptions module :undoc-members: :show-inheritance: +twindb\_backup.destination.az module +------------------------------------- + +.. automodule:: twindb_backup.destination.az + :members: + :undoc-members: + :show-inheritance: + twindb\_backup.destination.gcs module ------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 13d4d3e0b..f10b0aa09 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -47,7 +47,7 @@ Backup Destination The ``[destination]`` section specifies where to store backup copies. ``backup_destination`` can be either ``ssh`` (if you want to store backups on a remote SSH server), -or ``s3`` (if you want to store backups in Amazon S3), or ``gsc`` (if the backup should be stored in Google Cloud). +``s3`` (if you want to store backups in Amazon S3), ``az`` (if the backup should be stored in Azure Blob Storage), or ``gcs`` (if the backup should be stored in Google Cloud). In the optional ``keep_local_path`` you can specify a local path where the tool will store a local copy of the backup. It's useful if you want to stream a MySQL backup to S3 and would like to keep a local copy as well. 
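For example, a ``[destination]`` section that sends copies to Azure Blob Storage while also keeping a local copy might look roughly like the following sketch (the local path here is only a placeholder):

.. code-block:: ini

    [destination]
    backup_destination = az
    keep_local_path = /var/backup/local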
@@ -89,6 +89,20 @@ In the ``[s3]`` section you specify Amazon credentials as well as an S3 bucket w AWS_DEFAULT_REGION = us-east-1 BUCKET = twindb-backups +Azure Blob Storage +~~~~~~~~~~~~~~~~~~~~ + +In the ``[az]`` section you specify Azure credentials as well as Azure Blob Storage container where to store backups. + +.. code-block:: ini + + [az] + + connection_string = "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" + container_name = twindb-backups + remote_path = /backups/mysql # optional + + Google Cloud Storage ~~~~~~~~~~~~~~~~~~~~ diff --git a/requirements.in b/requirements.in index 25f1c749a..21a74946c 100644 --- a/requirements.in +++ b/requirements.in @@ -1,6 +1,6 @@ #@IgnoreInspection BashAddShebang azure-core ~= 1.24 -azure-storage-blob ~= 12.12 +azure-storage-blob ~= 12.19 Click ~= 8.1 PyMySQL ~= 1.0 boto3 ~= 1.7 diff --git a/requirements_dev.in b/requirements_dev.in index 2d392d889..6bcafcb3a 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -1,4 +1,5 @@ #@IgnoreInspection BashAddShebang +azure-storage-blob ~= 12.19 black ~= 24.3 Sphinx ~= 4.5 bumpversion ~= 0.6 diff --git a/requirements_dev.txt b/requirements_dev.txt index c21380256..8d27ebcab 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,13 +8,17 @@ alabaster==0.7.13 # via sphinx astroid==2.15.8 # via pylint +azure-core==1.31.0 + # via azure-storage-blob +azure-storage-blob==12.23.0 + # via -r requirements_dev.in babel==2.16.0 # via sphinx black==24.8.0 # via -r requirements_dev.in -boto3==1.35.19 +boto3==1.35.25 # via moto -botocore==1.35.19 +botocore==1.35.25 # via # boto3 # moto @@ -36,7 +40,9 @@ click==8.1.7 coverage[toml]==7.6.1 # via pytest-cov cryptography==43.0.1 - # via moto + # via + # azure-storage-blob + # moto dill==0.3.8 # via pylint docker==5.0.3 @@ -45,7 +51,7 @@ docutils==0.17.1 # via sphinx exceptiongroup==1.2.2 # via pytest -idna==3.9 +idna==3.10 # via requests imagesize==1.4.1 # via sphinx @@ -53,6 +59,8 @@ importlib-metadata==8.5.0 # via sphinx iniconfig==2.0.0 # via pytest +isodate==0.6.1 + # via azure-storage-blob isort==5.13.2 # via # -r requirements_dev.in @@ -88,7 +96,7 @@ pathspec==0.12.1 # via # black # yamllint -platformdirs==4.3.3 +platformdirs==4.3.6 # via # black # pylint @@ -126,6 +134,7 @@ pyyaml==6.0.2 requests==2.31.0 # via # -r requirements_dev.in + # azure-core # docker # moto # responses @@ -137,7 +146,10 @@ runlike==1.4.14 s3transfer==0.10.2 # via boto3 six==1.16.0 - # via python-dateutil + # via + # azure-core + # isodate + # python-dateutil snowballstemmer==2.2.0 # via sphinx sphinx==4.5.0 @@ -165,6 +177,8 @@ tomlkit==0.13.2 typing-extensions==4.12.2 # via # astroid + # azure-core + # azure-storage-blob # black # pylint urllib3==1.26.20 diff --git a/setup.py b/setup.py index 9833ed11a..df20a2a27 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -from setuptools import setup, find_packages + +from setuptools import find_packages, setup del os.link diff --git a/support/make_release.py b/support/make_release.py index a7eba8081..098d5ae1c 100644 --- a/support/make_release.py +++ b/support/make_release.py @@ -1,4 +1,4 @@ -from os import listdir, environ +from os import environ, listdir from os import path as osp from subprocess import run @@ -11,23 +11,14 @@ "jammy", "focal", # CentOS - "7" + "7", ] PKG_DIR = "omnibus/pkg" OS_DETAILS = { - "jammy": { - "flavor": "Ubuntu", - "name": "Ubuntu jammy" - }, - "focal": { - "flavor": 
"Ubuntu", - "name": "Ubuntu focal" - }, - "7": { - "flavor": "CentOS", - "name": "CentOS 7" - }, + "jammy": {"flavor": "Ubuntu", "name": "Ubuntu jammy"}, + "focal": {"flavor": "Ubuntu", "name": "Ubuntu focal"}, + "7": {"flavor": "CentOS", "name": "CentOS 7"}, } @@ -41,15 +32,14 @@ def main(): my_env["OS_VERSION"] = os run(["make", "package"], env=my_env, check=True) for fi_name in listdir(PKG_DIR): - if ( - fi_name.endswith(".rpm") - or fi_name.endswith(".deb") - or fi_name.endswith(".json") - ): + if fi_name.endswith(".rpm") or fi_name.endswith(".deb") or fi_name.endswith(".json"): key = f"twindb-backup/{__version__}/{os}/{fi_name}" with open(osp.join(PKG_DIR, fi_name), "rb") as fp: client.put_object( - ACL="public-read", Body=fp, Bucket="twindb-release", Key=key, + ACL="public-read", + Body=fp, + Bucket="twindb-release", + Key=key, ) print(f"https://twindb-release.s3.amazonaws.com/{key}") @@ -60,7 +50,7 @@ def main(): print(f" * {details['name']}") key = f"twindb-backup/{__version__}/{os}/" response = client.list_objects( - Bucket='twindb-release', + Bucket="twindb-release", Prefix=key, ) for fil in response["Contents"]: diff --git a/support/twindb-backup.cfg b/support/twindb-backup.cfg index 42359f752..e2f9b0e5a 100644 --- a/support/twindb-backup.cfg +++ b/support/twindb-backup.cfg @@ -31,6 +31,14 @@ AWS_SECRET_ACCESS_KEY=YYYYY AWS_DEFAULT_REGION=us-east-1 BUCKET=twindb-backups +[az] + +# Azure destination settings + +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name=twindb-backups +#remote_path = /backups/mysql # optional + [gcs] # GCS destination settings diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 300f368f3..9c2aaea9b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -28,6 +28,11 @@ def config_content(): AWS_DEFAULT_REGION="us-east-1" BUCKET="twindb-backups" +[az] +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name="twindb-backups" +remote_path="/backups/mysql" + [gcs] GC_CREDENTIALS_FILE="XXXXX" GC_ENCRYPTION_KEY= diff --git a/tests/unit/destination/az/__init__.py b/tests/unit/destination/az/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/destination/az/test_config.py b/tests/unit/destination/az/test_config.py new file mode 100644 index 000000000..8fa56c8f1 --- /dev/null +++ b/tests/unit/destination/az/test_config.py @@ -0,0 +1,37 @@ +import pytest + +from twindb_backup.configuration.destinations.az import AZConfig + +from .util import AZConfigParams + + +def test_initialization_success(): + """Test initialization of AZConfig with all parameters set.""" + p = AZConfigParams() + c = AZConfig(**dict(p)) + assert c.connection_string == p.connection_string + assert c.container_name == p.container_name + assert c.chunk_size == p.chunk_size + assert c.remote_path == p.remote_path + + +def test_initialization_success_defaults(): + """Test initialization of AZConfig with only required parameters set and ensure default values.""" + p = AZConfigParams(only_required=True) + c = AZConfig(**dict(p)) + assert c.connection_string == p.connection_string + assert c.container_name == p.container_name + assert c.chunk_size == 4 * 1024 * 1024 + assert c.remote_path == "/" + + +def test_invalid_params(): + """Test initialization of AZConfig with invalid parameters.""" + with pytest.raises(ValueError): + AZConfig( + 
connection_string="test_connection_string", container_name="test_container", chunk_size="invalid_chunk_size" + ) + with pytest.raises(ValueError): + AZConfig(connection_string="test_connection_string", container_name="test_container", remote_path=1) + with pytest.raises(TypeError): + AZConfig(connection_string="test_connection_string") diff --git a/tests/unit/destination/az/test_delete.py b/tests/unit/destination/az/test_delete.py new file mode 100644 index 000000000..357ac84f0 --- /dev/null +++ b/tests/unit/destination/az/test_delete.py @@ -0,0 +1,23 @@ +import pytest + +import twindb_backup.destination.az as az + +from .util import mocked_az + + +def test_delete_success(): + """Tests AZ.delete method, ensuring the blob is deleted.""" + c = mocked_az() + + c.delete("test") + c._container_client.delete_blob.assert_called_once_with(c.render_path("test")) + + +def test_delete_fail(): + """Tests AZ.delete method, re-raising an exception on failure""" + c = mocked_az() + c._container_client.delete_blob.side_effect = Exception() + + with pytest.raises(Exception): + c.delete("test") + c._container_client.delete_blob.assert_called_once_with(c.render_path("test")) diff --git a/tests/unit/destination/az/test_download_to_pipe.py b/tests/unit/destination/az/test_download_to_pipe.py new file mode 100644 index 000000000..837da0c40 --- /dev/null +++ b/tests/unit/destination/az/test_download_to_pipe.py @@ -0,0 +1,40 @@ +from unittest.mock import MagicMock, patch + +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + + +def test_download_to_pipe_success(): + """Tests AZ.download_to_pipe method, mocks calls for os and ContainerClient""" + with patch("twindb_backup.destination.az.os") as mc_os: + mc_fdopen = MagicMock() + mc_os.fdopen.return_value = mc_fdopen + + c = mocked_az() + + mc_dbr = MagicMock() + c._container_client.download_blob.return_value = mc_dbr + + c._download_to_pipe(c.render_path("foo-key"), 100, 200) + + mc_os.close.assert_called_once_with(100) + mc_os.fdopen.assert_called_once_with(200, "wb") + c._container_client.download_blob.assert_called_once_with(c.render_path("foo-key")) + mc_dbr.readinto.assert_called_once_with(mc_fdopen.__enter__()) + + +def test_download_to_pipe_fail(): + """Tests AZ.download_to_pipe method, re-raises exception when download fails in child process""" + with patch("twindb_backup.destination.az.os") as mc_os: + c = mocked_az() + + c._container_client.download_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c._download_to_pipe(c.render_path("foo-key"), 100, 200) + + mc_os.close.assert_called_once_with(100) + mc_os.fdopen.assert_called_once_with(200, "wb") + c._container_client.download_blob.assert_called_once_with(c.render_path("foo-key")) diff --git a/tests/unit/destination/az/test_get_stream.py b/tests/unit/destination/az/test_get_stream.py new file mode 100644 index 000000000..68c444dc1 --- /dev/null +++ b/tests/unit/destination/az/test_get_stream.py @@ -0,0 +1,58 @@ +from multiprocessing import Process +from unittest.mock import MagicMock, patch + +import mock +import pytest + +import twindb_backup.destination.az as az + +from .util import mocked_az + + +def test_get_stream_success(): + """Tests AZ.get_stream method, mocks calls for process and os""" + with patch("twindb_backup.destination.az.os") as mc_os: + with patch("twindb_backup.destination.az.Process") as mc_process: + mc = MagicMock(spec=Process) + mc_process.return_value = mc + mc.exitcode = 0 + + mc_os.pipe.return_value = (100, 200) + c = 
mocked_az() + + mock_copy = mock.Mock() + mock_copy.key = "foo-key" + + with c.get_stream(mock_copy): + pass + + az.Process.assert_called_once_with(target=c._download_to_pipe, args=(c.render_path("foo-key"), 100, 200)) + mc_os.close.assert_called_once_with(200) + mc_os.fdopen.assert_called_once_with(100, "rb") + mc.start.assert_called_once() + mc.join.assert_called_once() + + +def test_get_stream_failure(): + """Tests AZ.get_stream method, raises an exception when child process fails""" + with patch("twindb_backup.destination.az.os") as mc_os: + with patch("twindb_backup.destination.az.Process") as mc_process: + mc = MagicMock(spec=Process) + mc_process.return_value = mc + mc.exitcode = 1 + + mc_os.pipe.return_value = (100, 200) + c = mocked_az() + + mock_copy = mock.Mock() + mock_copy.key = "foo-key" + + with pytest.raises(Exception): + with c.get_stream(mock_copy): + pass + + az.Process.assert_called_once_with(target=c._download_to_pipe, args=(c.render_path("foo-key"), 100, 200)) + mc_os.close.assert_called_once_with(200) + mc_os.fdopen.assert_called_once_with(100, "rb") + mc.start.assert_called_once() + mc.join.assert_called_once() diff --git a/tests/unit/destination/az/test_init.py b/tests/unit/destination/az/test_init.py new file mode 100644 index 000000000..9361533c0 --- /dev/null +++ b/tests/unit/destination/az/test_init.py @@ -0,0 +1,106 @@ +import socket +from unittest.mock import MagicMock, patch + +import azure.core.exceptions as ae +import pytest +from azure.storage.blob import ContainerClient + +import twindb_backup.destination.az as az + +from .util import AZParams + + +def test_init_param(): + """Test initialization of AZ with all parameters set, mocking the _connect method.""" + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + assert c._container_name == p.container_name + assert c._connection_string == p.connection_string + assert c._hostname == p.hostname + assert c._chunk_size == p.chunk_size + assert c._remote_path == p.remote_path + assert isinstance(c._container_client, ContainerClient) + az.AZ._connect.assert_called_once() + + +def test_init_param_defaults(): + """Test initialization of AZ with only required parameters set, ensuring default values, mocking the _connect method.""" + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams(only_required=True) + c = az.AZ(**dict(p)) + + assert c._container_name == p.container_name + assert c._connection_string == p.connection_string + assert c._hostname == socket.gethostname() + assert c._chunk_size == 4 * 1024 * 1024 + assert c._remote_path == "/" + assert isinstance(c._container_client, ContainerClient) + az.AZ._connect.assert_called_once() + + +def test_init_conn_string_valid(): + """Test initialization of AZ with valid connection string.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = True + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + +def test_init_conn_string_invalid(): + """Test initialization of AZ with invalid connection string, expecting ValueError.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = True + p = AZParams() + p.connection_string = "invalid_connection_string" + with pytest.raises(ValueError, match="Connection string is 
either blank or malformed."): + _ = az.AZ(**dict(p)) + + +def test_init_container_not_exists(): + """Test initialization of AZ with container not existing, mocking the create_container method.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = False + with patch("twindb_backup.destination.az.ContainerClient.create_container") as mc_create_container: + mc_create_container.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + az.ContainerClient.create_container.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + +def test_init_container_create_fails(): + """Test initialization of AZ with container not existing, fails to create container, re-raising error.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = False + with patch("twindb_backup.destination.az.ContainerClient.create_container") as mc_create_container: + mc_create_container.side_effect = ae.HttpResponseError() + + p = AZParams() + with pytest.raises(Exception): + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + az.ContainerClient.create_container.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + +def test_init_success(): + """Test initialization of AZ with existing container, mocking the from_connection_string method.""" + with patch("twindb_backup.destination.az.ContainerClient.from_connection_string") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.from_connection_string.assert_called_once_with(p.connection_string, p.container_name) + mc.return_value.exists.assert_called_once() + mc.return_value.create_container.assert_not_called() + assert isinstance(c._container_client, ContainerClient) diff --git a/tests/unit/destination/az/test_list_files.py b/tests/unit/destination/az/test_list_files.py new file mode 100644 index 000000000..1e69322a8 --- /dev/null +++ b/tests/unit/destination/az/test_list_files.py @@ -0,0 +1,86 @@ +import random +import string + +import azure.core.exceptions as ae +import pytest +from azure.storage.blob import BlobProperties + +from .util import mocked_az + +PREFIX = "/backups/mysql" + +BLOBS = [ + BlobProperties(name="blob1", metadata={"hdi_isfolder": "true"}), + BlobProperties(name="blob2", metadata={"hdi_isfolder": "false"}), + BlobProperties(name="blob3"), +] + + +def test_list_files_success(): + """Tests AZ.list_files method, reading a list of blob names from azure.""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blobs = c._list_files() + assert blobs == [b.name for b in BLOBS] + + c._container_client.list_blobs.assert_called_once() + + +def test_list_files_fail(): + """Tests AZ.list_files method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.list_blobs.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c._list_files(PREFIX, False, False) + + c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_files_only(): + """Tests AZ.list_files method, listing only file blobs""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, True) + + assert blob_names == ["blob2", "blob3"] + + 
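+    # blob1 is defined in BLOBS above with metadata {"hdi_isfolder": "true"}, so it is
+    # presumably filtered out as a directory when files_only=True; blob2 ("false") and
+    # blob3 (no metadata) are the remaining file blobs.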
c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_all_files(): + """Tests AZ.list_files method, listing all blobs, including directories""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, False) + + assert blob_names == [b.name for b in BLOBS] + + c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_recursive(): + """Tests AZ.list_files method, recursive option is ignored""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, False) + blob_names_recursive = c._list_files(PREFIX, True, False) + + assert blob_names == blob_names_recursive + c._container_client.list_blobs.assert_called_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_prefix(): + """Tests AZ.list_files method, prefix is used as a filter in list_blobs only""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + # Prefix is used as a filter in list_blobs, and because its mocked - it makes no difference. + blob_names = c._list_files("".join(random.SystemRandom().choices(string.ascii_lowercase, k=10)), False, False) + blob_names_recursive = c._list_files(PREFIX, False, False) + + assert blob_names == blob_names_recursive diff --git a/tests/unit/destination/az/test_read.py b/tests/unit/destination/az/test_read.py new file mode 100644 index 000000000..052cafcad --- /dev/null +++ b/tests/unit/destination/az/test_read.py @@ -0,0 +1,45 @@ +from unittest.mock import MagicMock + +import azure.core.exceptions as ae +import pytest +from azure.storage.blob import StorageStreamDownloader + +from twindb_backup.destination.exceptions import FileNotFound + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" + + +def test_read_success(): + """Tests AZ.read method, ensuring the blob is read from azure.""" + c = mocked_az() + mock = MagicMock(StorageStreamDownloader) + c._container_client.download_blob.return_value = mock + + c.read(EXAMPLE_FILE) + + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") + mock.read.assert_called_once() + + +def test_read_fail(): + """Tests AZ.read method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.download_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.read(EXAMPLE_FILE) + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") + + +def test_read_fail_not_found(): + """Tests AZ.read method, raising a twindb_backup.destination.exceptions.FileNotFound exception on ResourceNotFoundError""" + c = mocked_az() + c._container_client.download_blob.side_effect = ae.ResourceNotFoundError() + + with pytest.raises( + FileNotFound, match=f"File {c.render_path(EXAMPLE_FILE)} does not exist in container {c._container_name}" + ): + c.read(EXAMPLE_FILE) + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") diff --git a/tests/unit/destination/az/test_render_path.py b/tests/unit/destination/az/test_render_path.py new file mode 100644 index 000000000..6873057f5 --- /dev/null +++ b/tests/unit/destination/az/test_render_path.py @@ -0,0 +1,8 @@ +from .util import mocked_az + + +def test_render_path(): + """Test render_path method, ensuring the remote path is prepended to 
the path.""" + c = mocked_az() + + assert c.render_path("test") == f"{c.remote_path}/test" diff --git a/tests/unit/destination/az/test_save.py b/tests/unit/destination/az/test_save.py new file mode 100644 index 000000000..0cafd2717 --- /dev/null +++ b/tests/unit/destination/az/test_save.py @@ -0,0 +1,37 @@ +from typing import BinaryIO +from unittest.mock import MagicMock + +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" + + +def test_save_success(): + """Tests AZ.save method, ensuring the blob is saved to azure.""" + c = mocked_az() + handler = MagicMock(BinaryIO) + file_obj = MagicMock() + handler.__enter__.return_value = file_obj + handler.__exit__.return_value = None + + c.save(handler, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), file_obj) + + +def test_save_fail(): + """Tests AZ.save method, re-raises an exception on failure""" + c = mocked_az() + handler = MagicMock(BinaryIO) + file_obj = MagicMock() + handler.__enter__.return_value = file_obj + handler.__exit__.return_value = None + c._container_client.upload_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.save(handler, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), file_obj) diff --git a/tests/unit/destination/az/test_write.py b/tests/unit/destination/az/test_write.py new file mode 100644 index 000000000..993039395 --- /dev/null +++ b/tests/unit/destination/az/test_write.py @@ -0,0 +1,27 @@ +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" +CONTENT = b"test content" + + +def test_write_success(): + """Tests AZ.write method, ensuring the blob is written to azure.""" + c = mocked_az() + + c.write(CONTENT, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), CONTENT, overwrite=True) + + +def test_write_fail(): + """Tests AZ.write method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.upload_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.write(CONTENT, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), CONTENT, overwrite=True) diff --git a/tests/unit/destination/az/util.py b/tests/unit/destination/az/util.py new file mode 100644 index 000000000..8b221f9fe --- /dev/null +++ b/tests/unit/destination/az/util.py @@ -0,0 +1,54 @@ +import collections +from unittest.mock import MagicMock, patch + +from azure.storage.blob import ContainerClient + +import twindb_backup.destination.az as az + + +class AZParams(collections.Mapping): + def __init__(self, only_required=False) -> None: + self.container_name = "test_container" + self.connection_string = "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" + + if not only_required: + self.hostname = "test_host" + self.chunk_size = 123 + self.remote_path = "/himom" + + def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return self.__dict__[key] + + +class AZConfigParams(collections.Mapping): + def __init__(self, only_required=False) -> None: + self.connection_string = "test_connection_string" + self.container_name = "test_container" + + if not only_required: + self.chunk_size = 123 + self.remote_path = "/himom" + 
+ def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return self.__dict__[key] + + +def mocked_az(): + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + return c diff --git a/tests/unittests/azblob_testing/__init__.py b/tests/unittests/azblob_testing/__init__.py deleted file mode 100644 index e448ab726..000000000 --- a/tests/unittests/azblob_testing/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from pathlib import Path - -HERE = Path(__file__).parent - - -def do_set_osenvs(setter_func): - here = Path(HERE) - target_dummies = None - while target_dummies is None and here.name: - if "dummy_env_vars.json" not in here.iterdir(): - here = here.parent - else: - target_dummies = str(here.joinpath("dummy_env_vars.json")) - setter_func(target_dummies) - - -PART_NAMES = "store,host,container,interval,media_type,fname".split(",") -SAMPLE_TARGETS = [ - "../../../.../.../mysql-2020-07-29_01_00_03.xbstream.gz", - "../../mysql/some/extra/dirs/mysql-2020-07-29_01_00_03.xbstream.gz", - "../../../../../mysql-fullbackup-qa1-rms", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_03_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_04_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_05_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_06_00_04.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_07_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_08_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_09_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_10_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_11_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_12_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_13_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_14_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_15_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_16_00_04.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_17_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_18_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_19_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_20_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_21_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_22_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_23_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_00_05_13.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", - "azure://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", - "azure://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", -] diff 
--git a/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py b/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py deleted file mode 100644 index 0f8d80961..000000000 --- a/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py +++ /dev/null @@ -1,1189 +0,0 @@ -import io -import logging -import os -import sys -import time -import types -import unittest -from contextlib import contextmanager -from pathlib import Path -from typing import Dict, List, Optional, Tuple - -# third-party imports -import coverage -from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError - -# azure imports (also a third-party import) ;) -from azure.storage.blob import BlobClient, BlobProperties, ContainerClient -from azure.storage.blob._shared.response_handlers import PartialBatchErrorException - -from tests.unittests.azblob_testing import PART_NAMES, SAMPLE_TARGETS, do_set_osenvs - -# local project imports -from twindb_backup import LOG - -DO_TEST_SKIPPING = False - - -def get_root(path: Path, dir_name: str): - if path.name and path.name == dir_name: - return path - return get_root(path.parent, dir_name) - - -def handle_coverage(): - root = get_root(Path(__file__).parent, "backup") - - @contextmanager - def cover_ctx(): - cov = coverage.Coverage(data_file=str(root.joinpath("cov/.coverage"))) - cov.start() - try: - yield - finally: - cov.stop() - cov.save() - cov.html_report() - - return cover_ctx - - -test_function_logger = LOG -test_function_logger.setLevel(0) - - -class AzureBlobBaseCase(unittest.TestCase): - """No direct tests provided here. This class defines basic setup of testing resources which subclasses will need.""" - - @staticmethod - def _reproduce_potential_import_error(err: ImportError, msg): - def repeatable_raiser(*args, **kwargs): - nonlocal err - try: - raise ImportError(msg) from err - except ImportError as ie: - # creation of trimmed traceback inspired by the stack-overflow answer found here: - # https://stackoverflow.com/a/58821552/7412747 - tb = sys.exc_info()[2] - back_frame = tb.tb_frame.f_back - back_tb = types.TracebackType( - tb_next=None, - tb_frame=back_frame, - tb_lasti=back_frame.f_lasti, - tb_lineno=back_frame.f_lineno, - ) - immediate_err = ie.with_traceback(back_tb) - raise immediate_err - - return repeatable_raiser - - # noinspection PyUnresolvedReferences - @classmethod - def setUpClass(cls) -> None: - """Provides a class level function that will only be run one time - when this TestCase instance is first initialized.""" - try: - from twindb_backup.destination.azblob import AzureBlob - except ImportError as ie: - msg = f"Attempted use of object twindb_backup.destination.azblob.AzureBlob failed due to import error" - AzureBlob = cls._reproduce_potential_import_error(ie, msg) - try: - from twindb_backup.destination.azblob import logger - - # during testing it would be nice to see all console log output (if any). 
- logger.setLevel(0) - except ImportError as ie: - pass - if "PRIMARY_TEST_CONN_STR" not in os.environ: - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING) - do_set_osenvs(set_osenvs) - conn_str = os.environ["PRIMARY_TEST_CONN_STR"] - conn_str_split = conn_str.split(";") - conn_str_parts = {} - for part in conn_str_split: - try: - k, v = [s for s in part.split("=", 1)] - conn_str_parts[k] = v - except ValueError as ve: - obj = [v for v in part.split("=", 1)] - k, v = obj - ve.args += part, obj, len(obj), k, v - raise ve - # conn_str_parts = {k:v for part in conn_str.split(";") for k,v in part.split("=",1)} - cls._connection_string = conn_str - cls._remote_protocol = conn_str_parts["DefaultEndpointsProtocol"] - remote_host = cls._remote_host = f'{conn_str_parts["AccountName"]}.blob.{conn_str_parts["EndpointSuffix"]}' - cls._remote_path_template = os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"] - cls._AzureBlob = AzureBlob - sample_resources = Path(__file__).resolve().parent - while not any("sample_resources" in str(p) for p in sample_resources.iterdir()): - sample_resources = sample_resources.parent - cls._sample_resource_folder = sample_resources.joinpath("sample_resources") - sample_targets = cls._sample_targets = SAMPLE_TARGETS - for i in range(len(sample_targets)): - sample_targets[i] = sample_targets[i].format(host=remote_host) - cls._part_names = PART_NAMES - cls._arg_names = [ - "default_protocol", - "default_host_name", - "default_container_name", - "default_interval", - "default_media_type", - "default_fname_prefix", - ] - cls._blank_parts = { - "default_protocol": "", - "default_host_name": "", - "default_container_name": "", - "default_interval": "", - "default_media_type": "", - "default_fname_prefix": "", - } - cls._none_parts = { - "default_protocol": None, - "default_host_name": None, - "default_container_name": None, - "default_interval": None, - "default_media_type": None, - "default_fname_prefix": None, - } - cls._basic_src_dst_kwargs = { - "protocol": "https", - "host_name": f"{remote_host}", - "container_name": "{default_container_name}", - "interval": "{default_interval}", - "media_type": "mysql", - "fname_prefix": "{default_fname_prefix}", - } - cls._container_names: Optional[str] = None - # For clarification on the following class members and their structure, - # see their associated properties defined below. 
- cls._basic_remote_dest_path: Optional[str] = None - cls._basic_remote_source_path: Optional[str] = None - cls._complex_invalid_remote_paths: Optional[List[Tuple[str, Dict[str, str], Dict[str, str]]]] = None - cls._complex_valid_remote_paths: Optional[Dict[str, List[Tuple[str, Dict[str, str], Dict[str, str]]]]] = None - cls._easy_container_names_map: Optional[Dict[str, str]] = None - cls._simple_valid_remote_paths: Optional[List[Tuple[str, Dict[str, str], Dict[str, str]]]] = None - cls._simple_valid_save_source_paths: Optional[List[str]] = None - cls._structured_parts: Optional[Dict[str, Dict[str, str]]] = None - cls._unique_backup_locations: Optional[Tuple[str]] = None - - @property - def AzureBlob(self): - return self._AzureBlob - - @property - def basic_remote_source_path(self): - if not self._basic_remote_source_path: - self._basic_remote_source_path = self._remote_path_template[: -len("{fname}")].format( - **self._basic_src_dst_kwargs - ) - return self._basic_remote_source_path - - @property - def basic_remote_dest_path(self): - if not self._basic_remote_dest_path: - self._basic_remote_dest_path = self._remote_path_template[: -len("{fname}")].format( - **self._basic_src_dst_kwargs - ) - return self._basic_remote_dest_path - - @property - def complex_valid_remote_paths(self): - if not self._complex_invalid_remote_paths: - # create mutable_parts as a deep copy of structured_parts - mutable_parts = {k: {kk: vv for kk, vv in v.items()} for k, v in self.structured_parts.items()} - path_template = "{default_protocol}://{default_host_name}/{default_container_name}/{default_interval}/{default_media_type}/{default_fname_prefix}" - self._complex_valid_remote_paths = { - "sub_all": [ - ( - "", - {k: v for k, v in mutable_parts[name].items()}, - self.structured_parts[name], - ) - for name in mutable_parts - ] - } - split_point = len("default_") - # build out a suite of test inputs that have part-wise substitution changes marked - for part in self._arg_names: - # marks the part to flag for substitution - [mutable_parts[name].update({part: "..."}) for name in mutable_parts] - _part = part[split_point:] - sub_part = f"sub_{_part}" - self._complex_valid_remote_paths[sub_part] = [ - ( - path_template.format(**args_d), - {part: self.structured_parts[name][part]}, - self.structured_parts[name], - ) - for name, args_d in mutable_parts.items() - ] - # reset the flagged part with its original value in preparation for next loop. 
- [mutable_parts[name].update({part: self.structured_parts[name][part]}) for name in mutable_parts] - return self._complex_valid_remote_paths - - @property - def complex_invalid_remote_paths(self): - if not self._complex_invalid_remote_paths: - blank_parts = self._blank_parts - none_parts = self._none_parts - self._complex_invalid_remote_paths = [ - # (f"azure://{cls._remote_host}/barney-of-buffalo-lodge/hourly/mysql/",{}), - ("../../../hourly/mysql/", {}), - ("../../../hourly/mysql/", blank_parts), - ("../../../hourly/mysql/", none_parts), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - {}, - ), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - blank_parts, - ), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - none_parts, - ), - # (f"https://{cls._remote_host}/wilma-of-impossibly-good-figure/daily/mysql/",{}), - (f"https://{self._remote_host}/.../daily/mysql/", {}), - (f"https://{self._remote_host}/.../daily/mysql/", blank_parts), - (f"https://{self._remote_host}/.../daily/mysql/", none_parts), - # (f"azure://{cls._remote_host}/betty-of-impossibly-good-figure/weekly/mysql/",{}), - (f"azure://{self._remote_host}/.../", {}), - (f"azure://{self._remote_host}/.../", blank_parts), - (f"azure://{self._remote_host}/.../", none_parts), - # (f"https://{cls._remote_host}/fred-of-buffalo-lodge/monthly/mysql/",{}), - (f"https://{self._remote_host}/", {}), - (f"https://{self._remote_host}/", blank_parts), - (f"https://{self._remote_host}/", none_parts), - ] - return self._complex_invalid_remote_paths - - @property - def connection_string(self): - return self._connection_string - - @property - def container_names(self): - if not self._container_names: - self._container_names = os.environ["TEST_CONTAINER_NAMES"].split(";") - self._container_names.extend( - "save-function-test,write-function-test,delete-function-test,combo-all-flintstones".split(",") - ) - return self._container_names - - @property - def easy_container_names(self): - if not self._easy_container_names_map: - self._easy_container_names_map = {v.split("-")[0]: v for v in self.container_names} - return self._easy_container_names_map - - @property - def part_names(self): - return self._part_names - - @property - def remote_path_template(self): - return self._remote_path_template - - @property - def sample_targets(self): - return self._sample_targets - - @property - def simple_valid_remote_paths(self): - if not self._simple_valid_remote_paths: - none_parts = self._none_parts - blank_parts = self._blank_parts - remote_host = self._remote_host - self._simple_valid_remote_paths = [ - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - self.structured_parts["barney"], - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - none_parts, - {}, - ), - ( - 
f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - self.structured_parts["wilma"], - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - self.structured_parts["betty"], - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - self.structured_parts["fred"], - {}, - ), - ] - for path, kwargs, out in self._simple_valid_remote_paths: - self._get_remote_parts(path, kwargs, out) - return self._simple_valid_remote_paths - - @property - def simple_valid_save_source_paths(self): - if not self._simple_valid_save_source_paths: - save_trunkate_len = len("backup/") - self._simple_valid_save_source_paths = [p[:-save_trunkate_len] for p in self.unique_backup_locations] - return self._simple_valid_save_source_paths - - @property - def structured_parts(self): - if not self._structured_parts: - remote_host = self._remote_host - self._structured_parts = { - "barney": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "barney-of-buffalo-lodge", - "default_interval": "hourly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "betty": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "betty-of-impossibly-good-figure", - "default_interval": "weekly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "wilma": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "wilma-of-impossibly-good-figure", - "default_interval": "daily", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "fred": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "fred-of-buffalo-lodge", - "default_interval": "monthly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - } - return self._structured_parts - - @property - def unique_backup_locations(self): - if not self._unique_backup_locations: - self._unique_backup_locations = tuple(set(p for p, _, _ in self._simple_valid_remote_paths)) - return self._unique_backup_locations - - @staticmethod - def _get_remote_parts(path: str, kwargs: dict, out: dict): - """ - "default_protocol" - "default_host_name" - "default_container_name" - "default_interval" - "default_media_type" - "default_fname_prefix" - - :param path: - :type path: - :param kwargs: - :type kwargs: - :param out: - :type out: - :return: - :rtype: - """ - path = path.rstrip("/") - _path = path - part_names = [ - "default_host_name", - "default_container_name", - "default_interval", - "default_media_type", - "default_fname_prefix", - ] - if path: - protocol, sep, path = path.partition("://") - if not path: - path = protocol - protocol = "" - out["default_protocol"] = protocol or kwargs.get("default_protocol", "") - for name in part_names[:-1]: - if 
not path: - break - part, _, path = path.partition("/") - if not path: - path = part - part = "" - kpart = kwargs.get(name, "") - out[name] = part or kpart - else: - name = part_names[-1] - part, _, path = path.partition("/") - kpart = kwargs.get(name, "") - out[name] = part or kpart - else: - out.update(kwargs) - - def _cleanup_remote(self): - delete_count = 0 - for kwargs in self.structured_parts.values(): - remote = self.AzureBlob( - self.basic_remote_source_path.format(**{k: v.strip(":/") for k, v in kwargs.items()}), - self.connection_string, - ) - delete_targets = [f for f in remote.list_files() if any(s in f for s in ("backup", "delete"))] - if not delete_targets: - continue - parts = [f.partition("://")[2].split("/")[1:] for f in delete_targets] - containers = [fparts[0] for fparts in parts] - full_fnames = ["/".join(fparts[1:]) for fparts in parts] - container_map = {} - for cont, fname in zip(containers, full_fnames): - container_map.setdefault(cont, []).append(fname) - containers = tuple(container_map.keys()) - with remote.connection_manager(containers) as cmanager: - cclients: list[ContainerClient] = cmanager.client - for cclient in cclients: - targets = container_map[cclient.container_name] - cclient.delete_blobs(*targets) - delete_count += len(targets) - return delete_count - - -class TC_000_ImportsTestCase(unittest.TestCase): - def test_00_successful_imports(self): - from twindb_backup.destination.azblob import AzureBlob - - def test_01_correct_os_environs(self): - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - do_set_osenvs(set_osenvs) - - -class TC_001_AzureBlobInstantiationTestCase(AzureBlobBaseCase): - def test_01_complex_valid_remote_paths(self) -> None: - expected: dict - for sub_type, sub_args in self.complex_valid_remote_paths.items(): - for remote_path, kwargs, expected in sub_args: - dest = self.AzureBlob(remote_path, self.connection_string, **kwargs) - attr: str - expected_val: str - for attr, expected_val in expected.items(): - produced_val = getattr(dest, attr) - expected_val = expected_val.strip(":/") - with self.subTest( - objective="checks if dest's computed properties match expectations, where dest is an instance of the twindb_backup.destinations.azblob.AzureBlob class", - sub_type=sub_type, - remote_path=remote_path, - kwargs=kwargs, - expected=expected, - attr=attr, - produced_val=produced_val, - expected_val=expected_val, - ): - self.assertEqual( - produced_val, - expected_val, - msg=( - f"\n\t{sub_type=}" - f"\n\t{remote_path=}" - f"\n\t{kwargs=}" - f"\n\t{expected=}" - f"\n\t{attr=}" - f"\n\t{produced_val=}" - f"\n\t{expected_val=}" - ), - ) - - def test_00_simple_valid_remote_paths(self) -> None: - expected: dict - for remote_path, kwargs, expected in self.simple_valid_remote_paths: - dest = self.AzureBlob(remote_path, self.connection_string, **kwargs) - attr: str - val: str - for attr, val in expected.items(): - with self.subTest( - objective="checks if dest's computed properties match expectations, where dest is an instance of the twindb_backup.destinations.azblob.AzureBlob class", - remote_path=remote_path, - kwargs=kwargs, - expected=expected, - attr=attr, - expected_val=val, - ): - self.assertEqual(getattr(dest, attr), val) - - -class TC_002_ListFilesTestCase(AzureBlobBaseCase): - """Tests an AzureBlob class instance's ability to produce a valid list of files when - given a relative path to some file or directory root in the same storage account as - its connection-string is associated with. 
- - When given an invalid path, that is incorrectly configured or asking for a file name - that doesn't exist, the correct behavior should be to return an empty list, and not - raise any errors. - - """ - - def setUp(self) -> None: - kwargs = self.structured_parts["fred"] - remote_path = self.basic_remote_source_path.format(**{k: v.strip(":/") for k, v in kwargs.items()}) - self.remote_source = self.AzureBlob(remote_path, self.connection_string) - self.expected = {} - for parent, parts_dict in self.structured_parts.items(): - self.expected[parent] = [] - container = parts_dict["default_container_name"] - fnames = os.environ[container.strip("/").replace("-", "_").upper()].split(";") - path = "{default_protocol}://{default_host_name}/{default_container_name}/{{fname}}".format(**parts_dict) - for fname in fnames: - self.expected[parent].append(path.format(fname=fname)) - non_suspend_brk = 0 - - def test_00_list_files_recursive_no_args(self): - retrieved = [ - f - for f in self.remote_source.list_files( - prefix=self.remote_source.default_container_name, - recursive=True, - files_only=True, - ) - if not f.endswith("sticker.png") - ] - expected = [name for cname, names in self.expected.items() for name in names] - for retrieved_f in retrieved: - path_f = Path(retrieved_f) - with self.subTest( - objective="confirm that retrieved_f is among our expected files list.", - retrieved_f=retrieved_f, - expected=expected, - ): - self.assertIn(retrieved_f, expected, f"\n\t{retrieved_f=}\n\t{expected=}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_list_files_prefixed(self): - dest = self.remote_source - # prefix:str=None, recursive=False, files_only=False - pref_expected = [ - (".../", 6), - ("...", 6), - (".../hourly", 0), - (".../monthly", 6), - (".../monthly/mysql", 6), - (".../monthly/mysql/does_not_exist", 0), - (".../hourly/mysql", 0), - (".../.../does_not_exist", 0), - (".../hourly/does_not_exist", 0), - (".../monthly/does_not_exist", 0), - (".../.../mysql", 6), - ("barney-of-buffalo-lodge", 6), - ("barney*/", 6), - ("barney-of-buffalo-lodge/hourly/mysql", 6), - ] - tf_patterns = [ - (False, False), - (True, False), - (True, True), - (False, True), - ] - testable_prefixes = [ - ( - dict(prefix=prefix, recursive=recursive, files_only=files_only), - expected_res_len, - ) - for prefix, expected_res_len in pref_expected - for recursive, files_only in tf_patterns - ] - for _kwargs, expected_ret_len in testable_prefixes: - retrieved = dest.list_files(**_kwargs) - ret_str = "\n\t\t".join(retrieved) - ret_len = len(retrieved) - kwarg_str = "\n\t\t".join(f"{k}: {v}" for k, v in _kwargs.items()) - failure_msg = ( - f"A prefix of {_kwargs} should result in {expected_ret_len}, actual retrieval got {ret_len}, files found." 
- f"\n\t{dest.default_protocol=}" - f"\n\t{dest.default_host_name=}" - f"\n\t{dest.default_container_name=}" - f"\n\t{dest.default_interval=}" - f"\n\t{dest.default_media_type=}" - f"\n\t{dest.default_fname_prefix=}" - f"\n\tkwargs=\n\t\t{kwarg_str}" - f"\n\tretrieved=\n\t\t{ret_str}" - ) - with self.subTest( - objective="ensure that the number of returned files for given prefixes matches expectations", - ret_len=ret_len, - expected_ret_len=expected_ret_len, - _kwargs=_kwargs, - ): - self.assertEqual(ret_len, expected_ret_len, failure_msg) - - -class TC_003_ReadTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - kwargs = self.structured_parts["fred"] - src_path = self.basic_remote_source_path.format(**kwargs) - self.remote_source = self.AzureBlob(src_path, self.connection_string) - self.local_copy_location = self._sample_resource_folder.joinpath("remote_example") - container_paths = tuple(self.local_copy_location.iterdir()) - flist = [] - expected_data = {} - for cpath in container_paths: - ref = expected_data.setdefault(cpath.name, {}) - for bpath in cpath.rglob("**/*.txt"): - ref[bpath.name] = bpath.read_text() - flist.append(bpath) - smallest_file = min( - filter( - lambda p: self.remote_source.default_container_name in p.parts, - flist, - ), - key=lambda p: p.stat().st_size, - ) - self.smallest_file = str(smallest_file).split(self.remote_source.default_container_name)[1].lstrip("/") - self.expected_data = expected_data - - def test_read(self): - targets = tuple( - filter( - lambda s: self.smallest_file in s, - self.remote_source.list_files(), - ) - ) - containers = tuple(self.expected_data.keys()) - for f in targets: - _, _, path = f.partition("://") - parts = path.split("/") - container = parts[1] - if container in containers and "likes.dinosaurs.txt" in f: - test_function_logger.debug( - f"Running test on:" - f"\n\ttarget={f}\n\tas_bytes={False}\n\tcontainer={container}\n\tfname={parts[-1]}" - ) - with self.subTest( - objective="evaluate if data read from remote blob correctly matches the seed data stored locally.", - container=container, - fname=parts[-1], - target_file=f, - ): - data = self.remote_source.read(f) - expected = self.expected_data[container][parts[-1]] - data = data.decode("utf-8") - self.assertEqual(len(data), len(expected)) - self.assertMultiLineEqual(data, expected) - - -class TC_004_DeleteTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - """ - Creates a temporary container (named delete-function-test) in the configured Azure blob storage endpoint, - and populates it with files copied from the "wilma-of-impossible-figure" sample container. - This container and its contents will be cleaned up at the end of each test function in this test-case. 
- """ - dst_container = self.test_container = self.easy_container_names["delete"] - src_container = self.easy_container_names["wilma"] - kwargs = {k: (v if "prefix" not in k else "").strip(":/") for k, v in self.structured_parts["wilma"].items()} - src_path = self.basic_remote_source_path.format(**kwargs) - kwargs["default_container_name"] = dst_container - dst_path = self.basic_remote_dest_path.format(**kwargs) - src = self.azure_source = self.AzureBlob(src_path, self.connection_string, False) - dest = self.azure_del_target = self.AzureBlob(dst_path, self.connection_string, True) - blob_names = [p.split(src_container)[1][1:] for p in src.list_files(src_container)] - self.participating_files = [] - with dest.connection_manager(dest.default_container_name) as cont_iter: - iter_type = next(cont_iter) - if iter_type != "ContainerClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - dst_client: ContainerClient = next(cont_iter) - with src.connection_manager(src.default_container_name, blob=blob_names) as client_iterator: - iter_type = next(client_iterator) - if iter_type != "BlobClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - copy_polls = [] - for src_bclient in client_iterator: - src_bclient: BlobClient - bname = src_bclient.blob_name - src_url = src_bclient.url - dst_bclient: BlobClient = dst_client.get_blob_client(bname) - self.participating_files.append((bname, src_url, dst_bclient.url)) - copy_polls.append(dst_bclient.start_copy_from_url(src_url)) - tries = 0 - while copy_polls and tries < 100: - for i in range(len(copy_polls) - 1, -1, -1): - if copy_polls[i]["copy_status"] == "success": - copy_polls.pop(i) - tries += 1 - - def tearDown(self) -> None: - with self.azure_source.connection_manager(self.test_container) as cont_iter: - iter_type = next(cont_iter) - if iter_type != "ContainerClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - for client in cont_iter: - client: ContainerClient - try: - client.delete_blobs(*(tpl[2] for tpl in self.participating_files)) - except PartialBatchErrorException: - pass - - def test_00_delete_one_file(self): - del_target = self.participating_files[0][2] - self.azure_del_target.delete(del_target) - remaining_files = self.azure_del_target.list_files(".../.../.../") - readable_remaining = [f.split(self.test_container)[1] for f in remaining_files] - with self.subTest( - objective="ensure that once a file is deleted, it does not a member of the updated list of remaining_files", - del_target=del_target, - remaining_files=readable_remaining, - ): - self.assertNotIn(del_target, remaining_files) - for _, _, should_remain in self.participating_files[1:]: - with self.subTest( - objective="ensure that files not specified for deletion still remain", - should_remain=should_remain, - del_target=del_target, - remaining_files=readable_remaining, - ): - self.assertIn(should_remain, remaining_files) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_delete_multiple_files(self): - del_targets = self.participating_files[1::2] - remaining_participants = self.participating_files[::2] - for target in del_targets: - self.azure_del_target.delete(target[2]) - remaining_files = 
self.azure_del_target.list_files(".../.../.../") - readable_remaining = [f.split(self.test_container)[1] for f in remaining_files] - for target in del_targets: - del_target = target[2] - with self.subTest(del_target=del_target, remaining_files=readable_remaining): - self.assertNotIn(del_target, remaining_files) - for _, _, should_remain in remaining_participants: - with self.subTest(should_remain=should_remain, remaining_files=readable_remaining): - self.assertIn(should_remain, remaining_files) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_02_delete_all(self): - for bname, src_url, dst_url in self.participating_files: - self.azure_del_target.delete(dst_url) - remaining_files = [ - f - for f in self.azure_del_target.list_files(".../.../.../") - for p, _, fname in [f.rpartition("/")] - if fname and fname != "delete-function-test" - ] - if remaining_files: - self.fail(f"Failed to delete all files in target container: {remaining_files}") - - -class TC_005_WriteTestCase(AzureBlobBaseCase): - """Tests the different ways the `AzureBlob.write(...)` function can be called. - - We are drawing the source data from a single Azure storage subscription and writing the data back to the same - subscription in a different location. So, before running this set of tests, run the tests in the ReadTestCase class - to ensure proper source data is being provided to the writer. - """ - - def setUp(self) -> None: - self.test_container = self.easy_container_names["write"] - self.src_kwargs = self.structured_parts["barney"] - self.dst_kwargs = {k: v for k, v in self.src_kwargs.items()} - self.dst_kwargs["default_container_name"] = self.test_container - - self.local_copy_location = self._sample_resource_folder.joinpath("remote_example") - container_paths = tuple( - p for p in self.local_copy_location.iterdir() if p.name == self.src_kwargs["default_container_name"] - ) - smallest_file = min( - (p for c in container_paths for p in c.rglob("**/*.txt")), - key=lambda p: p.stat().st_size, - ) - self.smallest_file = str(smallest_file).split(self.src_kwargs["default_container_name"], 1)[1].lstrip("/") - - def test_00_write_generated_data_overwrite_fail(self): - test_str_content = "This is a simple and small bit of text to write to the destination_tests endpoint" - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=False, - ) - pstr = dest.remote_path + "/overwrite.target.txt" - with self.subTest(content=test_str_content, path=pstr): - err = None - try: - dest.write(test_str_content, pstr) - except BaseException as be: - err = be - self.assertIsInstance(err, ResourceExistsError) - - def smallest_file_filter(self, file_url: str): - return self.smallest_file in file_url - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_write_generated_data_overwrite_ok(self): - test_str_content = "This is a simple and small bit of text to write to the destination_tests endpoint" - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=True, - ) - pstr = dest.remote_path + "/overwrite.target.txt" - with self.subTest(content=test_str_content, path=pstr): - try: - dest.write(test_str_content, pstr) - except BaseException as be: - self.fail(f"Failed to write to target file with exception details:\n\t{type(be)}: {be.args}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def 
test_02_write_from_remote_overwrite_ok(self): - - source = self.AzureBlob( - self.basic_remote_source_path.format(**self.src_kwargs), - self.connection_string, - ) - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=True, - ) - src_flist = tuple(filter(self.smallest_file_filter, source.list_files(".../"))) - for spath in src_flist: - parts = spath.partition("://")[2].split("/") - container = parts[1] - bname = "/".join(parts[2:]) - with BlobClient.from_connection_string(self.connection_string, container, bname) as bclient: - bclient: BlobClient - bprops: BlobProperties = bclient.get_blob_properties() - size = bprops.size - with source.get_stream(spath) as content: - parts[1] = dest.default_container_name - dpath = "/".join(parts) - with self.subTest(content_len=size, spath=spath, dpath=dpath): - try: - dest.write(content, dpath) - except BaseException as be: - self.fail(f"Failed to write to target file with exception details:\n\t{type(be)}: {be.args}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_03_write_from_remote_overwrite_fail(self): - source = self.AzureBlob( - self.basic_remote_source_path.format(**self.src_kwargs), - self.connection_string, - ) - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=False, - ) - src_flist = tuple(filter(self.smallest_file_filter, source.list_files(".../"))) - for spath in src_flist: - parts = spath.partition("://")[2].split("/") - container = parts[1] - bname = "/".join(parts[2:]) - with BlobClient.from_connection_string(self.connection_string, container, bname) as bclient: - bclient: BlobClient - bprops: BlobProperties = bclient.get_blob_properties() - size = bprops.size - with source.get_stream(spath) as content: - parts[1] = dest.default_container_name - dpath = "/".join(parts) - with self.subTest(content_len=size, spath=spath, dpath=dpath): - self.assertRaises(ResourceExistsError, dest.write, content, dpath) - - -class TC_006_SaveTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - remote_dest_target = Path(self.basic_remote_dest_path.partition("://")[2]) - dparts = remote_dest_target.parts - container_names = self.container_names - container: str - container, *_ = tuple(cont for cont in container_names if "betty" in cont) - self.source_container = container - self.dest_container = self.easy_container_names["save"] - fnames = os.environ[container.upper().replace("-", "_")].split(";") - dparts = dparts[0], self.dest_container, fnames[0].rpartition("/")[0] - sparts = dparts[0], self.source_container, fnames[0].rpartition("/")[0] - remote_dest_target = "https://" + "/".join(dparts) - remote_src_target = "https://" + "/".join(sparts) - self.remote_dest_target = remote_dest_target - self.remote_src_target = remote_src_target - self.dest = self.AzureBlob(remote_dest_target, self.connection_string) - self.source = self.AzureBlob(remote_src_target, self.connection_string) - local_copy = self._sample_resource_folder.joinpath("remote_example") - local_copy = tuple(p for p in local_copy.iterdir() if "betty-of" in str(p))[0] - local_copy = list(local_copy.iterdir()) - while not all(p.suffix and p.suffix == ".txt" for p in local_copy): - extension = [] - for p in local_copy: - extension.extend(p.iterdir()) - local_copy = extension - local_copy = [min(local_copy, key=lambda s: Path(s).stat().st_size)] - self.local_target_files = local_copy - # ".../.../.../" 
tells our destination instance to use its default names for [protocol, host, container] - remote_blob_names = [] - for p in local_copy: - rel = ".../.../.../" + str(p).split(self.source_container)[1].lstrip("/") - remote_blob_names.append(rel) - self.remote_blob_names = remote_blob_names - - # because we are testing our destination with the overwrite parameter set to false, we need to make - # sure our destination does not already exist. - with ContainerClient.from_connection_string(self.connection_string, self.dest_container) as cclient: - cclient: ContainerClient - try: - cclient.delete_blobs(*cclient.list_blobs()) - except ResourceNotFoundError: - pass - self.local_target_files = sorted(self.local_target_files, key=lambda p: p.name) - self.remote_blob_names = sorted(self.remote_blob_names, key=lambda s: s.rpartition("/")[2]) - self.smallest_file = ( - str(min(self.local_target_files, key=lambda p: p.stat().st_size)) - .split(self.source.default_container_name)[1] - .strip("/") - ) - - def tearDown(self) -> None: - cclient: ContainerClient = ContainerClient.from_connection_string(self.connection_string, self.dest_container) - try: - cclient.delete_blobs(*cclient.list_blobs()) - finally: - cclient.close() - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_00_save_from_local_fd(self): - for local_p, remote_p in zip(self.local_target_files, self.remote_blob_names): - if self.smallest_file not in str(local_p): - continue - with open(local_p, "rb") as f: - expected = f.read() - f.seek(0) - try: - self.dest.save(f, remote_p) - except ResourceExistsError: - self.fail("attempting to save to destination that already exists is a known failure condition.") - results = self.dest.read(remote_p) - with self.subTest( - objective="ensure that round-trip data transfer, starting in a local file, does not change or lose the data", - local_path=local_p, - remote_path=remote_p, - ): - self.assertEqual( - results, - expected, - "We've written from byte file to remote, " - "then read the stored contents back into a new bytes object for comparison.", - ) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_save_from_remote_stream(self): - source_file_urls = self.source.list_files( - ".../.../.../", - True, - str(self.local_target_files[0]).split(self.source.default_container_name)[1].lstrip("/"), - True, - ) - for p in source_file_urls: - if self.smallest_file not in p: - continue - dpath = ".../.../.../" + p.split(self.source.default_container_name)[1].lstrip("/") - with self.subTest( - objective="ensure that round-trip data transfer, starting in a remote blob, does not change or lose the data", - src_path=p, - dst_path=dpath, - ): - with self.source.get_stream(p) as stream_in: - with self.subTest(stream_in=stream_in.fileno()): - try: - self.dest.save(stream_in, dpath) - except BaseException as be: - self.fail(f"Failed to save content to destination:\n\t{type(be)}: {be.args}") - - -class TC_007_StreamTestCase(AzureBlobBaseCase): - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_00_acquire_pipe_per_file(self): - src_kwargs = self.structured_parts["fred"] - source = self.AzureBlob( - self.basic_remote_source_path.format(**src_kwargs), - self.connection_string, - ) - sample_content_relative_path = ( - "backup/sample_resources/remote_example/fred-of-buffalo-lodge/monthly/mysql".split("/") - ) - here = Path(__file__).parent.resolve() - while here.name and here.name != 
sample_content_relative_path[0]: - here = here.parent - sample_path = here.joinpath("/".join(sample_content_relative_path[1:])) - expected_total_bytes = 0 - paths = [] - for p in sample_path.iterdir(): - paths.append("/".join(p.parts[-3:])) - with open(p, "rb") as f: - f.seek(0, 2) - expected_total_bytes += f.tell() - test_function_logger.debug(f"{expected_total_bytes=}") - expected_type = type(b"blah").__name__ - bytes_recieved = 0 - for p in paths: - dtypes = set() - with source.get_stream(f".../.../{source.default_container_name}/{p}") as stream_pipe: - stream_pipe: io.FileIO - try: - strt = time.perf_counter() - while time.perf_counter() - strt < 4: - data = stream_pipe.read() - data_type = type(data).__name__ - dtypes.add(data_type) - if data: - strt = time.perf_counter() - bytes_recieved += len(data) - test_function_logger.debug(f"{bytes_recieved=}") - except EOFError: - pass - for dtype in dtypes: - with self.subTest( - objective="Ensure that the data type (bytes/str/int) sent over pipe connection match expectations", - expected_output_type=expected_type, - actual_output_type=dtype, - path=p, - ): - self.assertEqual(dtype, expected_type) - with self.subTest( - objective="Ensure that all of the data sent into the pipe is was collected on the other side.", - expected_total_bytes=expected_total_bytes, - bytes_recieved=bytes_recieved, - ): - self.assertEqual(expected_total_bytes, bytes_recieved) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_acquire_pipe_per_container(self): - src_kwargs = self.structured_parts["fred"] - source = self.AzureBlob( - self.basic_remote_source_path.format(**src_kwargs), - self.connection_string, - ) - sample_content_relative_path = "backup/sample_resources/remote_example".split("/") - here = Path(__file__).parent.resolve() - while here.name and here.name != sample_content_relative_path[0]: - here = here.parent - sample_path = here.joinpath("/".join(sample_content_relative_path[1:])) - expected_total_bytes = 0 - paths = [] - for p in sample_path.rglob(f"**/{source.default_container_name}/**/*.txt"): - paths.append("/".join(p.parts[-3:])) - with open(p, "rb") as f: - f.seek(0, 2) - expected_total_bytes += f.tell() - test_function_logger.debug(f"{expected_total_bytes=}") - expected_type = type(b"blah").__name__ - bytes_recieved = 0 - dtypes = set() - with source.get_stream(f".../.../.../") as stream_pipe: - stream_pipe: io.FileIO - try: - strt = time.perf_counter() - while time.perf_counter() - strt < 4: - data = stream_pipe.read() - data_type = type(data).__name__ - dtypes.add(data_type) - if data: - strt = time.perf_counter() - bytes_recieved += len(data) - test_function_logger.debug(f"{bytes_recieved=}") - except EOFError: - pass - for dtype in dtypes: - with self.subTest( - objective="Ensure that the data type (bytes/str/int) sent over pipe connection match expectations", - expected_output_type=expected_type, - actual_output_type=dtype, - ): - self.assertEqual(dtype, expected_type) - with self.subTest( - objective="Ensure that no data was mishandled or lost when passed through the pipe.", - expected_total_bytes=expected_total_bytes, - bytes_recieved=bytes_recieved, - ): - self.assertEqual(expected_total_bytes, bytes_recieved) - - -def main(): - cover_ctx_manager = handle_coverage() - with cover_ctx_manager(): - unittest.TextTestRunner().run(unittest.TestLoader().loadTestsFromTestCase(TC_000_ImportsTestCase)) - print("done") - dbg_break = 0 - - -if __name__ == "__main__": - # main() - 
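The ".../.../.../" prefixes used by the delete, save, and stream tests above stand for "use the destination instance's defaults for protocol, host, and container", as the comment in TC_006_SaveTestCase.setUp notes. A minimal sketch of that substitution, assuming only what that comment states; resolve_placeholders() and the sample default values below are hypothetical stand-ins, not the actual twindb_backup.destination.azblob.AzureBlob implementation:

```python
def resolve_placeholders(path: str, defaults: dict) -> str:
    """Replace each leading "..." segment with the default for that slot
    (protocol, host, container); the remaining segments form the blob name."""
    slots = ("protocol", "host", "container")
    segments = path.strip("/").split("/")
    head = [
        defaults[name] if segment == "..." else segment
        for name, segment in zip(slots, segments[: len(slots)])
    ]
    tail = segments[len(slots):]
    return f"{head[0]}://" + "/".join(head[1:] + tail)


# Illustrative defaults only; the real values come from the connection string
# and the arguments passed to the AzureBlob constructor.
defaults = {
    "protocol": "https",
    "host": "from_the_town_of_bedrock.flintstones.meet.the.flintstones.net",
    "container": "wilma-of-impossibly-good-figure",
}
print(resolve_placeholders(".../.../.../daily/mysql/pebbles.likes.dinosaurs.txt", defaults))
# -> https://from_the_town_of_bedrock.flintstones.meet.the.flintstones.net/wilma-of-impossibly-good-figure/daily/mysql/pebbles.likes.dinosaurs.txt
```

The real class derives those defaults from the remote path and keyword arguments given to its constructor, which is what TC_001_AzureBlobInstantiationTestCase checks above.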
unittest.main(verbosity=2) diff --git a/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py b/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py deleted file mode 100644 index a3cc847de..000000000 --- a/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import unittest - - -class CustomLocalEnvTestCase(unittest.TestCase): - def test_set_osenvs(self): - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - def single_equality(a, b): - return a == b - - def sequence_equality(a, b): - if len(a) != len(b): - return False - for i, (_a, _b) in enumerate(zip(a, b)): - if _a != _b: - return False - return True - - # set_osenvs(be_silent=False, use_multi_proc=False) - set_osenvs() - expected_test_interval = ( - "hourly", - "daily", - "weekly", - "monthly", - "yearly", - ) - expected_test_path_parts = ( - "protocol", - "host", - "container", - "interval", - "media_type", - "fname_prefix", - "fname", - ) - expected_test_complete_remote_path_template = ( - "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - ) - env_vars = [ - ("test_intervals".upper(), ";", sequence_equality), - ("test_path_parts".upper(), ";", sequence_equality), - ( - "test_complete_remote_path_template".upper(), - None, - single_equality, - ), - ] - expected_vals = [ - expected_test_interval, - expected_test_path_parts, - expected_test_complete_remote_path_template, - ] - dead_tests = [] - for i, (name, *_) in enumerate(env_vars): - with self.subTest( - objective="check if '{}' variable is in os.environ".format(name), - environment_var=name, - ): - try: - check = os.environ[name] - except BaseException as be: - dead_tests.append(i) - for i in dead_tests[::-1]: - env_vars.pop(i) - expected_vals.pop(i) - - for (name, sep, comp), expected in zip(env_vars, expected_vals): - val = os.environ[name] - if sep: - val = val.split(sep) - with self.subTest( - objective="confirm that the configured values match expectations", - environment_var=name, - environment_val=val, - expected=expected, - ): - self.assertTrue( - comp(val, expected), - "{name} did not produce expected value:\n\tgot: {val}\n\texpected: {expected}".format( - name=name, val=val, expected=expected - ), - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittests/excluded_env_config/README.md b/tests/unittests/excluded_env_config/README.md deleted file mode 100644 index edfb59179..000000000 --- a/tests/unittests/excluded_env_config/README.md +++ /dev/null @@ -1,120 +0,0 @@ -### Regarding `dummy_env_vars.json.template` - -## General purpose: - -`dummy_env_vars.json.template` is a template that you can copy past into a `dummy_env_vars.json` file with your own -values defined. - -It is meant to provide a pythonic mechanism to quickly, and easily, set up the necessary components for testing the -Azure blob storage extension to the twindb/backup project. - -## Quick overview of the file's structure - -#### Minimal key/value pairs for file to serve its purpose - -For the example values, we use a [Flintstones](https://en.wikipedia.org/wiki/The_Flintstones) naming theme to aid -in making it clear where you should supplement your own values. 
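Once a `dummy_env_vars.json` shaped like the template below is in place, `set_osenvs()` loads it and exports the values that the azblob unit tests later read back from `os.environ`. A trimmed-down sketch of that export step, with the connection-string validation, the `.json.template` fallback prompt, and the remote-container seeding omitted:

```python
import json
import os
from pathlib import Path


def export_test_env(config_path: str = "dummy_env_vars.json") -> None:
    """Minimal stand-in for set_osenvs(): read the JSON config and export the
    environment variables the azblob unit tests expect to find."""
    vars_dict = json.loads(Path(config_path).read_text(encoding="utf-8"))
    dest = vars_dict["os.environ"]["test_destination"]
    os.environ["PRIMARY_TEST_CONN_STR"] = dest["PRIMARY_TEST_CONN_STR"]
    os.environ["TEST_INTERVALS"] = ";".join(dest["INTERVALS"])
    os.environ["TEST_PATH_PARTS"] = ";".join(dest["PATH_PARTS"])
    os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"] = dest["COMPLETE_REMOTE_PATH_TEMPLATE"]
    os.environ["TEST_CONTAINER_NAMES"] = ";".join(vars_dict["dummy_vals"]["container_names"])
```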
- -```json -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "comments": [ - "The value associated with 'PRIMARY_TEST_CONN_STR' is just a placeholder but it also serves to show", - "the expected structure of the connection string" - ], - "PRIMARY_TEST_CONN_STR": "DefaultEndpointsProtocol=https;AccountName=from_the_town_of_bedrock;AccountKey=hAVE+4+Ya8Ado/time+a+DAb4do/TIME+a+/Y4b4/d484/d0+tIMe==;EndpointSuffix=flintstones.meet.the.flintstones.net", - "INTERVALS": ["hourly","daily","weekly","monthly","yearly"], - "PATH_PARTS":["protocol","host","container","interval","media_type","fname_prefix","fname"], - "COMPLETE_REMOTE_PATH_TEMPLATE": "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "fred-of-buffalo-lodge", - "barney-of-buffalo-lodge", - "wilma-of-impossibly-good-figure", - "betty-of-impossibly-good-figure" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": ["{child}{sep}{disposition}{sep}{item_type}.{extension}"], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} - - -``` - -## Quick explanation of the component key/value pairs - -```json -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "PRIMARY_TEST_CONN_STR": "This should be the connection string for your target Azure subscription as defined here:\n https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "best", - "not", - "change", - "unless", - "you", - "also", - "change", - "test", - "scripts", - "to", - "match" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": [ - "{child}{sep}{disposition}{sep}{item_type}.{extension}" - ], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} -``` - -"the dictionaries that follow are examples of expected data structures": "key names inside chevrons, E.G. 
<>, are -optional and can be named however you like, all other's are minimum requirements\n\tall values are dummy examples and -should be replaced according to your own account details.", diff --git a/tests/unittests/excluded_env_config/_dummy_content.json b/tests/unittests/excluded_env_config/_dummy_content.json deleted file mode 100644 index 0ec3e7140..000000000 --- a/tests/unittests/excluded_env_config/_dummy_content.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "brands": ["Fruity Cards", "Launchpad Fruity", "Fruity Excellent", "Endurance Fruity", "SolidRock Fruity", "Totem Fruity", "Fruity Scouts", "QuantumLeap Fruity", "Fruity Design", "Fruity Fun", "Clearance Fruity", "Micro Fruity", "Starter Fruity", "Fruity Clip", "Fruity Study", "Tunnel Fruity", "Depot Fruity", "Vigor Fruity", "Fruity Reader", "Venue Fruity", "Shop Fruity", "Fruity Speak", "Fruity Chef", "Hobby Fruity", "Monkey Fruity", "Scion Fruity", "Magical Fruity", "Destiny Fruity", "Intrepid Fruity", "Acclaim Fruity", "Fruity Patch", "Axion Fruity", "TopChoice Fruity", "Savage Fruity", "Mustang Fruity", "Fruity Glory", "Big Fruity", "Fruity Watcher", "Fruity Future", "Fruity Secure", "Fruity Army", "Watch Fruity", "AlphaOmega Fruity", "Fruity Eye", "Fruity Concept", "Fruity Dash", "Season Fruity", "OpenMind Fruity", "Fruity Field", "Kronos Fruity", "Fruity Vid", "Skylark Fruity", "Fruity Ist", "Leader Fruity", "Magma Fruity", "Slick Fruity", "Quantum Fruity", "Fruity Dome", "Continuum Fruity"], - "names": ["Nigel Bradley","Saffa Avalos","Filip Hughes","Shanay Oakley","Nataniel Camacho","Samad Hensley","Mallory Pritchard","Abigale Farrow","Ophelia English","Adelina Sutherland","Francesco Cain","Camden Newman","Lamar Ellis","Cherish Osborne","Cari Sharples","Mya Schmitt","Eren Hills","Juanita Moreno","Emer Swan","Jimi Marsh","Tayyib Williams","Reya Wu","Janine Holmes","Calista York","Khalid Michael","Adil Conley","Winifred Mendoza","Eoghan Coffey","Liam Farley","Pia Hastings","Nola Bernal","Wesley Meyer","Elis Thatcher","Ottilie Haley","Leigh Landry","Boris Humphries","Wyatt Partridge","Mitchell Beltran","Codey Carr","Alys Sutton","Wren Li","Campbell Lloyd","Emmie Chase","Shahzaib Kim","Nada Webster","Said Patel","Cheyenne Odom","Emrys Witt","Lucia Francis","Zack Gough","Ailsa Flores","Leandro Bowman","Ayda Bloom","Rojin Adkins","Nathaniel Gentry","Archibald Mercado","Kevin Pennington","Summer Correa","Annabell Jensen","Tyreece Rubio","Collette Weeks","Menna Ponce","Anastazja Ritter","Iman Rennie","Valerie Roy","Nicole Pacheco","Kimberly Dunkley","Anderson Perez","Aleyna Wilder","Buddy Evans","Quinn Kendall","Stuart Mooney","Elmer Wise","Eryk Key","Blade Oliver","Maariyah Decker","Alice Legge","Kenny Noble","Camilla Shah","Imogen Acevedo","Renesmee Simons","Skye Gilbert","August Cruz","Chad Baxter","Tiana Donaldson","Shania Sharpe","Salma Guy","Ibrahim Morales","Iram Sears","Oliwia Olson","Michele Fisher","Connar Durham","Om Wang","Rhiann Thorne","Eduard Hassan","Elizabeth Pate","Rex Downes","Ava-Mae Maddox","Kaya Gray","Helen Hendricks"], - "affixes": 
["a-","an-","-acanth","acantho-","-cantho","amphi-","-anthus","antho-","arch-","archi-","archo-","-archus","archaeo-","-arctos","arcto-","arthro-","aspido-","-aspis","-avis","-bates","brachi-","brachy-","bronto-","-canth","cantho-","-acanth","acantho-","carcharo-","-cephalus","cephalo-","-cephale","-cephalian","-ceras","cerat-","-ceratus","cetio-","-cetus","-cheirus","chloro-","coel-","cyan-","cyano-","cyclo-","cyn-","-cyon","-dactyl","-dactylus","-deres","-derm","deino-","dino-","deino-","-delphys","-delphis","delpho-","dendro-","-dendron","-dendrum","di-","dino-","deino-","diplo-","-don","-dont","-donto-","-odon","-odont","-odonto-","dromaeo-","dromeo-","-dromeus","eo-","-erpeton","eu-","-felis","-form","-formes","giga-","giganto-","-gnath-","gnatho-","-gnathus","hemi-","hespero-","hippus","hippo-","hyl-","hylo-","-ia","ichthyo-","-ichthys","-lania","-lepis","lepido-","-lestes","long","-lopho-","-lophus","macro-","-maia","maia-","mega-","megalo-","micro-","mimo-","-mimus","-monas","-monad","-morph","-nax","-anax-","-noto-","-nych","nycho-","-nyx","-onych","onycho-","-onyx","-odon","-odont","-odonto-","-oides","-odes","onycho-","-onychus","-onyx","-ops","-ornis","ornith-","ornitho-","pachy-","para-","-pelta","-philus","-phila","philo-","-phyton","-phyta","phyto-","-phyte","-pithecus","pitheco-","platy-","plesio-","plesi-","-pod","podo-","-pus","pro-","protero-","proto-","psittaco-","-psitta","pter-","ptero-","-pterus","pteryg-","-ptera","-pteryx","-pus","-pod","-podo-","-pus","-raptor","raptor-","-rex","-rhina","rhino-","-rhinus","rhodo-","rhynco-","-rhynchus","sarco-","saur","sauro-","-saurus","smilo-","-smilus","-spondylus","squali-","squalo-","stego-","-stega","strepto-","-stoma","-stome","-stomus","sucho-","-suchus","-teuthis","thero-","-therium","thylac-","tri-","titano-","-titan","tyranno-","-tyrannus","-urus","-uro-","veloci-","-venator","xeno-","-zoon","-zoa"] -} diff --git a/tests/unittests/excluded_env_config/build_out_dummy_env.py b/tests/unittests/excluded_env_config/build_out_dummy_env.py deleted file mode 100644 index bc0676225..000000000 --- a/tests/unittests/excluded_env_config/build_out_dummy_env.py +++ /dev/null @@ -1,418 +0,0 @@ -import concurrent.futures as cf -import gc -import json -import logging -import os -from contextlib import contextmanager -from pathlib import Path -from typing import Any, List, Union - -from azure.storage.blob import BlobClient, BlobProperties, BlobServiceClient, ContainerClient, StorageStreamDownloader - -logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING) -logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING) - -from tests.unittests.excluded_env_config.dummy_content_generation import cave_gen, dino_gen, painting_gen -from twindb_backup import LOG - -PATH_HERE = Path(__file__).parent - -MIN_SAMPLE_SIZE = 2**31 -BLOCK_SIZE = 2**24 # 2**24 == 16 MiB -INTERVAL_MAPPING = { - "pebbles": { - "wilma": "daily", - "fred": "monthly", - }, - "bambam": { - "barney": "hourly", - "betty": "weekly", - }, -} -TARGET_FILE_HISTORY = set() -DUMMY_OBJ = object() - - -def get_local_cache_location(): - here = Path(PATH_HERE) - relative_dirs = "backup/sample_resources/remote_example".split("/") - while here.name and relative_dirs[0] != here.name: - here = here.parent - return here.parent.joinpath("/".join(relative_dirs)) - - -@contextmanager -def _gc_toggle(depth=0): - gc.disable() - try: - yield depth + 1 - finally: - if depth == 0: - gc.enable() - - -def get_key_manager(keychain: list): - 
@contextmanager - def key_master(val): - keychain.append(val) - try: - yield - finally: - keychain.pop() - - return key_master - - -def content_gen_wrapper(blob_names: dict, for_parent: str, sep): - """This function depends upon the structure of the input argument blob_names which should be derived from the - dummy_vals portion of the dummy_env_vas.json file.""" - - def part_gen(child, child_blobs): - def inner(parent, blob_name, size): - nonlocal content, correct_parents - try: - if parent in correct_parents: - yield True - gen = content[blob_name] - yield from gen(size) - else: - yield False - except BaseException as be: - be.args += ( - { - "parent": parent, - "child": child, - "blob_name": blob_name, - "size": size, - }, - ) - raise be - - correct_parents = parent_map[child] - gen_map = {"dinosaurs": dino_gen, "caves": cave_gen} - # content is a precomputed mapping of data generators used by our inner function to simulate human-readable data - content = { - blob: gen_map.get(blob.rsplit(child + sep, 1)[1].split(sep)[1], painting_gen) for blob in child_blobs - } - return inner - - parent_map = {childkey: {*INTERVAL_MAPPING[childkey]} for childkey in INTERVAL_MAPPING} - ret = { - parent: part_gen(kid, blob_names[kid]) - for kid, parents in parent_map.items() - for parent in parents - if parent == for_parent - } - return ret - - -def make_blobs(container_name, fname_template_dict): - def populate_remote_blob(): - content_map = content_gen_wrapper(children_dict, parent, sep) - with service_client.get_container_client(container_name) as client: - client: ContainerClient - if not client.exists(): - client: ContainerClient = service_client.create_container(container_name) - for child, blobs in children_dict.items(): - # sizes = 2**30,*(block_size for _ in range(len(blobs)-1)) - sizes = 2**27, *(BLOCK_SIZE for _ in range(len(blobs) - 1)) - blob: str - for blob, size in zip(blobs, sizes): - blob = blob.strip() - while blob.startswith("/"): - blob = blob[1:] - person, attitude, item_type, *_ = blob.split(".") - with client.get_blob_client(blob) as bclient: - bclient: BlobClient - cache_file_path = cache_location.joinpath(container_name).joinpath(blob) - cache_file_path.parent.mkdir(parents=True, exist_ok=True) - data_gen = content_map[parent](parent, blob, size) - if not next(data_gen): - continue - if not cache_file_path.exists(): - if bclient.exists(): - bprop: BlobProperties = bclient.get_blob_properties() - _size = bprop.size - LOG.debug( - f"staging {_size} byte content by downloading from {bclient.primary_endpoint}" - ) - with open(cache_file_path, "wb") as f: - dl: StorageStreamDownloader = bclient.download_blob() - dl.readinto(f) - else: - LOG.debug(f"staging {size} byte content before uploading to {bclient.primary_endpoint}") - with open(cache_file_path, "wb") as fd: - fd.writelines(data_gen) - else: - if not bclient.exists(): - with open(cache_file_path, "rb") as fd: - fd.seek(0, 2) # seeks to the end of the file - size = ( - fd.tell() - ) # gets the fd's position which should be the end length of the file - fd.seek(0, 0) # seek back to teh start of the file before we start trying to read - LOG.debug(f"uploading {size} byte content to {bclient.primary_endpoint}") - bclient.upload_blob(data=fd, length=size) - LOG.debug(f"{fd.tell()} byte content uploaded to {bclient.primary_endpoint}") - - blob, person = "", "" - try: - cache_location = get_local_cache_location() - with BlobServiceClient.from_connection_string(os.environ["PRIMARY_TEST_CONN_STR"]) as service_client: - service_client: 
BlobServiceClient - children_dict, sep = assemble_blob_names(fname_template_dict) - parent = container_name.split("-")[0] - kid_keys = list(children_dict.keys()) - nested_blob_paths = [] - for child in kid_keys: - child_intervals = INTERVAL_MAPPING[child] - if parent in child_intervals: - prefix = fname_template_dict["optional_directory_prefix"].format( - interval=INTERVAL_MAPPING[child][parent] - ) - for i, blob in enumerate(children_dict[child]): - blob = "/".join((prefix, blob)) - nested_blob_paths.append(blob) - children_dict[child][i] = blob - else: - children_dict.pop(child) - populate_remote_blob() - return container_name, nested_blob_paths - except BaseException as be: - be.args += container_name, blob, person - raise be - - -def assemble_blob_names(fname_template_dict): - template_parts = fname_template_dict["template_parts"] - fname_templates = fname_template_dict["format_string"] - sep = template_parts["sep"] - children = template_parts["child"].split("|") - dispositions = template_parts["disposition"].split("|") - items_types = template_parts["item_type"].split("|") - extension = template_parts["extension"] - fmt_kwargs = dict(sep=sep, extension=extension) - blob_names = {} - for child in children: - ref = blob_names.setdefault(child, []) - fmt_kwargs["child"] = child - for disposition in dispositions: - fmt_kwargs["disposition"] = disposition - for itype in items_types: - fmt_kwargs["item_type"] = itype - for template in fname_templates: - ref.append(template.format(**fmt_kwargs)) - return blob_names, sep - - -def crawler(data: dict, target_key: Any = DUMMY_OBJ, target_val: Any = DUMMY_OBJ): - """A support function to craw nested container objects searching for the given targets""" - - def do_dict(d: dict): - nonlocal keys_ctx - for k, v in d.items(): - with keys_ctx(k): - if k == target_key: - yield tuple(keychain), v - yield from enter(v) - - def do_sequence(d: Union[list, tuple]): - nonlocal keys_ctx - for k, v in enumerate(d): - with keys_ctx(k): - if k == target_key: - yield tuple(keychain), v - yield from enter(v) - - def do_value(d): - nonlocal keys_ctx - if d == target_val: - yield tuple(keychain), d - - def enter(d): - if isinstance(d, dict): - yield from do_dict(d) - elif isinstance(d, (list, tuple)): - yield from do_sequence(d) - else: - yield from do_value(d) - - keychain = [] - keys_ctx = get_key_manager(keychain) - yield from enter(data) - - -def set_osenvs(target_file: str = None, be_silent: bool = True, use_multi_proc: bool = True): - def validate_conn_str(connStr): - try: - with BlobServiceClient.from_connection_string(connStr) as client: - client: BlobServiceClient - container_list = tuple(client.list_containers()) - if not all( - any(s == c.name for c in container_list) for s in vars_dict["dummy_vals"]["container_names"] - ): - vars_dict["dummy_vals"]["container_names"] = container_list - vars_dict["os.environ"]["test_destination"]["PRIMARY_TEST_CONN_STR"] = connStr - return True - except BaseException as be: - return False - - if target_file is None: - target_file = str(PATH_HERE.joinpath("dummy_env_vars.json")) - if target_file in TARGET_FILE_HISTORY: - return - TARGET_FILE_HISTORY.add(target_file) - filePath = Path(target_file) - if filePath.exists(): - with open(filePath, "r", encoding="UTF-8") as f: - vars_dict = json.load(f) - else: - with open(filePath.with_suffix(".json.template"), "r") as f: - vars_dict = json.load(f) - LOG.info( - "\nWARNING:\n\tNo connection stored on local machine\n\tfor a guide on how to get your connection string 
see:\n\t\thttps://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python?tabs=environment-variable-windows#copy-your-credentials-from-the-azure-portal" - ) - conn_str = input("Please enter a valid connection string for the target account\n::") - - while not validate_conn_str(conn_str): - conn_str = input( - f"{conn_str} is not a valid connection string" - f"\n\tPlease enter a valid connection string for the target account\n" - ) - print("\nconnection string valid") - with open(filePath, "w") as f: - json.dump(vars_dict, f, indent=4) - for chain, value in tuple(crawler(vars_dict, target_key="comments")): - ref = vars_dict - for k in chain[:-1]: - ref = ref[k] - ref.pop(chain[-1]) - test_dest_vars: dict = vars_dict["os.environ"]["test_destination"] - os.environ["PRIMARY_TEST_CONN_STR"]: str = test_dest_vars["PRIMARY_TEST_CONN_STR"] - os.environ["TEST_INTERVALS"]: str = ";".join(test_dest_vars["INTERVALS"]) - os.environ["TEST_PATH_PARTS"]: str = ";".join(test_dest_vars["PATH_PARTS"]) - os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"]: str = test_dest_vars["COMPLETE_REMOTE_PATH_TEMPLATE"] - os.environ["TEST_CONTAINER_NAMES"] = ";".join(vars_dict["dummy_vals"]["container_names"]) - populate_remote_containers(vars_dict, be_silent, use_multi_proc) - - -def populate_remote_containers(vars_dict, be_silent: bool, use_multi_proc: bool): - dummy_targets = vars_dict["dummy_vals"] - containers: List[str] = dummy_targets["container_names"] - fname_template_dict: dict = dummy_targets["fname_template"] - container: str - if use_multi_proc: - with cf.ProcessPoolExecutor(os.cpu_count()) as ppe: - ftrs = [] - for loop_container in containers: - # make_blobs(loop_container,fname_template_dict) - ftrs.append(ppe.submit(make_blobs, loop_container, fname_template_dict)) - for ftr in cf.as_completed(ftrs): - if ftr.exception(): - raise ftr.exception() - else: - container, blobs = ftr.result() - LOG.debug(f"{container} completed") - os.environ[container.replace("-", "_").upper()] = ";".join(blobs) - generate_cli_config(container, blobs) - else: - for loop_container in containers: - try: - container, blobs = make_blobs(loop_container, fname_template_dict) - LOG.debug(f"{container} completed") - os.environ[container.replace("-", "_").upper()] = ";".join(blobs) - generate_cli_config(container, blobs) - except BaseException as be: - LOG.error("{}: {}".format(type(be).__name__, repr(be.args))) - if not be_silent: - strings = [] - longest = max(len(k) for k in os.environ) - for k in os.environ: - strings.append(f"{k.strip():<{longest}} : {os.environ[k]}") - LOG.info("\n" + "\n".join(strings)) - - -def generate_cli_config(container: str, blobs: List[str]): - from configparser import ConfigParser - - from twindb_backup import INTERVALS - from twindb_backup import SUPPORTED_DESTINATION_TYPES as SDT - from twindb_backup import SUPPORTED_QUERY_LANGUAGES as SQ - from twindb_backup import XBSTREAM_BINARY, XTRABACKUP_BINARY - from twindb_backup.configuration import DEFAULT_CONFIG_FILE_PATH, RetentionPolicy - - cache_location = get_local_cache_location() - config_root = cache_location.parent.joinpath("configs").resolve() - os.environ["TEST_CONFIGS_ROOT"] = str(config_root) - config_file_path = config_root.joinpath(container).joinpath(DEFAULT_CONFIG_FILE_PATH.split("/")[-1]) - config_file_path.parent.mkdir(parents=True, exist_ok=True) - true_interval, media_type, *fname = blobs[0].split("/") - prefix: str = "/".join(fname[:-1]) - cache_endpoint = cache_location.joinpath("local_store").joinpath(prefix) - if 
prefix and not prefix.endswith("/"): - prefix += "/" - cache_endpoint.mkdir(parents=True, exist_ok=True) - # fname:str = fname[-1] - conn_str = os.environ["PRIMARY_TEST_CONN_STR"] - conn_parts = {k: v for part in conn_str.split(";") for k, v in (part.split("=", 1),)} - protocol = conn_parts["DefaultEndpointsProtocol"] - host_name = f'{conn_parts["AccountName"]}.{conn_parts["EndpointSuffix"]}' - path_parts = { - "protocol": protocol.strip(":/"), - "host_name": host_name.strip("/"), - "container_name": container.strip("/"), - "interval": true_interval.strip("/"), - "media_type": media_type.strip("/"), - "fname_prefix": prefix, - "fname": "", - } - sql_config = { - "mysql_defaults_file": "/root/.my.cnf", - "full_backup": INTERVALS[1], - "expire_log_days": 7, - "xtrabackup_binary": XTRABACKUP_BINARY, - "xbstream_binary": XBSTREAM_BINARY, - } - mock_config = { - "compression": { - "program": "pigz", - "threads": max(1, os.cpu_count() // 2), - "level": 9, - }, - "gpg": {"recipient": "", "keyring": "", "secret_keyring": ""}, - "intervals": {f"run_{interval}": interval == true_interval for interval in INTERVALS}, - "destination": { - "keep_local_path": True, - "backup_destination": SDT.azure, - }, - "export": { - "transport": "datadog", - "app_key": "some_app_key", - "api_key": "some_api_key", - }, - "source": {"backup_dirs": [str(cache_endpoint)], "backup_mysql": True}, - "retention": {f"{interval}_copies": count for interval, count in RetentionPolicy._field_defaults.items()}, - "retention_local": {f"{interval}_copies": count for interval, count in RetentionPolicy._field_defaults.items()}, - SQ.mysql: sql_config, - SDT.azure: { - "remote_path": os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"].format(**path_parts), # remote_path - "connection_string": f"'{conn_str}'", # connection_string - "can_do_overwrites": False, # can_do_overwrites - "cpu_cap": os.cpu_count(), # cpu_cap - "max_mem_bytes": 2**24, # max_mem_bytes - "default_protocol": path_parts["protocol"], # default_protocol - "default_host_name": path_parts["host_name"], # default_host_name - "default_container_name": path_parts["container_name"], # default_container_name - "default_interval": path_parts["interval"], # default_interval - "default_media_type": path_parts["media_type"], # default_media_type - "default_fname_prefix": path_parts["fname_prefix"], # default_fname_prefix - }, - } - writer = ConfigParser() - writer.read_dict(mock_config) - with open(config_file_path, "w") as fd: - writer.write(fd) - - -if __name__ == "__main__": - set_osenvs("dummy_env_vars.json") diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py b/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py deleted file mode 100644 index 600cd4adf..000000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -from tests.unittests.excluded_env_config.dummy_content_generation.cave_generation import cave_gen -from tests.unittests.excluded_env_config.dummy_content_generation.cave_painting_desc import painting_gen -from tests.unittests.excluded_env_config.dummy_content_generation.dino_namegen import dino_gen - - -def manual_dummy_file_creation(out_target: str, fsize: int): - def rando_gen(total_len: int) -> bytes: - parts = total_len // 3 - for grp in zip(dino_gen(parts), cave_gen(parts), painting_gen(parts)): - yield from grp - - from pathlib import Path - - out_target = Path(out_target).resolve() - out_target.parent.mkdir(parents=True, exist_ok=True) - with 
open(out_target, "wb") as f: - f.writelines(rando_gen(fsize)) - - -if __name__ == "__main__": - import concurrent.futures as cf - import os - from random import random - - oneg = 2**30 - mean = 2**33 - half_span = 2**32 - - with cf.ProcessPoolExecutor(os.cpu_count()) as ppe: - args = [] - for i, mult in enumerate([4] * 5 + [9] * 5): - size = int(oneg * mult + (oneg * random()) * round(random() * 2.0 - 1.0)) - name = f"./big_dummies/{size//oneg}_{i}.txt" - args.append((name, size)) - args.sort(key=lambda s: s[1]) - ftrs = [] - for name, size in args: - print(name, size) - ftrs.append(ppe.submit(manual_dummy_file_creation, name, size)) - cf.wait(ftrs) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py b/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py deleted file mode 100644 index 8d899c8f1..000000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py +++ /dev/null @@ -1,50 +0,0 @@ -from random import randint - -biome = ( - ("swamp", "-", "y ", "ed "), - ("desert", "-", "ed "), - ( - "savanna", - "-", - ), - ("mountain", "-", "ous ", "y-"), - ("hill", "top ", "y "), - ("valley", "-", "_floor "), -) -biome = tuple(v for tpl in zip(biome, (("", "") for _ in range(len(biome)))) for v in tpl) -feel = ( - "cozy ", - "damp ", - "dank ", - "spacious ", - "stinky ", - "pleasant ", - "small ", - "large ", - "big ", - "dirty ", - "clean ", -) -look = "open ,hidden ,exposed ,recessed ,majestic ,underwhelming ,high ,low ,deep ,shallow ".split(",") - - -def cave_gen(result_len: int): - alen = len(biome) - 1 - blen = len(feel) - 1 - clen = len(look) - 1 - byte_count = 0 - while byte_count < result_len: - a = biome[randint(0, alen)] - a = a[0] + a[randint(1, len(a) - 1)] - b = feel[randint(0, blen)] - c = look[randint(0, clen)] - abc = a + b + c if a.endswith("y ") or a.endswith("ed ") else b + c + a - abc = abc.replace("-", " ").replace("_", "-") - val = f"A {abc}cave\n".capitalize().encode("utf-8") - yield val - byte_count += len(val) - - -if __name__ == "__main__": - for cave in cave_gen(100): - print(cave) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py b/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py deleted file mode 100644 index d05e6f0c9..000000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py +++ /dev/null @@ -1,29 +0,0 @@ -from itertools import combinations -from random import randint - -descriptors = "simple,busy,abstract,obvious,pretty,scary,large,small,relatable,detailed,complex".split(",") -combos = tuple(tuple(combinations(descriptors, i)) for i in range(1, 5)) -combo_len = len(combos) - 1 -lens = tuple(len(c) - 1 for c in combos) - - -def painting_gen(result_len: int): - byte_count = 0 - while byte_count < result_len: - combo_idx = randint(0, combo_len) - clen = lens[combo_idx] - combo = combos[combo_idx][randint(0, clen)] - if len(combo) > 2: - combo = ", ".join(combo[:-1]) + f", and {combo[-1]}" - elif len(combo) == 2: - combo = ", ".join(combo[:-1]) + f" and {combo[-1]}" - else: - combo = combo[0] - val = f"{combo} types of cave-paintings\n".capitalize().encode("utf-8") - yield val - byte_count += len(val) - - -if __name__ == "__main__": - for s in painting_gen(10): - print(s) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py b/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py deleted file 
mode 100644 index 259247556..000000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py +++ /dev/null @@ -1,390 +0,0 @@ -"""Filler content generator. -code inspired by that used to create the site: https://www.fantasynamegenerators.com/dinosaur-names.php""" - -import multiprocessing as mp -from itertools import combinations -from multiprocessing import Queue -from queue import Empty -from random import sample -from time import perf_counter - -colors = ( - ("fuscus", "dark"), - ("Nigri", "Black"), - ("aterum", "dark-black"), - ("lividus", "blue-black"), - ("Cyano", "Blue"), - ("Leuco", "White"), - ("Chloro", "Green"), - ("prasino", "green"), - ("purpureus", "purple"), - ("caeruleus", "cerulean"), - ("ravus", "gray"), - ("canus", "light-gray"), - ("albus", "white"), - ("Xantho", "Yellow"), - ("flavus", "yellow"), - ("fulvus", "golden"), - ("aurantium", "orange"), - ("croceus", "saffron"), - ("ruber", "red"), - ("roseus", "rose-red"), -) -colors = tuple(pair for tpl in zip(colors, (("", "") for _ in range(len(colors)))) for pair in tpl) -physical_descriptors1 = ( - ("rhyncho", "Beak"), - ("chelo", "Claw"), - ("podo", "Foot"), - ("cerco", "Tail"), - ("canto", "Spined"), - ("cephalo", "Headed"), - ("donto", "Teeth"), - ("don", "Tooth"), - ("lopho", "Crested"), - ("ploce", "Armored"), - ("plo", "Armored"), - ("rhino", "Nosed"), - ("trachelo", "Necked"), - ("minisculum", "extremely-small"), - ("exigum", "very-small"), - ("minimum", "tiny"), - ("parvum", "small"), - ("vegrande", "not-very-big"), - ("praegrande", "very-big"), - ("magnum", "great"), - ("enorme", "enormous"), - ("immane", "huge"), - ("immensum", "immense"), - ("vastum", "vast"), - ("", ""), -) -physical_descriptors2 = ( - ("Acantho", "Spiny"), - ("Acro", "High"), - ("Aegypto", "Egyptian"), - ("Aepy", "Tall"), - ("Afro", "African"), - ("Agili", "Agile"), - ("Alectro", "Eagle"), - ("Ammo", "Sand"), - ("Anchi", "Near"), - ("Ankylo", "Stiff"), - ("Antarcto", "Antarctic"), - ("Apato", "Deceptive"), - ("Archaeo", "Ancient"), - ("Arrhino", "Without Nose"), - ("Austro", "South"), - ("Avi", "Bird"), - ("Baga", "Small"), - ("Baro", "Heavy"), - ("Bellu", "Fine"), - ("Brachio", "Arm"), - ("Brachy", "Short"), - ("Callio", "Beautiful"), - ("Campto", "Bent"), - ("Carno", "Carnivorous"), - ("Cerato", "Horned"), - ("Chloro", "Green"), - ("Coelo", "Hollow"), - ("Colosso", "Giant"), - ("Cyano", "Blue"), - ("Cyclo", "Round"), - ("Cyrto", "Curved"), - ("Daspleto", "Frightful"), - ("Deino", "Terrible"), - ("Di", "Two"), - ("Dicraeo", "Forked"), - ("Dilipho", "Two Ridged"), - ("Draco", "Dragon"), - ("Dromaeo", "Running"), - ("Drypto", "Tearing"), - ("Echino", "Spiny"), - ("Elaphro", "Fleet"), - ("Eo", "Dawn"), - ("Eu", "Well"), - ("Gampso", "Curved"), - ("Gorgo", "Fierce"), - ("Gymno", "Bare"), - ("Gyro", "Round"), - ("Hadro", "Big"), - ("Haplo", "Simple"), - ("Hespero", "Western"), - ("Hetero", "Different"), - ("Hylaeo", "Woodland"), - ("Kentro", "Spiky"), - ("Krito", "Noble"), - ("Lasio", "Hairy"), - ("Lepto", "Slim"), - ("Leuco", "White"), - ("Lopho", "Crested"), - ("Lurdu", "Heavy"), - ("Macro", "Large"), - ("Masso", "Massive"), - ("Mega", "Large"), - ("Megalo", "Big"), - ("Metria", "Moderately"), - ("Micro", "Tiny"), - ("Mono", "Single"), - ("Nano", "Dwarf"), - ("Nano", "Tiny"), - ("Neo", "New"), - ("Nigri", "Black"), - ("Oro", "Mountain"), - ("Orycto", "Digging"), - ("Ovi", "Egg"), - ("Pachy", "Thick"), - ("Parali", "Tidal"), - ("Peloro", "Monstrous"), - ("Plateo", "Flat"), - ("Platy", "Flat"), - ("Pogono", 
"Bearded"), - ("Preno", "Sloping"), - ("Prenoce", "Sloping"), - ("Pro", "Before"), - ("Proto", "Before"), - ("Rhab", "Rod"), - ("Rugos", "Wrinkled"), - ("Salto", "Hopping"), - ("Sarco", "Flesh"), - ("Segno", "Slow"), - ("Silvi", "Forest"), - ("Sino", "Chinese"), - ("Spino", "Thorn"), - ("Stego", "Roof"), - ("Steno", "Narrow"), - ("Styraco", "Spiked"), - ("Super", "Super"), - ("Theco", "Socket"), - ("Therizino", "Scythe"), - ("Thescelo", "Wonderful"), - ("Toro", "Bull"), - ("Torvo", "Savage"), - ("Trachy", "Rough"), - ("Trichodo", "Hairy"), - ("Troo", "Wounding"), - ("Tyloce", "Swelling"), - ("Tyranno", "Tyrant"), - ("Veloci", "Quick"), - ("Xantho", "Yellow"), - ("", ""), -) -abstract_descriptors1 = ( - ("bator", "Hero"), - ("ceratops", "Horned Face"), - ("draco", "Dragon"), - ("dromeus", "Runner"), - ("gryphus", "Griffin"), - ("lestes", "Stealer"), - ("mimus", "Mimic"), - ("moloch", "Demon"), - ("raptor", "Plunderer"), - ("rex", "King"), - ("sauropteryx", "Winged Lizard"), - ("saurus", "Lizard"), - ("saura", "Lizard"), - ("sornis", "Bird"), - ("titan", "Giant"), - ("tyrannus", "Tyrant"), - ("venator", "Hunter"), - ("amorabundum", "loving"), - ("excitum", "excited"), - ("confūsum", "confused"), - ("detestabile", "hateful"), - ("felix", "happy"), - ("invidum", "envious"), - ("iratum", "irate"), - ("laetum", "joyful"), - ("miserum", "miserable"), - ("solum", "lonely"), - ("somnolentum", "sleepy"), - ("territum", "terrified"), - ("triste", "sad"), - ("bella", "beautiful"), - ("breve", "short"), - ("cānum", "gray-haired"), - ("casuale", "casual"), - ("decens", "proper"), - ("decorum", "well-mannered"), - ("deforme", "ugly"), - ("elegans", "elegant"), - ("flāvum", "blonde"), - ("formale", "formal"), - ("iuvene", "young"), - ("longe", "tall"), - ("rūfum", "red-haired"), - ("venustum", "lovely"), - ("venustum", "charming"), - ("vetere", "old"), - ("", ""), -) -abstract_descriptors2 = ( - ("don", "Tooth"), - ("bator", "Hero"), - ("canthus", "Spine"), - ("ceras", "Roof"), - ("ceratops", "Horned Face"), - ("docus", "Neck"), - ("draco", "Dragon"), - ("dromeus", "Runner"), - ("gryphus", "Griffin"), - ("lestes", "Stealer"), - ("lodon", "Tooth"), - ("mimus", "Mimic"), - ("moloch", "Demon"), - ("nychus", "Claw"), - ("pelix", "Pelvis"), - ("pelta", "Shield"), - ("cephalus", "Head"), - ("pteryx", "Wing"), - ("pus", "Foot"), - ("raptor", "Plunderer"), - ("rex", "King"), - ("rhinus", "Snout"), - ("rhothon", "Nose"), - ("sauropteryx", "Winged Lizard"), - ("saurus", "Lizard"), - ("saura", "Lizard"), - ("sornis", "Bird"), - ("spondylus", "Vertebrae"), - ("suchus", "Crocodile"), - ("tholus", "Dome"), - ("titan", "Giant"), - ("tyrannus", "Tyrant"), - ("venator", "Hunter"), - ("", ""), -) - -colors = [(v1.strip(), v2.strip()) for v1, v2 in colors] -physical_descriptors1 = [(v1.strip(), v2.strip()) for v1, v2 in physical_descriptors1] -physical_descriptors2 = [(v1.strip(), v2.strip()) for v1, v2 in physical_descriptors2] -abstract_descriptors1 = [(v1.strip(), v2.strip()) for v1, v2 in abstract_descriptors1] -abstract_descriptors2 = [(v1.strip(), v2.strip()) for v1, v2 in abstract_descriptors2] - - -def combination_gen(_colors): - def inner(): - phys = physical_descriptors1 + physical_descriptors2 - abst = abstract_descriptors1 + abstract_descriptors2 - combos = combinations( - ( - _colors, - physical_descriptors1, - physical_descriptors2, - abstract_descriptors1, - abstract_descriptors2, - ), - 5, - ) - for la, a in _colors: - for lp1, p1 in phys: - for lp2, p2 in abst: - if a + p1 + p2: - yield "".join(v for v in 
(la, lp1, lp2) if v) + " " + " ".join(v for v in (a, p1, p2) if v) - for l1, l2, l3, l4, l5 in combos: - for lp1, p1 in l1: - for lp2, p2 in l2: - for lp3, p3 in l3: - for lp4, p4 in l4: - for lp5, p5 in l5: - if p1 + p2 + p3 + p4 + p5: - yield "".join(v for v in (lp1, lp2, lp3, lp4, lp5) if v) + " " + " ".join( - v for v in (p1, p2, p3, p4, p5) if v - ) - - for name in inner(): - if len(name.split()) > 1: - yield name.capitalize().encode("utf-8") - - -def dino_gen(result_len: int, clrs=None): - data = [] - byte_count = 0 - if clrs is None: - clrs = colors - for d in combination_gen(clrs): - d += b"\n" - yield d - data.append(d) - byte_count += len(d) - if byte_count >= result_len: - break - while byte_count < result_len: - for i in sample(range(len(data)), len(data)): - d = data[i] - yield d - data.append(d) - byte_count += len(d) - if byte_count >= result_len: - break - - -def _gen_wrapper(result_len: int, q: Queue, clrs: tuple = None): - if not clrs: - clrs = colors - batch = [] - for n in dino_gen(result_len, clrs): - batch.append(n) - if len(batch) > 1000: - q.put(b"".join(batch)) - batch = [] - if batch: - q.put(b"".join(batch)) - - -def all_gen(chunk_bytes: int, num_chunks: int): - clr_span = (len(colors) + num_chunks - 1) // num_chunks - q = Queue() - procs = [] - for i in range(0, len(colors), clr_span): - procs.append( - mp.Process( - target=_gen_wrapper, - args=(chunk_bytes, q, colors[i : i + clr_span]), - ) - ) - try: - for proc in procs: - proc.start() - with open("big_file.txt", "wb") as f: - strt = perf_counter() - batch = [] - elapsed = perf_counter() - strt - while elapsed < 5: - print(f"\r[{'|'*(round(100*elapsed/5)):<100}]", end="") - try: - v = q.get(True, 0.5) - if v: - batch.append(v) - strt = perf_counter() - except Empty: - pass - if len(batch) > 10: - f.write(b"".join(batch)) - if f.tell() >= (10 * 2**29): - break - batch = [] - elapsed = perf_counter() - strt - if batch: - f.writelines(batch) - finally: - for proc in procs: - try: - proc.join(2) - if proc.exitcode is None: - try: - proc.terminate() - except: - pass - except: - try: - proc.terminate() - except: - pass - try: - proc.close() - except: - pass - - -if __name__ == "__main__": - all_gen(2**29, 10) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py b/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py deleted file mode 100644 index ed3d88b3f..000000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py +++ /dev/null @@ -1,14 +0,0 @@ -def kill_dupes(fp: str): - s = set() - with open(fp, "rb") as fin: - fin.seek(0) - with open("no_dupes.txt", "wb") as fout: - for line in fin.readlines(): - line = line.strip() - if line not in s: - s.add(line) - fout.write(line + b"\n") - - -if __name__ == "__main__": - kill_dupes("big_file.txt") diff --git a/tests/unittests/excluded_env_config/dummy_env_vars.json.template b/tests/unittests/excluded_env_config/dummy_env_vars.json.template deleted file mode 100644 index 8da678914..000000000 --- a/tests/unittests/excluded_env_config/dummy_env_vars.json.template +++ /dev/null @@ -1,40 +0,0 @@ -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "PRIMARY_TEST_CONN_STR": 
"DefaultEndpointsProtocol=https;AccountName=from_the_town_of_bedrock;AccountKey=hAVE+4+Ya8Ado/time+a+DAb4do/TIME+a+/Y4b4/d484/d0+tIMe==;EndpointSuffix=flintstones.meet.the.flintstones.net", - "INTERVALS": ["hourly","daily","weekly","monthly","yearly"], - "PATH_PARTS":["protocol","host","container","interval","media_type","fname_prefix","fname"], - "COMPLETE_REMOTE_PATH_TEMPLATE": "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "fred-of-buffalo-lodge", - "barney-of-buffalo-lodge", - "wilma-of-impossibly-good-figure", - "betty-of-impossibly-good-figure" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": ["{child}{sep}{disposition}{sep}{item_type}.{extension}"], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} diff --git a/tox.ini b/tox.ini index dc7ba558c..c22367226 100644 --- a/tox.ini +++ b/tox.ini @@ -18,9 +18,4 @@ commands = deps=-rrequirements_dev.txt commands= coverage run -m py.test tests/unit - coverage report -; The following lines are candidate's to replace those in commands= (above) so that we can get a cumulative coverage -; report for the existing pytest scripts, as well as the AzureBlob specific unittest scripts. -; coverage run --source=twindb_backup -m py.test tests/unit -; coverage run -a --source=twindb_backup -m unittest discover -cvf --locals --start-directory tests/unittests/destination_tests -; coverage report + coverage report \ No newline at end of file diff --git a/twindb_backup/__init__.py b/twindb_backup/__init__.py index b6efdc0d2..ab7bfe4b5 100644 --- a/twindb_backup/__init__.py +++ b/twindb_backup/__init__.py @@ -57,9 +57,9 @@ class and saves the backup copy in something defined in a destination class. 
LOG = logging.getLogger(__name__) LOG.setLevel(GLOBAL_INIT_LOG_LEVEL) -DestTypes = namedtuple("DestinationTypes", "ssh,local,s3,gcs,azure") +DestTypes = namedtuple("DestinationTypes", "ssh,local,s3,gcs,az") QueryTypes = namedtuple("QueryTypes", ["mysql"]) -SUPPORTED_DESTINATION_TYPES = DestTypes("ssh", "local", "s3", "gcs", "azure") +SUPPORTED_DESTINATION_TYPES = DestTypes("ssh", "local", "s3", "gcs", "az") SUPPORTED_QUERY_LANGUAGES = QueryTypes("mysql") diff --git a/twindb_backup/configuration/__init__.py b/twindb_backup/configuration/__init__.py index c309ee266..7cd760d68 100644 --- a/twindb_backup/configuration/__init__.py +++ b/twindb_backup/configuration/__init__.py @@ -8,6 +8,7 @@ from twindb_backup import INTERVALS, LOG from twindb_backup.configuration.compression import CompressionConfig +from twindb_backup.configuration.destinations.az import AZConfig from twindb_backup.configuration.destinations.gcs import GCSConfig from twindb_backup.configuration.destinations.s3 import S3Config from twindb_backup.configuration.destinations.ssh import SSHConfig @@ -16,6 +17,7 @@ from twindb_backup.configuration.mysql import MySQLConfig from twindb_backup.configuration.retention import RetentionPolicy from twindb_backup.configuration.run_intervals import RunIntervals +from twindb_backup.destination.az import AZ from twindb_backup.destination.gcs import GCS from twindb_backup.destination.s3 import S3 from twindb_backup.destination.ssh import Ssh @@ -97,6 +99,15 @@ def ssh(self): except NoSectionError: return None + @property + def az(self): # pylint: disable=invalid-name + """Azure Blob configuration""" + try: + return AZConfig(**self.__read_options_from_section("az")) + + except NoSectionError: + return None + @property def s3(self): # pylint: disable=invalid-name """Amazon S3 configuration""" @@ -241,7 +252,14 @@ def destination(self, backup_source=socket.gethostname()): gc_encryption_key=self.gcs.gc_encryption_key, hostname=backup_source, ) - + elif backup_destination == "az": + return AZ( + connection_string=self.az.connection_string, + container_name=self.az.container_name, + chunk_size=self.az.chunk_size, + remote_path=self.az.remote_path, + hostname=backup_source, + ) else: raise ConfigurationError(f"Unsupported destination '{backup_destination}'") except NoSectionError as err: diff --git a/twindb_backup/configuration/destinations/az.py b/twindb_backup/configuration/destinations/az.py new file mode 100644 index 000000000..6d3c03ab6 --- /dev/null +++ b/twindb_backup/configuration/destinations/az.py @@ -0,0 +1,45 @@ +"""Azure Blob Storage destination configuration""" + + +class AZConfig: + """Azure Blob Storage Configuration.""" + + def __init__( + self, connection_string: str, container_name: str, chunk_size: int = 1024 * 1024 * 4, remote_path: str = "/" + ): + self._connection_string = connection_string + self._container_name = container_name + self._chunk_size = chunk_size + self._remote_path = remote_path + self.validate_config() + + def validate_config(self): + """Validate configuration.""" + if not isinstance(self._connection_string, str): + raise ValueError("CONNECTION_STRING must be a string") + if not isinstance(self._container_name, str): + raise ValueError("CONTAINER_NAME must be a string") + if not isinstance(self._chunk_size, int): + raise ValueError("CHUNK_SIZE must be an integer") + if not isinstance(self._remote_path, str): + raise ValueError("REMOTE_PATH must be a string") + + @property + def connection_string(self) -> str: + """CONNECTION_STRING""" + return 
self._connection_string + + @property + def container_name(self) -> str: + """CONTAINER_NAME""" + return self._container_name + + @property + def chunk_size(self) -> int: + """CHUNK_SIZE""" + return self._chunk_size + + @property + def remote_path(self) -> str: + """REMOTE_PATH""" + return self._remote_path diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py new file mode 100644 index 000000000..e6ba0a59b --- /dev/null +++ b/twindb_backup/destination/az.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +""" +Module for Azure destination. +""" +import builtins +import os +import socket +import typing as t +from contextlib import contextmanager +from multiprocessing import Process + +import azure.core.exceptions as ae +from azure.storage.blob import ContainerClient + +from twindb_backup import LOG +from twindb_backup.copy.base_copy import BaseCopy +from twindb_backup.destination.base_destination import BaseDestination +from twindb_backup.destination.exceptions import FileNotFound + + +class AZ(BaseDestination): + """Azure Blob Storage Destination class""" + + def __init__( + self, + container_name: str, + connection_string: str, + hostname: str = socket.gethostname(), + chunk_size: int = 4 * 1024 * 1024, # TODO: Add support for chunk size + remote_path: str = "/", + ) -> None: + """Creates an instance of the Azure Blob Storage Destination class, + initializes the ContainerClient and validates the connection settings + + Args: + container_name (str): Name of the container in the Azure storage account + connection_string (str): Connection string for the Azure storage account + hostname (str, optional): Hostname of the host performing the backup. Defaults to socket.gethostname(). + chunk_size (int, optional): Size in bytes for read/write streams. Defaults to 4*1024*1024. + remote_path (str, optional): Remote base path in the container to store backups. Defaults to "/". + + Raises: + err: Raises an error if the client cannot be initialized + """ + + self._container_name = container_name + self._connection_string = connection_string + self._hostname = hostname + self._chunk_size = chunk_size + self._remote_path = remote_path + super(AZ, self).__init__(self._remote_path) + + self._container_client = self._connect() + + """HELPER FUNCTIONS + """ + + def _connect(self) -> ContainerClient: + """Connects to an Azure Storage Account and initializes a ContainerClient, + ensures the container exists, creating one if not found + + Raises: + err: Returns an error if the connection string is invalid or we failed to validate the container + + Returns: + ContainerClient: An initialized ContainerClient + """ + + client: ContainerClient = None + + # Create the container client - validates connection string format + try: + client = ContainerClient.from_connection_string(self._connection_string, self._container_name) + except builtins.ValueError as err: + LOG.error(f"Failed to create Azure Client. Error: {type(err).__name__}, Reason: {err}") + raise err + + # Check if the container exists, if not, create it + try: + if not client.exists(): + client.create_container() + except builtins.Exception as err: + LOG.error(f"Failed to validate or create container. 
Error: {type(err).__name__}, Reason: {err}") + raise err + + return client + + def render_path(self, path: str) -> str: + """Renders the absolute path for the Azure Blob Storage Destination + + Args: + path (str): Relative path to the blob in the container + + Returns: + str: Absolute path to the blob in the container + """ + return f"{self._remote_path}/{path}" + + def _download_to_pipe(self, blob_key: str, pipe_in: int, pipe_out: int) -> None: + """Downloads a blob from Azure Blob Storage and writes it to a pipe + + Args: + blob_key (str): The path to the blob in the container + pipe_in (int): The pipe to read the blob content from, closed in child process. + pipe_out (int): The pipe to write the blob content to, closed in parent process. + """ + os.close(pipe_in) + with os.fdopen(pipe_out, "wb") as pipe_out_file: + try: + self._container_client.download_blob(blob_key).readinto(pipe_out_file) + except builtins.Exception as err: + LOG.error(f"Failed to download blob {blob_key}. Error: {type(err).__name__}, Reason: {err}") + raise err + + """BaseDestination ABSTRACT METHODS IMPLEMENTATION + """ + + def delete(self, path: str) -> None: + """Deletes a blob from the Azure storage account's container + + Args: + path (str): Relative path to the blob in the container to delete + + Raises: + err: Raises an error if the blob failed to be deleted + """ + LOG.debug(f"Attempting to delete blob: {self.render_path(path)}") + try: + self._container_client.delete_blob(self.render_path(path)) + except builtins.Exception as err: + LOG.error(f"Failed to delete blob {self.render_path(path)}. Error: {type(err).__name__}, Reason: {err}") + raise err + + @contextmanager + def get_stream(self, copy: BaseCopy) -> t.Generator[t.BinaryIO, None, None]: + """Streams a blob from Azure Blob Storage into a pipe + + Args: + copy (BaseCopy): A copy object to stream from Azure + + Yields: + t.Generator(t.BinaryIO): A generator that yields a stream of the blob's content + """ + + LOG.debug(f"Attempting to stream blob: {self.render_path(copy.key)}") + pipe_in, pipe_out = os.pipe() + + proc = Process(target=self._download_to_pipe, args=(self.render_path(copy.key), pipe_in, pipe_out)) + proc.start() + os.close(pipe_out) + try: + with os.fdopen(pipe_in, "rb") as pipe_in_file: + yield pipe_in_file + finally: + proc.join() + if proc.exitcode != 0: + LOG.error(f"Failed to stream blob {self.render_path(copy.key)}") + raise builtins.Exception(f"Failed to stream blob {self.render_path(copy.key)}") + + def read(self, filepath: str) -> bytes: + """Read content of a file path from Azure Blob Storage + + Args: + filepath (str): Relative path to a blob in the container + + Raises: + err: Raises an error if the blob failed to be read or it does not exist + + Returns: + bytes: Content of the blob + """ + LOG.debug(f"Attempting to read blob: {self.render_path(filepath)}") + try: + return self._container_client.download_blob(self.render_path(filepath), encoding="utf-8").read() + except ae.ResourceNotFoundError: + LOG.debug(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") + raise FileNotFound(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") + except builtins.Exception as err: + LOG.error(f"Failed to read blob {self.render_path(filepath)}. 
Error: {type(err).__name__}, Reason: {err}") + raise err + + def save(self, handler: t.BinaryIO, filepath: str) -> None: + """Save a stream given as handler to filepath in Azure Blob Storage + + Args: + handler (t.BinaryIO): Incoming stream + filepath (str): Relative path to a blob in the container + + Raises: + err: Raises an error if the blob failed to be written + """ + + LOG.debug(f"Attempting to save blob: {self.render_path(filepath)}") + with handler as file_obj: + try: + self._container_client.upload_blob(self.render_path(filepath), file_obj) + except builtins.Exception as err: + LOG.error(f"Failed to upload blob or it already exists. Error {type(err).__name__}, Reason: {err}") + raise err + + def write(self, content: str, filepath: str) -> None: + """Write content to filepath in Azure Blob Storage + + Args: + content (str): Content to write to blob + filepath (str): Relative path to a blob in the container + + Raises: + err: Raises an error if the blob failed to be written + """ + + LOG.debug(f"Attempting to write blob: {self.render_path(filepath)}") + try: + self._container_client.upload_blob(self.render_path(filepath), content, overwrite=True) + except builtins.Exception as err: + LOG.error(f"Failed to upload or overwrite blob. Error {type(err).__name__}, Reason: {err}") + raise err + + def _list_files(self, prefix: str = "", recursive: bool = False, files_only: bool = False) -> t.List[str]: + """List files in the Azure Blob Storage container + + Args: + prefix (str, optional): Filters blobs by a given prefix. Defaults to "". + recursive (bool, optional): Not supported. Defaults to False. + files_only (bool, optional): Excludes directories when true, + otherwise includes files and directories. Defaults to False. + """ + LOG.debug( + f"""Listing files in container {self._container_name} with prefix={prefix}, + recursive={recursive}, files_only={files_only}""" + ) + + try: + blobs = self._container_client.list_blobs(name_starts_with=prefix, include=["metadata"]) + except builtins.Exception as err: + LOG.error( + f"Failed to list files in container {self._container_name}. Error: {type(err).__name__}, Reason: {err}" + ) + raise err + + return [ + blob.name + for blob in blobs + if not files_only + or not (bool(blob.get("metadata")) and blob.get("metadata", {}).get("hdi_isfolder") == "true") + ] diff --git a/twindb_backup/destination/azblob.py b/twindb_backup/destination/azblob.py deleted file mode 100644 index 408187641..000000000 --- a/twindb_backup/destination/azblob.py +++ /dev/null @@ -1,1097 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Module for Azure-blob destination. 
-""" -# builtin module imports -import gc -import io -import multiprocessing as mp -import os -import sys -import time -import traceback -from contextlib import contextmanager -from functools import wraps -from multiprocessing.connection import Connection as mpConn -from pathlib import Path -from textwrap import indent -from typing import AnyStr, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union - -# Third party module imports -from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError -from azure.storage.blob import ( - BlobClient, - BlobProperties, - BlobServiceClient, - ContainerClient, - ContainerProperties, - StorageStreamDownloader, -) - -# project sub-module imports -from twindb_backup import LOG -from twindb_backup.copy.mysql_copy import MySQLCopy -from twindb_backup.destination.base_destination import BaseDestination -from twindb_backup.destination.exceptions import AzureBlobDestinationError - -IterableClientType = Iterable[Union[BlobServiceClient, ContainerClient, BlobClient]] -DEFAULT_AVAILABLE_CPU = os.cpu_count() -GC_TOGGLE_DEPTH = 0 -"""GC_TOGGLE_DEPTH is used as a reference counter for managing when the _gc_toggle function should call gc.enable().""" -ONE_MiB = 2**20 -MAX_PIPE_CHUNK_BYTES = 8 * ONE_MiB -MAX_SYS_MEM_USE = 512 * ONE_MiB -"""MAX_PIPE_CHUNK_BYTES is a conservatively safe upper bound on the number of bytes we send through -`multiprocessing.connections.Connection` objects. - -This boundary will be derived for the current machine's OS at runtime. - -Per the official Python 3.9.6 documentation: -:: - - send(obj) - Send an object to the other end of the connection which should be read using recv(). - - The object must be picklable. Very large pickles (approximately 32 MiB+, though it depends on the OS) - may raise a ValueError exception. - -For source documentation on send(obj) see: - - https://docs.python.org/3/library/multiprocessing.html#multiprocessing.connection.Connection.send -""" -NONE_LABEL = "None" -BSC_LABEL = "BlobServiceClient" -CC_LABEL = "ContainerClient" -BC_LABEL = "BlobClient" - - -class ClientWrapper: - """The ContainerWrapper class exists to simplify the process of ensuring that a container's name - is accessible from mixed types of inputs. - - """ - - def __init__(self, name: str = None, props: Optional[ContainerProperties] = None) -> None: - self._name = name or None - if not self._name and props is not None: - self._name = props.name - - @property - def name(self) -> str: - return self._name - - -HasNameAttr = Union[ClientWrapper, ContainerProperties] -IterableHasName = Iterable[HasNameAttr] -StrOrHasName = Union[str, HasNameAttr] -IterableStrOrHasName = Iterable[StrOrHasName] - - -def _assemble_fname(path_dict: dict) -> str: - interval = path_dict.get("interval", None) - media = path_dict.get("media_type", None) - prefix = path_dict.get("fname_prefix", None) - fname = path_dict.get("fname", None) - return "/".join((part for part in (interval, media, prefix, fname) if part)) - - -@contextmanager -def _gc_toggle(): - """A context manager that toggles garbage collection off-at-entry and back-on-at-exit. 
- :return: A bool value indicating if gc was enabled when this context was entered - """ - global GC_TOGGLE_DEPTH - try: - gc.disable() - GC_TOGGLE_DEPTH += 1 - yield GC_TOGGLE_DEPTH - GC_TOGGLE_DEPTH -= 1 - finally: - if GC_TOGGLE_DEPTH == 0: - gc.enable() - - -def _client_name_gen(obj: Union[StrOrHasName, IterableStrOrHasName]) -> str: - if obj: - if isinstance(obj, (str, ClientWrapper, BlobProperties, ContainerProperties)): - obj = (obj,) - for elem in obj: - if isinstance(elem, str): - yield elem - elif isinstance(elem, (ClientWrapper, BlobProperties, ContainerProperties)): - yield elem.name - else: - yield from _client_name_gen(elem) - - -def _ensure_containers_exist(conn_str: str, container: Union[StrOrHasName, IterableStrOrHasName]): - """ - If we have been given a container name (or an iterable of container names) we should ensure they - exist and are ready to be acted upon before returning them to the caller. - Otherwise they will encounter the potentially troublesome `ResourceNotFoundError` - Example of how it becomes troublesome: - If a caller deletes a container just before calling this function, - there will be an some indeterminate amount of time while that delete operation is being - performed that any subsequent operations attempting to create the container will - raise `ResourceExistsError` and operations that would - interact with an existing resource will raise `ResourceNotFoundError`. - """ - gen = _client_name_gen(container) - delay_max = 10 - delay = 0.1 - while True: - unfinished = [] - for cont in gen: - _client: ContainerClient = ContainerClient.from_connection_string(conn_str, cont) - try: - cprop: ContainerProperties = _client.get_container_properties(timeout=2) - # getting etag confirms container is fully created - etag = getattr(cprop, "etag", cprop["etag"]) - except ResourceNotFoundError: - try: - cprop: ContainerProperties = _client.create_container(timeout=2) - # getting etag confirms container is fully created - etag = getattr(cprop, "etag", cprop["etag"]) - except ResourceExistsError: - # We are getting both resource existance errors, meaning the container - # is likely being deleted and we can't recreate it till that operation - # has finished. So, add the container back to our queue and we'll try - # again later. - unfinished.append(cont) - finally: - _client.close() - if not unfinished: - break - gen = _client_name_gen(unfinished) - # added delay to ensure we don't jackhammer requests to remote service. 
- time.sleep(delay) - delay = min(delay_max, delay + delay) - - -def flatten_client_iters(clients: List[Union[ContainerClient, List[BlobClient]]]): - errs: Dict[str, List[Dict[str, str]]] = {} - for cclient in clients: - if isinstance(cclient, list): - for bclient in cclient: - try: - yield bclient - except BaseException as be: - exc_type, exc_value, exc_traceback = sys.exc_info() - be.with_traceback(exc_traceback) - errs.setdefault(exc_type, []).append( - { - "original": be, - "exc_type": exc_type, - "exc_value": exc_value, - } - ) - else: - try: - yield cclient - except BaseException as be: - exc_type, exc_value, exc_traceback = sys.exc_info() - be.with_traceback(exc_traceback) - errs.setdefault(exc_type, []).append( - { - "original": be, - "exc_type": exc_type, - "exc_value": exc_value, - } - ) - if errs: - err = AzureClientManagerError(f"There were {len(errs)} errors while accessing the flattened clients iterable.") - err.aggregated_traceback = [] - for e, lst in errs.items(): - agg_tb = [] - for args in lst: - args: dict - oe: BaseException = args["original"] - tb = "".join(traceback.format_exception(args["exc_type"], args["exc_value"], oe.__traceback__)) - agg_tb.append(indent(tb, "\t")) - agg_tb = "\n\n".join(agg_tb) - agg_tb = f"\n{'=' * 120}\n{agg_tb}{'-' * 120}" - err.aggregated_traceback.append(agg_tb) - LOG.exception("\n".join(err.aggregated_traceback), exc_info=err) - # raise err - err.err_map = errs - err.args += (errs,) - raise err - - -def client_generator( - conn_str, - container: Optional[Union[StrOrHasName, IterableStrOrHasName]] = None, - prefix: Optional[str] = None, - blob: Optional[Union[StrOrHasName, IterableStrOrHasName]] = None, - recurse: bool = False, -) -> Generator[Union[str, BlobServiceClient, ContainerClient, BlobClient], None, None]: - # forward declared type hints - bprop: BlobProperties - cprop: ContainerProperties - # scope shared state flags - blobs_yielded = False - containers_yielded = False - service_clients_yielded = False - - # a couple of inner functions for handling different client iteration strategies - def client_iter(container_iterable): - nonlocal blobs_yielded, containers_yielded - for c in container_iterable: - with ContainerClient.from_connection_string(conn_str, c) as container_client: - container_client: ContainerClient - if prefix is not None or blob is not None: - for bprop in container_client.list_blobs(prefix): - bname: str = bprop.name - _name = bname.rpartition("/")[2] - if check_blob(_name): - with container_client.get_blob_client(bprop.name) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - elif recurse: - for bprop in container_client.list_blobs(): - with container_client.get_blob_client(bprop.name) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - else: - if not containers_yielded: - yield CC_LABEL - containers_yielded = True - yield container_client - if not (blobs_yielded or containers_yielded): - for c in _client_name_gen(container): - with ContainerClient.from_connection_string(conn_str, c) as container_client: - container_client: ContainerClient - if recurse: - for bprop in container_client.list_blobs(): - with BlobClient.from_connection_string( - conn_str, bprop.container, bprop.name - ) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - else: - if not containers_yielded: - yield CC_LABEL - containers_yielded = True - yield container_client - - # second of the inner 
functions for client iteration strategies - def bsc_iter(): - nonlocal service_clients_yielded, containers_yielded, blobs_yielded - with BlobServiceClient.from_connection_string(conn_str) as service_client: - service_client: BlobServiceClient - if (prefix or blob) and not (blobs_yielded or containers_yielded): - yield from client_iter(service_client.list_containers()) - elif recurse: - for c in service_client.list_containers(): - with service_client.get_container_client(c) as container_client: - for b in container_client.list_blobs(): - with container_client.get_blob_client(b) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - if not (blobs_yielded or containers_yielded): - yield BSC_LABEL - service_clients_yielded = True - yield service_client - - # begin context_manager function's logic - if not prefix: - if blob: - prefs = set() - _blob = [] - for b in _client_name_gen(blob): - pref, _, bname = b.rpartition("/") - _blob.append(bname) - if pref: - prefs.add(pref) - # ToDo: work in logic for handling if there are more than 1 kind of prefix found - blob = _blob - try: - _pref = prefs.pop() - except KeyError: - _pref = None # to ensure it's not an empty string - prefix = _pref - - def _check_name(name): - return name in blob_set - - def _always_true(*args): - return True - - if blob: - blob_set = set(_client_name_gen(blob)) - check_blob = _check_name - else: - blob = None - check_blob = _always_true - if container: - _ensure_containers_exist(conn_str, container) - yield from client_iter(_client_name_gen(container)) - else: - yield from bsc_iter() - - if not (blobs_yielded or containers_yielded or service_clients_yielded): - yield from (NONE_LABEL,) - - -def _client_ctx_mgr_wrapper(conn_str: str, gen_func: Callable = client_generator) -> contextmanager: - @contextmanager - @wraps(gen_func) - def context_manager(*args, **kwargs): - nonlocal conn_str, gen_func - try: - ret = gen_func(conn_str, *args, **kwargs) - yield ret - finally: - del ret - - return context_manager - - -def _ensure_str(obj: Union[AnyStr, Union[List[AnyStr], Tuple[AnyStr]]]): - if obj is None: - return "" - if isinstance(obj, (list, tuple)): - if obj: - obj = obj[0] - else: - return "" - if isinstance(obj, bytes): - obj = obj.decode("utf-8") - return str(obj) - - -def _ensure_list_of_str(obj: Union[List[AnyStr], AnyStr]) -> List[Union[str, List[str]]]: - """ - A helper function that allows us to ensure that a given argument parameter is a list of strings. - - This function assumes the given object is one of: - * list - * str - * bytes - :param obj: A string, bytes object, or a list (or nested list) of string/bytes objects. - :return: A list (or nested list) of string objects. - - :raises AzurBlobInitError: If the given object is not a str or bytes object, or if it's a list/tuple of - non-(str/bytes) objects then a logic error has likely occured somewhere and we should - fail execution here. 
- """ - if obj is None: - return [] - if isinstance(obj, (list, tuple)): - if isinstance(obj, tuple): - obj = list(obj) - elif isinstance(obj, (str, bytes)): - if isinstance(obj, bytes): - obj = obj.decode("utf-8") - obj = [obj] - else: - raise AzureBlobInitError(f"Our attempted to ensure obj is a list of strings failed,\n\tgiven {obj=}") - for i, elem in enumerate(obj): - if isinstance(elem, str): - continue - elif isinstance(elem, bytes): - obj[i] = elem.decode("utf-8") - elif isinstance(obj, (list, tuple)): - if isinstance(obj, tuple): - obj = list(obj) - for j, elem2 in obj: - obj[j] = _ensure_list_of_str(elem2) - else: - err_msg = ( - "Our attempt to ensure obj is a list of strings failed," - f"\n\tgiven: {obj=}" - f"\n\tfailure occured while ensuring each element of given iterable was a string, " - f"at element: obj[{i}]={elem}" - ) - raise AzureBlobInitError(err_msg) - return obj - - -class AzureBlobInitError(AzureBlobDestinationError): - pass - - -class AzureBlobPathParseError(AzureBlobDestinationError): - pass - - -class AzureBlobReadError(AzureBlobDestinationError): - blob_path: str = "" - """The path string which lead to this exception""" - chunk_byte_range: Tuple[int, int] = -1, -1 - """The [start,end) bytes defining the chunk where this exception occurs (if chunking used) else set to (-1,-1)""" - container_name: str = "" - blob_name: str = "" - blob_properties: BlobProperties = None - - -class AzureBlobWriteError(AzureBlobDestinationError): - blob_path: str = "" - """The path string which lead to this exception""" - container_name: str = "" - blob_name: str = "" - blob_properties: BlobProperties = None - content_type = None - - -class AzureBlobClientError(AzureBlobDestinationError): - container_name: str = "" - blob_name: str = "" - - -class AzureClientManagerError(AzureBlobDestinationError): - err_map: Dict[str, List[Dict[str, str]]] - aggregated_traceback: List[str] - - -class AzureClientIterationError(AzureBlobDestinationError): - pass - - -class AzureBlob(BaseDestination): - def __getnewargs__(self): - """utility function that allows an instance of this class to be pickled""" - return ( - self.remote_path, - self.connection_string, - self.can_overwrite, - self._cpu_cap, - self._max_mem_bytes, - self.default_protocol, - self.default_host_name, - self.default_container_name, - self.default_interval, - self.default_media_type, - self.default_fname_prefix, - ) - - def __getstate__(self): - """utility function that allows an instance of this class to be pickled""" - return {k: v if k != "_connection_manager" else None for k, v in self.__dict__.items()} - - def __init__( - self, - remote_path: AnyStr, - connection_string: AnyStr, - can_do_overwrites: bool = False, - cpu_cap: int = DEFAULT_AVAILABLE_CPU, - max_mem_bytes: int = MAX_SYS_MEM_USE, - default_protocol: Optional[AnyStr] = None, - default_host_name: Optional[AnyStr] = None, - default_container_name: Optional[AnyStr] = None, - default_interval: Optional[AnyStr] = None, - default_media_type: Optional[AnyStr] = None, - default_fname_prefix: Optional[AnyStr] = None, - ): - """ - A subclass of BAseDestination; Allows for streaming a backup stream to an Azure-blob destination. - - Here's the expected general form for the remote path: - [protocol]://[host_name]/[container_name]/[interval]/[media_type]/[default_prefix]/[optional_fname] - - NOTE: - Components inside square brackets, E.G.: `[some component]`; are optional as long as they are instead - defined by their corresponding initializer argument. 
- - :param remote_path: - REQUIRED; A string or bytes object; - Defines the URI (or URL) for where to connect to the backup object. - - :param connection_string: - REQUIRED; A string or bytes object; - When the application makes a request to Azure Storage, it must be authorized. - To authorize a request, add your storage account credentials to the application as a - connection string. - See: - https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal - - :param can_do_overwrites: - REQUIRED; a boolean value; - Flags if we should overwrite existing data when given a destination that - already exists, or if we should fail and raise a `ResourceExistsError`. - - :param default_protocol: - OPTIONAL; DEFAULT is set from container component of remote_path argument - A string or bytes object; - The name of the container in the destination blob storage we should use. - If undefined, then we assume it is on the given remote_path argument. - - :param default_container_name: - OPTIONAL; DEFAULT is set from container component of remote_path argument - A string or bytes object; - The name of the container in the destination blob storage we should use. - If undefined, then we assume it is on the given remote_path argument. - - :param default_host_name: - OPTIONAL; DEFAULT is set from host component of remote_path argument. - A string or bytes object; - The name of the host server. - If undefined, then we assume it is on the given remote_path argument. - - :param default_interval: - OPTIONAL; DEFAULT to "yearly" - A string or bytes object; - If undefined, then we assume it is on the given remote_path argument. - - :param default_media_type: - OPTIONAL; DEFAULT to "mysql" - A string or bytes object; - if undefined, thenw e assume it is on the given remote_path argument. 
- - - """ - path = _ensure_str(remote_path) - path = path.strip(" /:").rstrip(".") - parts = self._path2parts(path) - if not path: - protocol = default_protocol or "" - if not protocol.endswith("://"): - protocol += "://" - host = default_host_name or "" - if not host.endswith("/"): - host += "/" - container = default_container_name or "" - if container and not container.endswith("/"): - container += "/" - interval = default_interval or "" - if interval and not interval.endswith("/"): - interval += "/" - media_type = default_media_type or "" - if media_type and not media_type.endswith("/"): - media_type += "/" - fname_prefix = default_fname_prefix or "" - if fname_prefix and not fname_prefix.endswith("/"): - fname_prefix += "/" - path = protocol + host + container + interval + media_type + fname_prefix - super(AzureBlob, self).__init__(path) - connection_string = _ensure_str(connection_string) - self._connection_string = connection_string - self._flag_overwite_on_write = can_do_overwrites - self._cpu_cap = cpu_cap - self._max_mem_bytes = max_mem_bytes - self._max_mem_pipe = min(MAX_PIPE_CHUNK_BYTES, max_mem_bytes) - default_protocol = _ensure_str(default_protocol or parts[0]).strip(":/") - default_host_name = _ensure_str(default_host_name or parts[1]).strip(":/") - default_container_name = _ensure_str(default_container_name or parts[2]).strip(":/") - default_interval = _ensure_str(default_interval or parts[3]).strip(":/") - default_media_type = _ensure_str(default_media_type or parts[4]).strip(":/") - default_fname_prefix = _ensure_str(default_fname_prefix or parts[5]).strip(":/") - self._protocol = default_protocol - self._host_name = default_host_name - self._container_name = default_container_name - self._interval = default_interval - self._media_type = default_media_type - self._fname_prefix = default_fname_prefix - self._part_names = "protocol,host,container,interval,media_type,fname_prefix,fname".split(",") - self._parts_list = [ - (name, parts[i] if i < len(parts) and parts[i] else "") for i, name in enumerate(self._part_names) - ] - self._default_parts: Dict[str, str] = {k: v if v != "" else None for k, v in self._parts_list} - self._default_parts["interval"] = self._default_parts["interval"] or "yearly" - self._default_parts["media_type"] = self._default_parts["media_type"] or "mysql" - self._part_names = self._part_names[::-1] - self._connection_manager: Optional[contextmanager] = None - - @property - def connection_string(self): - """An Azure specific authentication string - for accessing the target backup destination host""" - return self._connection_string - - @property - def default_protocol(self): - return self._protocol - - @property - def default_host_name(self): - """The default host server name directory that - we default to if a relative path string omits the reference""" - return self._host_name - - @property - def default_container_name(self): - """The default container (aka bucket) name that - we default to if a relative path string omits the reference""" - return self._container_name - - @property - def default_interval(self): - """The default backup interval directory that - we default to if a relative path string omits the reference""" - return self._interval - - @property - def default_media_type(self): - return self._media_type - - @property - def default_fname_prefix(self): - return self._fname_prefix - - @property - def can_overwrite(self): - return self._flag_overwite_on_write - - @property - def max_bytes_per_pipe_message(self): - return self._max_mem_pipe - 
- @property - def max_system_memory_usage(self): - return self._max_mem_bytes - - @property - def connection_manager(self): - if self._connection_manager is None: - self._connection_manager = _client_ctx_mgr_wrapper(self._connection_string, client_generator) - return self._connection_manager - - @staticmethod - def _path2parts(path: str, split_fname: bool = False): - """Breaks a path string into its sub-parts, and produces a tuple of those parts - that is at least 6 elements long. We will insert None where a part is determined to be missing in order to - ensure the minimum length of 6 elements.""" - - def extract_protocol(_path: str): - protocol, _, _path = _path.partition("://") - if not _path: - if protocol.startswith(".../"): - _path = protocol[4:] - protocol = "..." - else: - _path = protocol - protocol = None - else: - protocol = protocol.strip(":/") - return protocol, *partition_path(_path, 1) - - def partition_path(_path: str, depth: int): - if not _path: - if depth < 6: - return None, *partition_path(_path, depth + 1) - elif depth < 5: - part, _, _path = _path.partition("/") - return part.strip(":/"), *partition_path(_path, depth + 1) - elif split_fname: - prefix, _, fname = _path.rpartition("/") - return prefix, fname - return _path.strip(":/"), None - - return extract_protocol(path) - - def _path_parse(self, path: str, split_fname: bool = False): - """ - Called in multiple places where we need to decompose a path string - in order to access specific parts by name. - """ - if not path: - return self.remote_path, {k: v for k, v in self._default_parts.items()} - # noinspection PyTupleAssignmentBalance - ( - protocol, - host, - container, - interval, - media, - prefix, - *fname, - ) = self._path2parts(path, split_fname) - fname: list - protocol = protocol if protocol and protocol != "..." else self.default_protocol - host = host if host and host != "..." else self.default_host_name - container = container if container and container != "..." else self.default_container_name - if container != self.default_container_name: - interval = self.default_interval if interval and interval == "..." else interval if interval else "" - media = self.default_media_type if media and media == "..." else media if media else "" - prefix = self.default_fname_prefix if prefix and prefix == "..." else prefix if prefix else "" - else: - interval = interval if interval and interval != "..." else self.default_interval - media = media if media and media != "..." else self.default_media_type - prefix = prefix if prefix and prefix != "..." 
else self.default_fname_prefix - if fname: - _fname = list(fname) - while _fname: - fname = _fname.pop() - if fname: - _fname = "/".join(_fname) - break - else: - # noinspection PyTypeChecker - fname = None - parts: str = "/".join((s for s in (host, container, interval, media, prefix, fname) if s)) - relative_depth = 0 - while parts and parts.startswith("../"): - relative_depth += 1 - _, _, parts = parts.partition("/") - base_parts = "/".join(tpl[1] for tpl in self._parts_list[1:-relative_depth]) - base_parts += "/" if base_parts else "" - path = base_parts + parts.lstrip("/") - _parts = path.split("/", 4)[::-1] - shorten = len(self._part_names) - 1 - len(_parts) - _parts2 = [None] * shorten - _parts2 += _parts - # noinspection PyTypeChecker - ret = {k: v for k, v in zip(self._part_names[:-1], _parts2)} - ret["protocol"] = protocol - return path, ret - - def delete(self, path: AnyStr): - """ - Delete object from the destination - - the general form for the path object should conform to the following: - [azure:/]/[bucket or container name]/[server name]/[update interval]/[query language]/ - - NOTE: The protocol type (the left-most component of the example above) is technically optional, - as it should always be an azure storage type; but if passed we will check to confirm that it is - indeed for azure-blob storage, so including it ensures proper sanity checking. - - -- If path defines a new absolute path string then it must contain all parts defined above, - with the option to omit those components wrapped in square brackets, E.G.: [some component] - - where: - [components inside square brackets] => optional - => required - - such that: - optional components that are not provided should be substituted with an ellipsis - (the triple period => ...) - - E.G.: - ...://foo/.../hourly/mysql/bar-that.foos.gz - - Note: - Where optional path components are omitted, we assume that the context of the called AzureBlob instance - should be used to fill in the gaps. - - -- If path is given as a relative path string then you may also use the ellipsis as defined for absolute paths, - with the added option to use `..` for relative directory hierarchy referencing. The one caveat is that - - E.G.: - ../../daily/mysql/relative-foo.bar.gz - or - ../../../some_different_host/.../mysql - where: - The `...` component signals that we wish to use the given default interval this object was - initialized with. - - :param path: A string or bytes object; - The path to the file (blob) to delete. Can be relative or absolute. 
- """ - abs_path, path_dict = self._path_parse(path) - container = path_dict["container"] - fname = _assemble_fname(path_dict) - if fname: - label = BC_LABEL - client_type = "blob" - args = container, fname - else: - label = CC_LABEL - client_type = "container" - args = (container,) - with self.connection_manager(*args) as client_iter: - iter_type = next(client_iter) - if iter_type != label: - raise AzureClientIterationError( - f"Failed to properly identify deletion target given {path=}" - f"\n\texpected client type of {label} but got {iter_type}" - ) - to_check = [] - del_call = "delete_" + client_type - for client in client_iter: - client: Union[BlobClient, ContainerClient] - to_check.append(client) - getattr(client, del_call)() - for c in to_check: - delay = 0.01 - max_delay = 2 - t0 = time.perf_counter() - while (time.perf_counter() - t0) < 5: - try: - if client_type == "blob": - c: BlobClient - try: - bprop: BlobProperties = c.get_blob_properties() - if bprop.deleted: - break - except AttributeError: - # when calls to get_blob_properties raises AttributeError, - # then the blob is no longer available and the deletion was successful - break - else: - c: ContainerClient - cprop: ContainerProperties = c.get_container_properties() - if cprop.deleted: - break - time.sleep(delay) - delay = min(max_delay, delay + delay) - except ResourceNotFoundError: - break - - def _blob_ospiper( - self, - path_parts_dict: Dict[str, str], - pout: mpConn, - chunk_size: int = None, - ) -> None: - def err_assembly(): - bad_path = "{protocol}://{parts}".format( - protocol=self._part_names[0], - parts="/".join((f"{{{s}}}" for s in self._part_names[1:] if path_parts_dict.get(s, None))), - ).format(**path_parts_dict) - return AzureClientIterationError(f"Unable to find downloadable content files on path : {bad_path}") - - # noinspection PyShadowingNames - def configure_chunking(bsize: int, pipe_chunk_size: int): - """ - - :param bsize: total number of bytes to be downloaded for current blob - :type bsize: int - :param pipe_chunk_size: The maximum buffer size of our transfer pipe - :type pipe_chunk_size: int - :return: 4-tuple of ints indicating: - * the the number of memory chunks - * the size of those mem chunks - * if the pipe buffer is smaller than max allowed mem usage, then - this is the number of pipe chunks needed to fully transfer one - of the memory chunks. 
- * the size of the transfer chunks - :rtype: tuple[int,int,int,int] - """ - nonlocal self - if bsize < self.max_system_memory_usage: - mem_chunk_size = size - num_mem_chunks = 1 - else: - mem_chunk_size = self.max_system_memory_usage - num_mem_chunks = (size + mem_chunk_size - 1) // mem_chunk_size - if pipe_chunk_size < mem_chunk_size: - _chunk_size = pipe_chunk_size - num_chunks = (mem_chunk_size + _chunk_size - 1) // _chunk_size - else: - _chunk_size = mem_chunk_size - num_chunks = 1 - return num_mem_chunks, mem_chunk_size, num_chunks, _chunk_size - - chunk_size = self.max_bytes_per_pipe_message if chunk_size is None else chunk_size - max_threads = min(32, self._max_mem_bytes) - with pout: - with os.fdopen(pout.fileno(), "wb", buffering=chunk_size, closefd=False) as pipe_out: - container = path_parts_dict.get("container", None) - fname = path_parts_dict.pop("fname", None) - prefix = _assemble_fname(path_parts_dict) or None - with self.connection_manager(container, prefix, fname, recurse=True) as client_iter: - iter_type = next(client_iter) - if iter_type != BC_LABEL: - raise err_assembly() - for client in client_iter: - client: BlobClient - size = client.get_blob_properties().size - ( - num_mem_chunks, - mem_chunk_size, - num_chunks, - _chunk_size, - ) = configure_chunking(size, chunk_size) - with io.BytesIO(b"\x00" * mem_chunk_size) as bio: - for i in range(num_mem_chunks): - ipos = i * mem_chunk_size - dl: StorageStreamDownloader = client.download_blob( - ipos, - mem_chunk_size, - max_concurrency=max_threads, - ) - bio.seek(0) - bytes_read = dl.readinto(bio) - bio.seek(0) - - for pos in range(0, bytes_read, _chunk_size): - pipe_out.write(bio.read(_chunk_size)) - rem = bytes_read % _chunk_size - if rem: - pipe_out.write(bio.read(rem)) - - @contextmanager - def get_stream(self, copy: Union[str, MySQLCopy]): - if copy is None: - copy = self.remote_path - path = copy.key if isinstance(copy, MySQLCopy) else copy - _path = Path(path) - has_fname = "." in _path.name and _path.name != "..." - path, path_parts_dict = self._path_parse(path, has_fname) - pipe_in, pipe_out = mp.Pipe(False) - proc = mp.Process(target=self._blob_ospiper, args=(path_parts_dict, pipe_out)) - try: - with pipe_in: - proc.start() - pipe_out.close() - with os.fdopen(pipe_in.fileno(), "rb", closefd=False) as file_pipe_in: - yield file_pipe_in - finally: - # pipe_out.close() - proc.join() - proc.close() - - def read(self, filepath: str, bytes_per_chunk: Optional[int] = None) -> bytes: - """ - Read content from destination at the end of given filepath. - - :param filepath: - REQUIRED; a str object; - Relative path to destination file that we will read from. - :type filepath: str - - :param bytes_per_chunk: - OPTIONAL; DEFAULT = self.max_bytes_per_pipe_message; an int value; - This parameter dictates the max chunk size (in bytes) that should - be passed into the pipe for any single chunk. - :type bytes_per_chunk: int - - :return: Content of the file. - :rtype: bytes - """ - with self.get_stream(filepath) as conn: - conn: io.FileIO - strt = time.perf_counter() - datum = [] - while time.perf_counter() - strt < 2: - try: - data = conn.read() - if data: - datum.append(data) - strt = time.perf_counter() - except EOFError: - break - return b"".join(datum) - - def save(self, handler, filepath): - """ - Save a stream given as handler to filepath. - - :param handler: Incoming stream. - :type handler: file - :param filepath: Save stream as this name. 
- :type filepath: str - """ - with handler as f_src: - self.write(f_src, filepath) - - def write(self, content: Union[AnyStr, io.BufferedIOBase], filepath: AnyStr): - """ - Write ``content`` to a file. - - :param content: Content to write to the file. - :type content: str, bytes, or subclass of BufferedIOBase object - :param filepath: Relative path to file. - :type filepath: str or bytes object - """ - if isinstance(filepath, bytes): - filepath = filepath.decode("utf-8") - filepath, _, fname = filepath.rpartition("/") - path, path_dict = self._path_parse(filepath) - container = path_dict["container"] or self.default_container_name - blob_name = _assemble_fname(path_dict) - with self.connection_manager(container, prefix=blob_name, blob=fname) as client_iter: - iter_type = next(client_iter) - if iter_type == CC_LABEL: - blob_name += "/" + fname - client: ContainerClient = next(client_iter) - if isinstance(content, io.BufferedReader): - with content: - client.upload_blob( - blob_name, - content.read(), - overwrite=self.can_overwrite, - ) - else: - client.upload_blob(blob_name, content, overwrite=self.can_overwrite) - elif iter_type != BC_LABEL: - raise AzureClientIterationError(f"Failed to identify path to blob files given: {filepath}") - else: - # Unless filepath used wildcards, client_iter is only going to produce - # a single client instance to upload to. - bclient: BlobClient = next(client_iter) - if isinstance(content, io.BufferedReader): - with content: - bclient.upload_blob(content.read(), overwrite=self.can_overwrite) - else: - bclient.upload_blob(content, overwrite=self.can_overwrite) - - def _list_files(self, prefix: str = None, **kwargs): # , recursive=False, files_only=False): - """ - A descendant class must implement this method. - It should return a list of files already filtered out by prefix. - Some storage engines (e.g. Google Cloud Storage) allow that - at the API level. The method should use storage level filtering - to save on network transfers. - - if prefix is given it is assumed to supersede the default container/interval/media_type/custom-prefix/ parts of - the path. To only replace select parts of that path segment, use the ... (ellipsis) to indicate which portions - you wish to have remain default. - """ - results = set() - if prefix: - if prefix == "..." or prefix.startswith(".../"): - prefix = prefix.strip("/") - path_template = f"{self._protocol}://{self.default_host_name}/{prefix}" - _, path_dict = self._path_parse(path_template, True) - else: - container, _, prefix = prefix.partition("/") - path_dict = {"container": container, "fname_prefix": prefix} - else: - prefix = None # ensure we don't pass along an empty string - path_dict = {"container": None} - fname = path_dict.pop("fname", None) or None - prefix = _assemble_fname(path_dict) or prefix or None - cont_starts, _, _ = (path_dict.get("container", "") or "").partition("*") - with BlobServiceClient.from_connection_string(self.connection_string) as service_client: - service_client: BlobServiceClient - # service_client. 
- for container in service_client.list_containers(cont_starts or None): - with service_client.get_container_client(container) as cclient: - cclient: ContainerClient - if fname: - for bprop in cclient.list_blobs(prefix): - bprop: BlobProperties - if fname in bprop.name: - with cclient.get_blob_client(bprop) as bclient: - results.add(bclient.url) - else: - for bprop in cclient.list_blobs(prefix): - bprop: BlobProperties - with cclient.get_blob_client(bprop) as bclient: - results.add(bclient.url) - # if files_only: - # if recursive: - # for bprop in cclient.list_blobs(prefix): - # bprop: BlobProperties - # bname: str = bprop.name - # if not fname or fname in bname.rpartition("/")[2]: - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - # else: - # for bprop in cclient.walk_blobs(prefix): - # bprop: BlobProperties - # bname = bprop.name - # dbg_break = 0 - # elif recursive: - # if not fname: - # for bprop in cclient.list_blobs(prefix): - # bprop: BlobProperties - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - # - # else: - # for bprop in cclient.walk_blobs(prefix): - # if fname in bname.rpartition("/")[2]: - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - return results diff --git a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg index 10a755119..e7f5c6978 100644 --- a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg +++ b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg @@ -22,6 +22,12 @@ AWS_SECRET_ACCESS_KEY="YYYYY" AWS_DEFAULT_REGION="us-east-1" BUCKET="twindb-backups" +# Azure destination settings +[az] +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name="twindb-backups" +#remote_path = /backups/mysql # optional + # GCS destination settings [gcs] GC_CREDENTIALS_FILE=/twindb_backup/env/My Project 17339-bbbc43d1bee3.json
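
For reference (not part of the patch): a minimal sketch of how the `[az]` section above maps onto the classes this change introduces (AZConfig and the AZ destination). The connection string, container name, and blob path are placeholders, and the snippet assumes a reachable Azure storage account, since AZ's constructor connects and creates the container if it does not exist.

# Illustrative only; values are placeholders, not real credentials.
from twindb_backup.configuration.destinations.az import AZConfig
from twindb_backup.destination.az import AZ

# AZConfig validates the option types and falls back to chunk_size=4 MiB
# and remote_path="/" when they are not set in the config file.
az_config = AZConfig(
    connection_string=(
        "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;"
        "AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net"
    ),
    container_name="twindb-backups",
)

# TwinDBBackupConfig.destination() performs the same wiring when the
# [destination] section sets backup_destination = az.
dst = AZ(
    connection_string=az_config.connection_string,
    container_name=az_config.container_name,
    chunk_size=az_config.chunk_size,
    remote_path=az_config.remote_path,
)

dst.write("backup finished", "status/latest")  # upload (or overwrite) a small blob
print(dst.read("status/latest"))               # read the blob content back

The same AZ instance also provides save(), get_stream(), delete(), and _list_files(), which the rest of the tool drives exactly as it does for the S3 and GCS destinations.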