From 0e03641c1d5a1cc2b26c35a5838d53d23144d9af Mon Sep 17 00:00:00 2001 From: jakevc Date: Wed, 15 Nov 2023 09:48:21 -0800 Subject: [PATCH 01/34] creds,client --- poetry.lock | 197 +++++++++++++++++++++++- pyproject.toml | 2 + snakemake_storage_plugin_az/__init__.py | 27 +++- 3 files changed, 220 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5e23217..cfbd682 100644 --- a/poetry.lock +++ b/poetry.lock @@ -40,6 +40,45 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +[[package]] +name = "azure-core" +version = "1.29.5" +description = "Microsoft Azure Core Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "azure-core-1.29.5.tar.gz", hash = "sha256:52983c89d394c6f881a121e5101c5fa67278ca3b1f339c8fb2ef39230c70e9ac"}, + {file = "azure_core-1.29.5-py3-none-any.whl", hash = "sha256:0fa04b7b1f7d44a4fb8468c4093deb2ea01fdf4faddbf802ed9205615f99d68c"}, +] + +[package.dependencies] +requests = ">=2.18.4" +six = ">=1.11.0" +typing-extensions = ">=4.6.0" + +[package.extras] +aio = ["aiohttp (>=3.0)"] + +[[package]] +name = "azure-storage-blob" +version = "12.19.0" +description = "Microsoft Azure Blob Storage Client Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "azure-storage-blob-12.19.0.tar.gz", hash = "sha256:26c0a4320a34a3c2a1b74528ba6812ebcb632a04cd67b1c7377232c4b01a5897"}, + {file = "azure_storage_blob-12.19.0-py3-none-any.whl", hash = "sha256:7bbc2c9c16678f7a420367fef6b172ba8730a7e66df7f4d7a55d5b3c8216615b"}, +] + +[package.dependencies] +azure-core = ">=1.28.0,<2.0.0" +cryptography = ">=2.1.4" +isodate = ">=0.6.1" +typing-extensions = ">=4.3.0" + +[package.extras] +aio = ["azure-core[aio] (>=1.28.0,<2.0.0)"] + [[package]] name = "black" version = "23.10.1" @@ -91,6 +130,70 @@ files = [ {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -304,6 +407,51 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "41.0.5" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797"}, + {file = "cryptography-41.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20"}, + {file = "cryptography-41.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548"}, + {file = "cryptography-41.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d"}, + {file = "cryptography-41.0.5-cp37-abi3-win32.whl", hash = "sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936"}, + {file = "cryptography-41.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84"}, + {file = "cryptography-41.0.5.tar.gz", hash = "sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +nox = ["nox"] +pep8test = ["black", "check-sdist", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "datrie" version = "0.8.2" @@ -457,6 +605,20 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "jinja2" version = "3.1.2" @@ -754,6 +916,17 @@ files = [ {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, ] +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + [[package]] name = "pyflakes" version = "3.1.0" @@ -1033,6 +1206,17 @@ files = [ {file = "rpds_py-0.10.6.tar.gz", hash = "sha256:4ce5a708d65a8dbf3748d2474b580d606b1b9f91b5c6ab2a316e0b0cf7a4ba50"}, ] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + [[package]] name = "smart-open" version = "6.4.0" @@ -1203,6 +1387,17 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.6.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typing-extensions" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, + {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, +] + [[package]] name = "urllib3" version = "2.0.7" @@ -1323,4 +1518,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7fb121614b56cb2b0a09aeba8634f89a4347f73ad07aa6701aba6e69a76eb789" +content-hash = "08bb4f9cbfd43d4b004bf674ca1348a7e236f8cb72962c7cbc7d21049ae2f81b" diff --git a/pyproject.toml b/pyproject.toml index 6c9a535..195cc50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,8 @@ readme = "README.md" python = "^3.11" snakemake-interface-common = "^1.14.2" snakemake-interface-storage-plugins = "^1.2.3" +azure-storage-blob = "^12.19.0" +azure-core = "^1.29.5" [tool.poetry.group.dev.dependencies] diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 25cb9f9..f1f6ef8 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from azure.storage.blob import BlobServiceClient from typing import Any, Iterable, Optional from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase from snakemake_interface_storage_plugins.storage_provider import ( @@ -26,10 +27,10 @@ # settings. @dataclass class StorageProviderSettings(StorageProviderSettingsBase): - myparam: Optional[int] = field( + endpoint_url: Optional[str] = field( default=None, metadata={ - "help": "Some help text", + "help": "Azure Blob Storage Account endpoint url", # Optionally request that setting is also available for specification # via an environment variable. The variable will be named automatically as # SNAKEMAKE__, all upper case. @@ -49,6 +50,20 @@ class StorageProviderSettings(StorageProviderSettingsBase): "required": True, }, ) + access_key: Optional[str] = field( + default=None, + metadata={ + "help": "Azure Blob Storage Account Access Key Credential", + "env_var": False, + }, + ) + sas_token: Optional[str] = field( + default=None, + metadata={ + "help": "Azure Blob Storage Account SAS Token Credential", + "env_var": False, + }, + ) # Required: @@ -59,13 +74,15 @@ class StorageProviderSettings(StorageProviderSettingsBase): class StorageProvider(StorageProviderBase): # For compatibility with future changes, you should not overwrite the __init__ # method. Instead, use __post_init__ to set additional attributes and initialize - # futher stuff. + # further stuff. def __post_init__(self): # This is optional and can be removed if not needed. # Alternatively, you can e.g. prepare a connection to your storage backend here. # and set additional attributes. - pass + self.blob_service_client = BlobServiceClient( + self.settings.endpoint_url, credential=self.settings.credential + ) @classmethod def is_valid_query(cls, query: str) -> StorageQueryValidationResult: @@ -91,7 +108,7 @@ def list_objects(self, query: Any) -> Iterable[str]: class StorageObject(StorageObjectRead, StorageObjectWrite, StorageObjectGlob): # For compatibility with future changes, you should not overwrite the __init__ # method. Instead, use __post_init__ to set additional attributes and initialize - # futher stuff. + # further stuff. def __post_init__(self): # This is optional and can be removed if not needed. From 94e3caffc975b899ece063c5fceac69166bc72fe Mon Sep 17 00:00:00 2001 From: jakevc Date: Wed, 15 Nov 2023 10:23:56 -0800 Subject: [PATCH 02/34] validate endpoint url --- poetry.lock | 95 ++++++++++++++++++++++++- pyproject.toml | 1 + snakemake_storage_plugin_az/__init__.py | 35 ++++++++- 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index cfbd682..1864302 100644 --- a/poetry.lock +++ b/poetry.lock @@ -59,6 +59,23 @@ typing-extensions = ">=4.6.0" [package.extras] aio = ["aiohttp (>=3.0)"] +[[package]] +name = "azure-identity" +version = "1.15.0" +description = "Microsoft Azure Identity Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "azure-identity-1.15.0.tar.gz", hash = "sha256:4c28fc246b7f9265610eb5261d65931183d019a23d4b0e99357facb2e6c227c8"}, + {file = "azure_identity-1.15.0-py3-none-any.whl", hash = "sha256:a14b1f01c7036f11f148f22cd8c16e05035293d714458d6b44ddf534d93eb912"}, +] + +[package.dependencies] +azure-core = ">=1.23.0,<2.0.0" +cryptography = ">=2.5" +msal = ">=1.24.0,<2.0.0" +msal-extensions = ">=0.3.0,<2.0.0" + [[package]] name = "azure-storage-blob" version = "12.19.0" @@ -771,6 +788,43 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "msal" +version = "1.25.0" +description = "The Microsoft Authentication Library (MSAL) for Python library" +optional = false +python-versions = ">=2.7" +files = [ + {file = "msal-1.25.0-py2.py3-none-any.whl", hash = "sha256:386df621becb506bc315a713ec3d4d5b5d6163116955c7dde23622f156b81af6"}, + {file = "msal-1.25.0.tar.gz", hash = "sha256:f44329fdb59f4f044c779164a34474b8a44ad9e4940afbc4c3a3a2bbe90324d9"}, +] + +[package.dependencies] +cryptography = ">=0.6,<44" +PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} +requests = ">=2.0.0,<3" + +[package.extras] +broker = ["pymsalruntime (>=0.13.2,<0.14)"] + +[[package]] +name = "msal-extensions" +version = "1.0.0" +description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." +optional = false +python-versions = "*" +files = [ + {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, + {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, +] + +[package.dependencies] +msal = ">=0.4.1,<2.0.0" +portalocker = [ + {version = ">=1.0,<3", markers = "python_version >= \"3.5\" and platform_system != \"Windows\""}, + {version = ">=1.6,<3", markers = "python_version >= \"3.5\" and platform_system == \"Windows\""}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -866,6 +920,25 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "portalocker" +version = "2.8.2" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + [[package]] name = "psutil" version = "5.9.6" @@ -938,6 +1011,26 @@ files = [ {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + [[package]] name = "pyreadline3" version = "3.4.1" @@ -1518,4 +1611,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "08bb4f9cbfd43d4b004bf674ca1348a7e236f8cb72962c7cbc7d21049ae2f81b" +content-hash = "298b4867ac587cd9ae7edb2b4f673f4364ae3e48941dbd261a2160106c2c6c53" diff --git a/pyproject.toml b/pyproject.toml index 195cc50..c4da767 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ snakemake-interface-common = "^1.14.2" snakemake-interface-storage-plugins = "^1.2.3" azure-storage-blob = "^12.19.0" azure-core = "^1.29.5" +azure-identity = "^1.15.0" [tool.poetry.group.dev.dependencies] diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index f1f6ef8..192f9a4 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +import re from azure.storage.blob import BlobServiceClient from typing import Any, Iterable, Optional from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase @@ -15,6 +16,23 @@ from snakemake_interface_storage_plugins.io import IOCacheStorageInterface +def is_valid_azure_storage_blob_endpoint(endpoint_url: str) -> bool: + """ + Validates if the blob account endpoint is a valid Azure Storage Account URL. + + Args: + blob_account_url (str): The name of the environment variable. + + Returns: + bool: True if the environment variable is a valid Azure Storage Account URL. + """ + url_pattern = re.compile( + r"^https:\/\/[a-z0-9]+(\.[a-z0-9]+)*\.blob\.core\.windows\.net\/?(.+)?$" + ) + + return bool(url_pattern.match(endpoint_url)) + + # Optional: # Define settings for your storage plugin (e.g. host url, credentials). # They will occur in the Snakemake CLI as --storage-- @@ -53,7 +71,10 @@ class StorageProviderSettings(StorageProviderSettingsBase): access_key: Optional[str] = field( default=None, metadata={ - "help": "Azure Blob Storage Account Access Key Credential", + "help": ( + "Azure Blob Storage Account Access Key Credential.", + "If set, takes precedence over sas_token credential.", + ), "env_var": False, }, ) @@ -65,6 +86,18 @@ class StorageProviderSettings(StorageProviderSettingsBase): }, ) + def __post_init__(self): + if not is_valid_azure_storage_blob_endpoint(self.endpoint_url): + raise ValueError( + f"Invalid Azure Storage Blob Endpoint URL: {self.endpoint_url}" + ) + + self.credential = None + if self.access_key: + self.credential = self.access_key + elif self.sas_token: + self.credential = self.sas_token + # Required: # Implementation of your storage provider From e531e3f447bbed48cd806c89e1d0ff9c6cb10d07 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:02:22 -0800 Subject: [PATCH 03/34] list objects --- snakemake_storage_plugin_az/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 192f9a4..55ee46b 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -130,7 +130,11 @@ def list_objects(self, query: Any) -> Iterable[str]: This is optional and can raise a NotImplementedError() instead. """ - ... + + # parse container name from query + container_name = query.split("/")[0] + cc = self.blob_service_client.get_container_client(container_name) + return cc.list_blob_names() # Required: From dd06e24c6516fcc602fe748a90d512d2f7c82ecc Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:08:32 -0800 Subject: [PATCH 04/34] func doc --- snakemake_storage_plugin_az/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 55ee46b..5733cae 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -16,15 +16,15 @@ from snakemake_interface_storage_plugins.io import IOCacheStorageInterface -def is_valid_azure_storage_blob_endpoint(endpoint_url: str) -> bool: +def is_valid_azure_blob_endpoint(endpoint_url: str) -> bool: """ - Validates if the blob account endpoint is a valid Azure Storage Account URL. + Validates the Azure Blob endpoint pattern. Args: - blob_account_url (str): The name of the environment variable. + endpoint_url (str): The name of the Azure Blob Storage Account endpoint Returns: - bool: True if the environment variable is a valid Azure Storage Account URL. + bool: True if the endpoint_url is a valid Azure Blob endpoint. """ url_pattern = re.compile( r"^https:\/\/[a-z0-9]+(\.[a-z0-9]+)*\.blob\.core\.windows\.net\/?(.+)?$" From 159ebc7915c243c822e846cd481c9dcdfd284dbf Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:08:50 -0800 Subject: [PATCH 05/34] func name --- snakemake_storage_plugin_az/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 5733cae..6c4e55f 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -87,7 +87,7 @@ class StorageProviderSettings(StorageProviderSettingsBase): ) def __post_init__(self): - if not is_valid_azure_storage_blob_endpoint(self.endpoint_url): + if not is_valid_azure_blob_endpoint(self.endpoint_url): raise ValueError( f"Invalid Azure Storage Blob Endpoint URL: {self.endpoint_url}" ) From 57d3346dd3f20559c713865f0f6e8a4b36b2c02f Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:10:33 -0800 Subject: [PATCH 06/34] query validation --- snakemake_storage_plugin_az/__init__.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 6c4e55f..c789784 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field import re +from urllib.parse import urlparse from azure.storage.blob import BlobServiceClient from typing import Any, Iterable, Optional from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase @@ -123,7 +124,24 @@ def is_valid_query(cls, query: str) -> StorageQueryValidationResult: # Ensure that also queries containing wildcards (e.g. {sample}) are accepted # and considered valid. The wildcards will be resolved before the storage # object is actually used. - ... + try: + parsed = urlparse(query) + except Exception as e: + return StorageQueryValidationResult( + query=query, + valid=False, + reason=f"cannot be parsed as URL ({e})", + ) + if parsed.scheme != "az": + return StorageQueryValidationResult( + query=query, + valid=False, + reason="must start with az (az://...)", + ) + return StorageQueryValidationResult( + query=query, + valid=True, + ) def list_objects(self, query: Any) -> Iterable[str]: """Return an iterator over all objects in the storage that match the query. From 26062e7b05dfd663f86c91c34e5d7506f12b59cb Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:16:41 -0800 Subject: [PATCH 07/34] urlparse container name --- snakemake_storage_plugin_az/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index c789784..42166a0 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -150,7 +150,8 @@ def list_objects(self, query: Any) -> Iterable[str]: """ # parse container name from query - container_name = query.split("/")[0] + parsed = urlparse(query) + container_name = parsed.netloc cc = self.blob_service_client.get_container_client(container_name) return cc.list_blob_names() From 21498e93d2b60d16c5c4ec979c1129dbaf2ba59a Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 10:31:47 -0800 Subject: [PATCH 08/34] container exists --- snakemake_storage_plugin_az/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 42166a0..1348174 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -242,3 +242,13 @@ def list_candidate_matches(self) -> Iterable[str]: # This is used by glob_wildcards() to find matches for wildcards in the query. # The method has to return concretized queries without any remaining wildcards. ... + + def container_exists(self) -> bool: + """Returns True if container exists, False otherwise.""" + try: + container_name = urlparse(self.query).netloc + return self.provider.blob_service_client.get_container_client( + container_name + ) + except Exception: + return False From aa2e27d3374260cab4580d9affb5ad57fa985e4a Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 11:00:11 -0800 Subject: [PATCH 09/34] urlparse --- snakemake_storage_plugin_az/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 1348174..6b37aab 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -153,7 +153,7 @@ def list_objects(self, query: Any) -> Iterable[str]: parsed = urlparse(query) container_name = parsed.netloc cc = self.blob_service_client.get_container_client(container_name) - return cc.list_blob_names() + return [o for o in cc.list_blob_names()] # Required: @@ -170,7 +170,10 @@ def __post_init__(self): # This is optional and can be removed if not needed. # Alternatively, you can e.g. prepare a connection to your storage backend here. # and set additional attributes. - pass + if self.is_valid_query(): + parsed = urlparse(self.query) + self.container_name = parsed.netloc + self.path = parsed.path.lstrip("/") async def inventory(self, cache: IOCacheStorageInterface): """From this file, try to find as much existence and modification date From 5a0624f204162c12ca63770839c963acf9a11531 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 11:13:07 -0800 Subject: [PATCH 10/34] bump --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1864302..5f5d8d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1401,13 +1401,13 @@ ConfigArgParse = ">=1.7,<2.0" [[package]] name = "snakemake-interface-storage-plugins" -version = "1.2.3" +version = "1.3.1" description = "This package provides a stable interface for interactions between Snakemake and its storage plugins." optional = false python-versions = ">=3.11,<4.0" files = [ - {file = "snakemake_interface_storage_plugins-1.2.3-py3-none-any.whl", hash = "sha256:2c9287a2c4a950f81ec832b5749c103fb5cbb9e6eabe120c7e675114a23a20f2"}, - {file = "snakemake_interface_storage_plugins-1.2.3.tar.gz", hash = "sha256:373ea2b22db2840fecb509818c4f0e862ef7c0412264f70dc720b2cf984671ef"}, + {file = "snakemake_interface_storage_plugins-1.3.1-py3-none-any.whl", hash = "sha256:57c07e3ab56124a2795d33461e170f1a38caaea54fd451e1be5ad5e0c6ecd51c"}, + {file = "snakemake_interface_storage_plugins-1.3.1.tar.gz", hash = "sha256:bf117e5955ba633f4d739fc8b4a7e6a4ae77d956d8550d3c3763c9bd9af9612d"}, ] [package.dependencies] From 44277e2ba5ef44ff305b85e4cd1100b3db35d3bd Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 11:37:29 -0800 Subject: [PATCH 11/34] local suffix --- snakemake_storage_plugin_az/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 6b37aab..37aba71 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -193,7 +193,7 @@ def get_inventory_parent(self) -> Optional[str]: def local_suffix(self) -> str: """Return a unique suffix for the local path, determined from self.query.""" - ... + return f"{self.container_name}/{self.path}" def close(self): # Close any open connections, unmount stuff, etc. From 78c421136480e2ec7ed428ccb600e6d50d7d78bd Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 11:44:31 -0800 Subject: [PATCH 12/34] cache_key --- snakemake_storage_plugin_az/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 37aba71..fe7a463 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -189,7 +189,7 @@ async def inventory(self, cache: IOCacheStorageInterface): def get_inventory_parent(self) -> Optional[str]: """Return the parent directory of this object.""" # this is optional and can be left as is - return None + return self.cache_key(self.container_name) def local_suffix(self) -> str: """Return a unique suffix for the local path, determined from self.query.""" From 9c3016cfe66e879270ff567354ff662fd13eb6c1 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 12:24:53 -0800 Subject: [PATCH 13/34] exists --- snakemake_storage_plugin_az/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index fe7a463..8e5d592 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -205,7 +205,7 @@ def close(self): @retry_decorator def exists(self) -> bool: # return True if the object exists - ... + return self.provider.blob_service_client.get_blob_client(self.path).exists() @retry_decorator def mtime(self) -> float: From 3411ea56a16fa70bf28532f141201fde25f63343 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 12:38:55 -0800 Subject: [PATCH 14/34] properties --- snakemake_storage_plugin_az/__init__.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 8e5d592..a5a0715 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field import re from urllib.parse import urlparse -from azure.storage.blob import BlobServiceClient +from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient from typing import Any, Iterable, Optional from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase from snakemake_interface_storage_plugins.storage_provider import ( @@ -114,7 +114,7 @@ def __post_init__(self): # This is optional and can be removed if not needed. # Alternatively, you can e.g. prepare a connection to your storage backend here. # and set additional attributes. - self.blob_service_client = BlobServiceClient( + self.bsc = BlobServiceClient( self.settings.endpoint_url, credential=self.settings.credential ) @@ -152,7 +152,7 @@ def list_objects(self, query: Any) -> Iterable[str]: # parse container name from query parsed = urlparse(query) container_name = parsed.netloc - cc = self.blob_service_client.get_container_client(container_name) + cc = self.bsc.get_container_client(container_name) return [o for o in cc.list_blob_names()] @@ -175,6 +175,14 @@ def __post_init__(self): self.container_name = parsed.netloc self.path = parsed.path.lstrip("/") + # container client + self.cc: ContainerClient = self.provider.bsc.get_container_client( + self.container_name + ) + + # blob client + self.bc: BlobClient = self.cc.get_blob_client(self.path) + async def inventory(self, cache: IOCacheStorageInterface): """From this file, try to find as much existence and modification date information as possible. Only retrieve that information that comes for free @@ -205,17 +213,17 @@ def close(self): @retry_decorator def exists(self) -> bool: # return True if the object exists - return self.provider.blob_service_client.get_blob_client(self.path).exists() + return self.bc.exists() @retry_decorator def mtime(self) -> float: # return the modification time - ... + return self.bc.get_blob_properties().last_modified.timestamp() @retry_decorator def size(self) -> int: # return the size in bytes - ... + return self.bc.get_blob_properties().size @retry_decorator def retrieve_object(self): @@ -250,8 +258,6 @@ def container_exists(self) -> bool: """Returns True if container exists, False otherwise.""" try: container_name = urlparse(self.query).netloc - return self.provider.blob_service_client.get_container_client( - container_name - ) + return self.provider.bsc.get_container_client(container_name) except Exception: return False From 29a3d1c9e62150e39af6b37d0f14ee44d385ba39 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:09:24 -0800 Subject: [PATCH 15/34] bump --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5f5d8d1..bfb7717 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1401,13 +1401,13 @@ ConfigArgParse = ">=1.7,<2.0" [[package]] name = "snakemake-interface-storage-plugins" -version = "1.3.1" +version = "2.0.1" description = "This package provides a stable interface for interactions between Snakemake and its storage plugins." optional = false python-versions = ">=3.11,<4.0" files = [ - {file = "snakemake_interface_storage_plugins-1.3.1-py3-none-any.whl", hash = "sha256:57c07e3ab56124a2795d33461e170f1a38caaea54fd451e1be5ad5e0c6ecd51c"}, - {file = "snakemake_interface_storage_plugins-1.3.1.tar.gz", hash = "sha256:bf117e5955ba633f4d739fc8b4a7e6a4ae77d956d8550d3c3763c9bd9af9612d"}, + {file = "snakemake_interface_storage_plugins-2.0.1-py3-none-any.whl", hash = "sha256:52cc8dea37fe6a2ba5f07b0ce96b370eb8e6a48e295fbcdc27eee0fc5212409b"}, + {file = "snakemake_interface_storage_plugins-2.0.1.tar.gz", hash = "sha256:cd6c4b3018dabf672e9779799fefbf2fda38c96e701dd53004e1cafa847c0ba6"}, ] [package.dependencies] @@ -1611,4 +1611,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "298b4867ac587cd9ae7edb2b4f673f4364ae3e48941dbd261a2160106c2c6c53" +content-hash = "0f9d6f1ce307fb63620d0a019751fe5d71b12edcce1dbbb7586779ca10d18868" diff --git a/pyproject.toml b/pyproject.toml index c4da767..9b94ce1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.11" snakemake-interface-common = "^1.14.2" -snakemake-interface-storage-plugins = "^1.2.3" +snakemake-interface-storage-plugins = "2.0.1" azure-storage-blob = "^12.19.0" azure-core = "^1.29.5" azure-identity = "^1.15.0" From 76d010c09bd2482b5ee29d2a352c4756b7ce4332 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:25:53 -0800 Subject: [PATCH 16/34] tests --- tests/tests.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 9d040f7..45111b4 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1,20 +1,27 @@ +import uuid from typing import Optional, Type from snakemake_interface_storage_plugins.tests import TestStorageBase from snakemake_interface_storage_plugins.storage_provider import StorageProviderBase from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase +from snakemake_storage_plugin_az import StorageProvider, StorageProviderSettings class TestStorageNoSettings(TestStorageBase): __test__ = True retrieve_only = True + def get_query_not_existing(self) -> str: + container = uuid.uuid4().hex + path = uuid.uuid4().hex + return f"az://{container}/{path}" + def get_query(self) -> str: - ... + return "az://container/path/test.txt" def get_storage_provider_cls(self) -> Type[StorageProviderBase]: # Return the StorageProvider class of this plugin - ... + StorageProvider def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase]: # instantiate StorageProviderSettings of this plugin as appropriate - ... + return StorageProviderSettings(endpoint_url="", access_key="") From a0197b07c76d96464eaa2ae0698c2e1c962929e6 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:30:16 -0800 Subject: [PATCH 17/34] tmp_path --- tests/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 45111b4..8ad3910 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -10,12 +10,12 @@ class TestStorageNoSettings(TestStorageBase): __test__ = True retrieve_only = True - def get_query_not_existing(self) -> str: + def get_query_not_existing(self, tmp_path) -> str: container = uuid.uuid4().hex path = uuid.uuid4().hex return f"az://{container}/{path}" - def get_query(self) -> str: + def get_query(self, tmp_path) -> str: return "az://container/path/test.txt" def get_storage_provider_cls(self) -> Type[StorageProviderBase]: From 68cf6a8a9b45b973bde7d3d6d0b6a54f1f99496d Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:37:42 -0800 Subject: [PATCH 18/34] tests --- tests/tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index 8ad3910..26d15e0 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -24,4 +24,7 @@ def get_storage_provider_cls(self) -> Type[StorageProviderBase]: def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase]: # instantiate StorageProviderSettings of this plugin as appropriate - return StorageProviderSettings(endpoint_url="", access_key="") + # public dataset storage account and public sas token + ep = "https://datasetreferencegenomes.blob.core.windows.net/dataset" + sas = "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D" + return StorageProviderSettings(endpoint_url=ep, sas_token=sas) From 6b1ad5436e2677386eba72882957b9bdfcec90fb Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:38:25 -0800 Subject: [PATCH 19/34] compact --- tests/tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index 26d15e0..fa3165b 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -26,5 +26,8 @@ def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase] # instantiate StorageProviderSettings of this plugin as appropriate # public dataset storage account and public sas token ep = "https://datasetreferencegenomes.blob.core.windows.net/dataset" - sas = "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D" + sas = ( + "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&", + "si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D", + ) return StorageProviderSettings(endpoint_url=ep, sas_token=sas) From 12c862e9a1adf4b311a95095dcf48e076fa596b1 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 13:40:33 -0800 Subject: [PATCH 20/34] ref --- tests/tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index fa3165b..d765152 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -24,7 +24,8 @@ def get_storage_provider_cls(self) -> Type[StorageProviderBase]: def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase]: # instantiate StorageProviderSettings of this plugin as appropriate - # public dataset storage account and public sas token + # public dataset storage account and public sas token: + # https://learn.microsoft.com/en-us/azure/open-datasets/dataset-genomics-data-lake ep = "https://datasetreferencegenomes.blob.core.windows.net/dataset" sas = ( "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&", From b1308d1fb50e04012e9983e713d10dd75c9c797b Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 15:07:43 -0800 Subject: [PATCH 21/34] gitignore, tests --- .gitignore | 163 ++++++++++++++++++++++++ snakemake_storage_plugin_az/__init__.py | 32 ++++- tests/tests.py | 11 +- 3 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..796df40 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +poetry.lock +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +poetry.lock \ No newline at end of file diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index a5a0715..74282dc 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -7,7 +7,9 @@ from snakemake_interface_storage_plugins.storage_provider import ( StorageProviderBase, StorageQueryValidationResult, + ExampleQuery, ) +from snakemake_interface_storage_plugins.common import Operation from snakemake_interface_storage_plugins.storage_object import ( StorageObjectRead, StorageObjectWrite, @@ -118,6 +120,32 @@ def __post_init__(self): self.settings.endpoint_url, credential=self.settings.credential ) + def use_rate_limiter(self) -> bool: + """Return False if no rate limiting is needed for this provider.""" + return False + + def default_max_requests_per_second(self) -> float: + """Return the default maximum number of requests per second for this storage + provider.""" + ... + + def rate_limiter_key(self, query: str, operation: Operation): + """Return a key for identifying a rate limiter given a query and an operation. + + This is used to identify a rate limiter for the query. + E.g. for a storage provider like http that would be the host name. + For s3 it might be just the endpoint URL. + """ + ... + + @classmethod + def example_query(cls) -> ExampleQuery: + """Return an example query with description for this storage provider.""" + return ExampleQuery( + query="az://container/path/example/file.txt", + description="A file in an Azure Blob Storage Container", + ) + @classmethod def is_valid_query(cls, query: str) -> StorageQueryValidationResult: """Return whether the given query is valid for this storage provider.""" @@ -203,9 +231,9 @@ def local_suffix(self) -> str: """Return a unique suffix for the local path, determined from self.query.""" return f"{self.container_name}/{self.path}" - def close(self): + def cleanup(self): # Close any open connections, unmount stuff, etc. - ... + pass # Fallible methods should implement some retry logic. # The easiest way to do this (but not the only one) is to use the retry_decorator diff --git a/tests/tests.py b/tests/tests.py index d765152..73a6ca3 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1,5 +1,5 @@ import uuid -from typing import Optional, Type +from typing import List, Optional, Type from snakemake_interface_storage_plugins.tests import TestStorageBase from snakemake_interface_storage_plugins.storage_provider import StorageProviderBase from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase @@ -20,7 +20,7 @@ def get_query(self, tmp_path) -> str: def get_storage_provider_cls(self) -> Type[StorageProviderBase]: # Return the StorageProvider class of this plugin - StorageProvider + return StorageProvider def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase]: # instantiate StorageProviderSettings of this plugin as appropriate @@ -28,7 +28,10 @@ def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase] # https://learn.microsoft.com/en-us/azure/open-datasets/dataset-genomics-data-lake ep = "https://datasetreferencegenomes.blob.core.windows.net/dataset" sas = ( - "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&", - "si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D", + "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&" + "si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D" ) return StorageProviderSettings(endpoint_url=ep, sas_token=sas) + + def get_example_args(self) -> List[str]: + return [] From 2a16f1fca1deae3d68e5f83b6bb14cb0c956e091 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 22:49:01 -0800 Subject: [PATCH 22/34] tests, azurite mock backend --- .github/workflows/ci.yml | 6 + .vscode/settings.json | 7 + snakemake_storage_plugin_az/__init__.py | 182 +++++++++++++++++++----- tests/conftest.py | 45 ++++++ tests/tests.py | 27 ++-- 5 files changed, 222 insertions(+), 45 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 tests/conftest.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e94c6e0..a33de7d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,6 +72,12 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "${{ env.PYTHON_VERSION }}" + + - name: Install Azurite + id: azuright + uses: potatoqualitee/azuright@v1.1 + with: + self-signed-cert: true - name: Install poetry run: pipx install poetry diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..30d653b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "[python]": { + "editor.codeActionsOnSave": { + "source.organizeImports": true + } + }, +} \ No newline at end of file diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 74282dc..01f162a 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -1,22 +1,26 @@ -from dataclasses import dataclass, field import re -from urllib.parse import urlparse -from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient +from dataclasses import dataclass, field +from pathlib import PosixPath from typing import Any, Iterable, Optional -from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase -from snakemake_interface_storage_plugins.storage_provider import ( - StorageProviderBase, - StorageQueryValidationResult, - ExampleQuery, -) +from urllib.parse import unquote, urlparse + +from azure.core.credentials import AzureSasCredential +from azure.core.exceptions import HttpResponseError +from azure.storage.blob import BlobClient, BlobServiceClient, ContainerClient from snakemake_interface_storage_plugins.common import Operation +from snakemake_interface_storage_plugins.io import IOCacheStorageInterface +from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase from snakemake_interface_storage_plugins.storage_object import ( + StorageObjectGlob, StorageObjectRead, StorageObjectWrite, - StorageObjectGlob, retry_decorator, ) -from snakemake_interface_storage_plugins.io import IOCacheStorageInterface +from snakemake_interface_storage_plugins.storage_provider import ( + ExampleQuery, + StorageProviderBase, + StorageQueryValidationResult, +) def is_valid_azure_blob_endpoint(endpoint_url: str) -> bool: @@ -32,8 +36,11 @@ def is_valid_azure_blob_endpoint(endpoint_url: str) -> bool: url_pattern = re.compile( r"^https:\/\/[a-z0-9]+(\.[a-z0-9]+)*\.blob\.core\.windows\.net\/?(.+)?$" ) + mock_pattern = re.compile(r"^http://127\.0\.0\.1:10000/[a-zA-Z0-9]+$") - return bool(url_pattern.match(endpoint_url)) + return bool(url_pattern.match(endpoint_url)) or bool( + mock_pattern.match(endpoint_url) + ) # Optional: @@ -89,17 +96,38 @@ class StorageProviderSettings(StorageProviderSettingsBase): }, ) + def endpoint_url_is_mock(self): + """Returns true if endpoint url is mock pattern""" + mock_pattern = re.compile(r"^http://127\.0\.0\.1:10000/[a-zA-Z0-9]+$") + return mock_pattern.match(self.endpoint_url) + + def set_storage_account_name(self): + """Sets the storage account name""" + try: + if self.endpoint_url_is_mock: + parsed = urlparse(self.endpoint_url) + self.storage_account_name = parsed.path.lstrip("/") + else: + parsed = urlparse(self.endpoint_url) + account_name = parsed.netloc + if account_name != "": + self.storage_account_name = account_name.split(".")[0] + except Exception as e: + raise ValueError(f"unable to set storage account name: {e}") + def __post_init__(self): if not is_valid_azure_blob_endpoint(self.endpoint_url): raise ValueError( - f"Invalid Azure Storage Blob Endpoint URL: {self.endpoint_url}" + f"invalid Azure Storage Blob Endpoint URL: {self.endpoint_url}" ) + self.set_storage_account_name() + self.credential = None if self.access_key: self.credential = self.access_key elif self.sas_token: - self.credential = self.sas_token + self.credential = AzureSasCredential(self.sas_token) # Required: @@ -142,8 +170,8 @@ def rate_limiter_key(self, query: str, operation: Operation): def example_query(cls) -> ExampleQuery: """Return an example query with description for this storage provider.""" return ExampleQuery( - query="az://container/path/example/file.txt", - description="A file in an Azure Blob Storage Container", + query="az://account/container/path/example/file.txt", + description="A file in an Azure Blob Storage Account Container", ) @classmethod @@ -166,11 +194,41 @@ def is_valid_query(cls, query: str) -> StorageQueryValidationResult: valid=False, reason="must start with az (az://...)", ) + if not parsed.netloc.isalnum: + return StorageQueryValidationResult( + query=query, + valid=False, + reason="azure storage account name must be strictly alphanumeric", + ) return StorageQueryValidationResult( query=query, valid=True, ) + def parse_query_parts(self, query: str) -> (str, str, Optional[str]): + """Parses query parts for the provider""" + try: + parsed = urlparse(query) + account = parsed.netloc + + path_parts = PosixPath(unquote(parsed.path)).parts + + container = "" + if len(path_parts) > 2: + container = path_parts[1] + + bpath = "/".join(path_parts[2:]) + + except Exception as e: + raise ValueError(f"unable to parse query parts: {path_parts}, {e}") + + return account, container, bpath + + def get_container_name(self, query: str) -> str: + """Returns the container name from query.""" + _, c, _ = self.parse_query_parts(query) + return c + def list_objects(self, query: Any) -> Iterable[str]: """Return an iterator over all objects in the storage that match the query. @@ -178,9 +236,7 @@ def list_objects(self, query: Any) -> Iterable[str]: """ # parse container name from query - parsed = urlparse(query) - container_name = parsed.netloc - cc = self.bsc.get_container_client(container_name) + cc = self.bsc.get_container_client(self.get_container_name()) return [o for o in cc.list_blob_names()] @@ -199,17 +255,44 @@ def __post_init__(self): # Alternatively, you can e.g. prepare a connection to your storage backend here. # and set additional attributes. if self.is_valid_query(): - parsed = urlparse(self.query) - self.container_name = parsed.netloc - self.path = parsed.path.lstrip("/") - - # container client - self.cc: ContainerClient = self.provider.bsc.get_container_client( + ( + self.account_name, + self.container_name, + self.blob_path, + ) = self.provider.parse_query_parts(self.query) + self._local_suffix = self._local_suffix_from_key(self.blob_path) + + if self.account_name != self.provider.settings.storage_account_name: + raise ValueError( + f"query account name: {self.account_name} must " + "match that from endpoint url: " + f"{self.provider.settings.storage_account_name}" + ) + + def container(self): + # initialize container client + try: + cc: ContainerClient = self.provider.bsc.get_container_client( self.container_name ) + except Exception as e: + raise ConnectionError( + "failed to initialize ContainerClient for container:" + f" {self.container_name}: {e}" + ) + return cc - # blob client - self.bc: BlobClient = self.cc.get_blob_client(self.path) + def blob(self): + # initialize blob + try: + bc: BlobClient = self.provider.bsc.get_container_client( + self.container_name + ).get_blob_client(self.blob_path) + except Exception as e: + raise ConnectionError( + f"failed to initialize BlobClient for blob:" f" {self.blob_path}: {e}" + ) + return bc async def inventory(self, cache: IOCacheStorageInterface): """From this file, try to find as much existence and modification date @@ -220,7 +303,21 @@ async def inventory(self, cache: IOCacheStorageInterface): # If this is implemented in a storage object, results have to be stored in # the given IOCache object. - pass + + if self.get_inventory_parent(): + # found + return + + # bucket exists + if not self.container_exists(): + cache.exists_in_storage[self.cache_key] = False + else: + cache.exists_in_storage[self.cache_key] = True + for o in self.container().list_blobs(): + key = self.cache_key(self._local_suffix_from_key(o.name)) + cache.mtime[key] = o.last_modified.timestamp() + cache.size[key] = o.size + cache.exists_in_storage[key] = True def get_inventory_parent(self) -> Optional[str]: """Return the parent directory of this object.""" @@ -229,7 +326,10 @@ def get_inventory_parent(self) -> Optional[str]: def local_suffix(self) -> str: """Return a unique suffix for the local path, determined from self.query.""" - return f"{self.container_name}/{self.path}" + return self._local_suffix + + def _local_suffix_from_key(self, key: str) -> str: + return f"{self.container_name}/{key}" def cleanup(self): # Close any open connections, unmount stuff, etc. @@ -241,17 +341,20 @@ def cleanup(self): @retry_decorator def exists(self) -> bool: # return True if the object exists - return self.bc.exists() + if not self.container_exists(): + return False + else: + return self.blob().exists() @retry_decorator def mtime(self) -> float: # return the modification time - return self.bc.get_blob_properties().last_modified.timestamp() + return self.blob().get_blob_properties().last_modified.timestamp() @retry_decorator def size(self) -> int: # return the size in bytes - return self.bc.get_blob_properties().size + return self.blob().get_blob_properties().size @retry_decorator def retrieve_object(self): @@ -285,7 +388,14 @@ def list_candidate_matches(self) -> Iterable[str]: def container_exists(self) -> bool: """Returns True if container exists, False otherwise.""" try: - container_name = urlparse(self.query).netloc - return self.provider.bsc.get_container_client(container_name) - except Exception: - return False + return self.container().exists() + except HttpResponseError as e: + if e.status_code == 403: + raise PermissionError( + "the provided credential does not have permission to list " + "containers on this storage account" + ) + else: + raise e + except Exception as e: + raise e diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..7c229c7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,45 @@ +# configures the use of azurite mock storage backend +from azure.core.exceptions import HttpResponseError +from azure.storage.blob import BlobClient, BlobServiceClient + +AZURITE_STORAGE_ACCOUNT = "devstoreaccount1" +AZURITE_TEST_CONTAINER = "test-container" +AZURITE_TEST_BLOB = "example/test.txt" + +AZURITE_MOCK_KEY = ( + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw==" +) + +AZURITE_MOCK_ENDPOINT = f"http://127.0.0.1:10000/{AZURITE_STORAGE_ACCOUNT}" + +AZURITE_CONNECTION_STRING = ( + "DefaultEndpointsProtocol=http;" + f"AccountName={AZURITE_STORAGE_ACCOUNT};" + f"AccountKey={AZURITE_MOCK_KEY};" + f"BlobEndpoint=http://127.0.0.1:10000/{AZURITE_STORAGE_ACCOUNT};" +) + + +# bootstrap azurite storage backend for tests +def pytest_generate_tests(metafunc): + blob_service_client = BlobServiceClient.from_connection_string( + AZURITE_CONNECTION_STRING + ) + try: + blob_service_client.create_container(AZURITE_TEST_CONTAINER) + except HttpResponseError as e: + # continue if container exists + if e.status_code == 409: + pass + except Exception as e: + raise e + + # create a test blob with azurite + bc: BlobClient = blob_service_client.get_blob_client( + AZURITE_TEST_CONTAINER, AZURITE_TEST_BLOB + ) + try: + bc.upload_blob("Hello, World", overwrite=True) + except Exception as e: + raise e diff --git a/tests/tests.py b/tests/tests.py index 73a6ca3..33d5126 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1,8 +1,17 @@ import uuid from typing import List, Optional, Type -from snakemake_interface_storage_plugins.tests import TestStorageBase -from snakemake_interface_storage_plugins.storage_provider import StorageProviderBase + +from conftest import ( + AZURITE_MOCK_ENDPOINT, + AZURITE_MOCK_KEY, + AZURITE_STORAGE_ACCOUNT, + AZURITE_TEST_BLOB, + AZURITE_TEST_CONTAINER, +) from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase +from snakemake_interface_storage_plugins.storage_provider import StorageProviderBase +from snakemake_interface_storage_plugins.tests import TestStorageBase + from snakemake_storage_plugin_az import StorageProvider, StorageProviderSettings @@ -13,10 +22,13 @@ class TestStorageNoSettings(TestStorageBase): def get_query_not_existing(self, tmp_path) -> str: container = uuid.uuid4().hex path = uuid.uuid4().hex - return f"az://{container}/{path}" + return f"az://{AZURITE_STORAGE_ACCOUNT}/{container}/{path}" def get_query(self, tmp_path) -> str: - return "az://container/path/test.txt" + return ( + f"az://{AZURITE_STORAGE_ACCOUNT}/{AZURITE_TEST_CONTAINER}/" + f"{AZURITE_TEST_BLOB}" + ) def get_storage_provider_cls(self) -> Type[StorageProviderBase]: # Return the StorageProvider class of this plugin @@ -26,12 +38,9 @@ def get_storage_provider_settings(self) -> Optional[StorageProviderSettingsBase] # instantiate StorageProviderSettings of this plugin as appropriate # public dataset storage account and public sas token: # https://learn.microsoft.com/en-us/azure/open-datasets/dataset-genomics-data-lake - ep = "https://datasetreferencegenomes.blob.core.windows.net/dataset" - sas = ( - "sv=2019-02-02&se=2050-01-01T08%3A00%3A00Z&" - "si=prod&sr=c&sig=JtQoPFqiC24GiEB7v9zHLi4RrA2Kd1r%2F3iFt2l9%2FlV8%3D" + return StorageProviderSettings( + endpoint_url=AZURITE_MOCK_ENDPOINT, access_key=AZURITE_MOCK_KEY ) - return StorageProviderSettings(endpoint_url=ep, sas_token=sas) def get_example_args(self) -> List[str]: return [] From 72331688d2a9e709eb140de04a9bf33f7a98d534 Mon Sep 17 00:00:00 2001 From: jakevc Date: Thu, 16 Nov 2023 22:55:39 -0800 Subject: [PATCH 23/34] azurite service contianer --- .github/workflows/ci.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a33de7d..000356b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,15 @@ jobs: testing: runs-on: ubuntu-latest + + services: + azurite: + image: mcr.microsoft.com/azure-storage/azurite + ports: + - 10000:10000 + - 10001:10001 + - 10002:10002 + steps: - uses: actions/checkout@v3 @@ -73,12 +82,6 @@ jobs: with: python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Azurite - id: azuright - uses: potatoqualitee/azuright@v1.1 - with: - self-signed-cert: true - - name: Install poetry run: pipx install poetry From 62fd2c72f72a96ee097b91a2d4c329e618905150 Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 07:48:00 -0800 Subject: [PATCH 24/34] bump --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index bfb7717..7cd8b65 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1386,13 +1386,13 @@ reports = ["pygments"] [[package]] name = "snakemake-interface-common" -version = "1.14.2" +version = "1.14.3" description = "Common functions and classes for Snakemake and its plugins" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "snakemake_interface_common-1.14.2-py3-none-any.whl", hash = "sha256:31b8542f313602d26a22a379888c9ecde595d887708e9c27a414525051ba4ebf"}, - {file = "snakemake_interface_common-1.14.2.tar.gz", hash = "sha256:377aa7760220f92deade72473cd9409b8715eaa2cd6f76b1965e035a5556ffe2"}, + {file = "snakemake_interface_common-1.14.3-py3-none-any.whl", hash = "sha256:8188aec5d92fc3fcfc5d7e7e12d383c98f721c4273a8230e5fddec4020d7ee3d"}, + {file = "snakemake_interface_common-1.14.3.tar.gz", hash = "sha256:1ea9486d26841c169a85d9955ff9e8a3b720d9d64fda357d70e87b3cd70eeeee"}, ] [package.dependencies] @@ -1611,4 +1611,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "0f9d6f1ce307fb63620d0a019751fe5d71b12edcce1dbbb7586779ca10d18868" +content-hash = "dbb9d1615acbd416b520667f5dd42f112e0daf7ebbd9bd0305ba311e2059858a" diff --git a/pyproject.toml b/pyproject.toml index 9b94ce1..d5d10f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.11" -snakemake-interface-common = "^1.14.2" +snakemake-interface-common = "1.14.3" snakemake-interface-storage-plugins = "2.0.1" azure-storage-blob = "^12.19.0" azure-core = "^1.29.5" From a30b64fb014f6d5907e6ca7fdf05fd56d540108a Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 09:39:51 -0800 Subject: [PATCH 25/34] docs, store, remove --- pyproject.toml | 4 +- snakemake_storage_plugin_az/__init__.py | 90 +++++++++++++++++++------ 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d5d10f6..e6e0802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [tool.poetry] name = "snakemake-storage-plugin-az" version = "0.1.0" -description = "" -authors = ["jakevc "] +description = "A Snakemake storage plugin to read and write from Azure Blob Storage" +authors = ["Jake VanCampen "] readme = "README.md" [tool.poetry.dependencies] diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 01f162a..ae9e12d 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -25,13 +25,17 @@ def is_valid_azure_blob_endpoint(endpoint_url: str) -> bool: """ - Validates the Azure Blob endpoint pattern. + Validates the Azure Blob endpoint. + + Returns True if endpoint_url matches the Azure Blob Storage + endpoint regex or if endpoint_url matches the local + azurite storage emulator endpoint used for testing. Args: - endpoint_url (str): The name of the Azure Blob Storage Account endpoint + endpoint_url (str): The name of the Azure Blob Storage Account endpoint Returns: - bool: True if the endpoint_url is a valid Azure Blob endpoint. + bool: True if the endpoint_url is a valid Azure Blob endpoint. """ url_pattern = re.compile( r"^https:\/\/[a-z0-9]+(\.[a-z0-9]+)*\.blob\.core\.windows\.net\/?(.+)?$" @@ -96,13 +100,26 @@ class StorageProviderSettings(StorageProviderSettingsBase): }, ) - def endpoint_url_is_mock(self): - """Returns true if endpoint url is mock pattern""" + def endpoint_url_is_mock(self) -> bool: + """ + Returns true if endpoint url matches the mock pattern. + + Returns: + bool: True if self.endpoint_url matches mock_pattern, False otherwise + """ mock_pattern = re.compile(r"^http://127\.0\.0\.1:10000/[a-zA-Z0-9]+$") return mock_pattern.match(self.endpoint_url) def set_storage_account_name(self): - """Sets the storage account name""" + """ + Sets the storage account name + + Sets self.storage_account_name by parsing from the endpoint_url. If the endpoint + is the local emulator, the parsing is slightly different. + + Raises: + ValueError: if urlparse fails to parse the endpoint_url or parse the path. + """ try: if self.endpoint_url_is_mock: parsed = urlparse(self.endpoint_url) @@ -176,7 +193,16 @@ def example_query(cls) -> ExampleQuery: @classmethod def is_valid_query(cls, query: str) -> StorageQueryValidationResult: - """Return whether the given query is valid for this storage provider.""" + """ + Return whether the given query is valid for this storage provider. + + Args: + query (str): the storage query string. + + Returns: + StoryQueryValidationResult: the query validation result describes if the + query is valid or not, and if not specifies the reason. + """ # Ensure that also queries containing wildcards (e.g. {sample}) are accepted # and considered valid. The wildcards will be resolved before the storage # object is actually used. @@ -206,7 +232,16 @@ def is_valid_query(cls, query: str) -> StorageQueryValidationResult: ) def parse_query_parts(self, query: str) -> (str, str, Optional[str]): - """Parses query parts for the provider""" + """ + Parses query parts for the provider. + + Args: + query (str): the azure storage query string. + + Returns: + (account: str, container: str, bpath: str): a tuple of the storage details + parsed from the query string. + """ try: parsed = urlparse(query) account = parsed.netloc @@ -225,7 +260,9 @@ def parse_query_parts(self, query: str) -> (str, str, Optional[str]): return account, container, bpath def get_container_name(self, query: str) -> str: - """Returns the container name from query.""" + """ + Returns the container name from query. + """ _, c, _ = self.parse_query_parts(query) return c @@ -234,8 +271,6 @@ def list_objects(self, query: Any) -> Iterable[str]: This is optional and can raise a NotImplementedError() instead. """ - - # parse container name from query cc = self.bsc.get_container_client(self.get_container_name()) return [o for o in cc.list_blob_names()] @@ -270,7 +305,7 @@ def __post_init__(self): ) def container(self): - # initialize container client + """Return initialized ContainerClient""" try: cc: ContainerClient = self.provider.bsc.get_container_client( self.container_name @@ -283,7 +318,7 @@ def container(self): return cc def blob(self): - # initialize blob + """Return initialized BlobClient""" try: bc: BlobClient = self.provider.bsc.get_container_client( self.container_name @@ -340,7 +375,7 @@ def cleanup(self): # provided by snakemake-interface-storage-plugins. @retry_decorator def exists(self) -> bool: - # return True if the object exists + """Return True if the object exists.""" if not self.container_exists(): return False else: @@ -348,12 +383,12 @@ def exists(self) -> bool: @retry_decorator def mtime(self) -> float: - # return the modification time + """Returns the modification time""" return self.blob().get_blob_properties().last_modified.timestamp() @retry_decorator def size(self) -> int: - # return the size in bytes + """Returns the size in bytes""" return self.blob().get_blob_properties().size @retry_decorator @@ -366,14 +401,31 @@ def retrieve_object(self): @retry_decorator def store_object(self): + """ + Stores the local object in cloud storage. + + If the storage container does not exist, it is created. This check creates the + dependency that one must provide a credential with container create permissions. + """ + + if not self.container_exists(): + self.container().create_container(self.container_name) + # Ensure that the object is stored at the location specified by # self.local_path(). - ... + if self.local_path().exists(): + self.upload_blob_to_storage() + + def upload_blob_to_storage(self): + """Uploads the blob to storage, opening a connection and streaming the bytes.""" + with open(self.local_path, "rb") as data: + self.blob().upload_blob(data, overwrite=True) @retry_decorator def remove(self): - # Remove the object from the storage. - ... + """Removes the object from blob storage.""" + if self.blob().exists(): + self.blob().delete_blob() # The following to methods are only required if the class inherits from # StorageObjectGlob. From 1a5a91cfebf2475baf3aea02d031404d0fe6709d Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 09:42:38 -0800 Subject: [PATCH 26/34] handle 403 --- snakemake_storage_plugin_az/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index ae9e12d..46187c5 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -408,8 +408,13 @@ def store_object(self): dependency that one must provide a credential with container create permissions. """ - if not self.container_exists(): - self.container().create_container(self.container_name) + try: + if not self.container_exists(): + self.container().create_container(self.container_name) + # pass on container exists exception + except Exception as e: + if e.status_code == 403: + pass # Ensure that the object is stored at the location specified by # self.local_path(). From bab94037b439c48cf5cb452164abf0239bf029a0 Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 09:45:15 -0800 Subject: [PATCH 27/34] connection string from mock endpoint --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7c229c7..45ca1a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,7 @@ "DefaultEndpointsProtocol=http;" f"AccountName={AZURITE_STORAGE_ACCOUNT};" f"AccountKey={AZURITE_MOCK_KEY};" - f"BlobEndpoint=http://127.0.0.1:10000/{AZURITE_STORAGE_ACCOUNT};" + f"BlobEndpoint={AZURITE_MOCK_ENDPOINT};" ) From 1059b10b293976c0a2b6706699890be23cb5c0ec Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 09:47:18 -0800 Subject: [PATCH 28/34] doc --- snakemake_storage_plugin_az/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 46187c5..1932a96 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -297,6 +297,8 @@ def __post_init__(self): ) = self.provider.parse_query_parts(self.query) self._local_suffix = self._local_suffix_from_key(self.blob_path) + # check the storage account parsed form the endpoint_url + # matches that parsed from the query if self.account_name != self.provider.settings.storage_account_name: raise ValueError( f"query account name: {self.account_name} must " @@ -305,7 +307,7 @@ def __post_init__(self): ) def container(self): - """Return initialized ContainerClient""" + """Return initialized ContainerClient.""" try: cc: ContainerClient = self.provider.bsc.get_container_client( self.container_name @@ -318,7 +320,7 @@ def container(self): return cc def blob(self): - """Return initialized BlobClient""" + """Return initialized BlobClient.""" try: bc: BlobClient = self.provider.bsc.get_container_client( self.container_name @@ -383,12 +385,12 @@ def exists(self) -> bool: @retry_decorator def mtime(self) -> float: - """Returns the modification time""" + """Returns the modification time.""" return self.blob().get_blob_properties().last_modified.timestamp() @retry_decorator def size(self) -> int: - """Returns the size in bytes""" + """Returns the size in bytes.""" return self.blob().get_blob_properties().size @retry_decorator From 3bc8b79f55ab4f75607f2f61292f88a55426caa1 Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 17 Nov 2023 09:50:52 -0800 Subject: [PATCH 29/34] update readme --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 46f48f3..2538fbf 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,19 @@ # Snakemake Storage Plugin Az -Azure Storage plugin for snakemake. +Azure Blob Storage plugin for snakemake. This plugin is in draft form ! This is scaffolded from snakemake/poetry-snakemake-plugin. + +# Testing + +Testing this plugin locally require the azurite storage emulator to be running locally. +This can be setup using the following docker run command: + +``` +docker run -p 10000:10000 mcr.microsoft.com/azure-storage/azurite azurite-blob --blobHost 0.0.0.0 +``` + +Then execute the tests: +``` +poetry run coverage run -m pytest tests/tests.py +``` From f7a8e16b80de4e71d4706ccaad9597a178bbf826 Mon Sep 17 00:00:00 2001 From: jakevc Date: Mon, 20 Nov 2023 11:54:45 -0800 Subject: [PATCH 30/34] use Mtime object for setting mtime in inventory --- snakemake_storage_plugin_az/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 1932a96..1e9e9c5 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -8,7 +8,7 @@ from azure.core.exceptions import HttpResponseError from azure.storage.blob import BlobClient, BlobServiceClient, ContainerClient from snakemake_interface_storage_plugins.common import Operation -from snakemake_interface_storage_plugins.io import IOCacheStorageInterface +from snakemake_interface_storage_plugins.io import IOCacheStorageInterface, Mtime from snakemake_interface_storage_plugins.settings import StorageProviderSettingsBase from snakemake_interface_storage_plugins.storage_object import ( StorageObjectGlob, @@ -352,7 +352,7 @@ async def inventory(self, cache: IOCacheStorageInterface): cache.exists_in_storage[self.cache_key] = True for o in self.container().list_blobs(): key = self.cache_key(self._local_suffix_from_key(o.name)) - cache.mtime[key] = o.last_modified.timestamp() + cache.mtime[key] = Mtime(storage=o.last_modified.timestamp()) cache.size[key] = o.size cache.exists_in_storage[key] = True From 36041d64c7d169cf3fd41a9fa757a2d4b9d40be6 Mon Sep 17 00:00:00 2001 From: jakevc Date: Mon, 20 Nov 2023 12:02:46 -0800 Subject: [PATCH 31/34] IOCacheStorageInterface metho is "exists_remote" --- snakemake_storage_plugin_az/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index 1e9e9c5..b7e53ba 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -347,14 +347,14 @@ async def inventory(self, cache: IOCacheStorageInterface): # bucket exists if not self.container_exists(): - cache.exists_in_storage[self.cache_key] = False + cache.exists_remote[self.cache_key] = False else: - cache.exists_in_storage[self.cache_key] = True + cache.exists_remote[self.cache_key] = True for o in self.container().list_blobs(): key = self.cache_key(self._local_suffix_from_key(o.name)) cache.mtime[key] = Mtime(storage=o.last_modified.timestamp()) cache.size[key] = o.size - cache.exists_in_storage[key] = True + cache.exists_remote[key] = True def get_inventory_parent(self) -> Optional[str]: """Return the parent directory of this object.""" From 3b35738add1b6ded86a166e75a015211a8ff9aaf Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 24 Nov 2023 14:07:00 -0800 Subject: [PATCH 32/34] bump storage plugins interface --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7cd8b65..07d41e5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1401,13 +1401,13 @@ ConfigArgParse = ">=1.7,<2.0" [[package]] name = "snakemake-interface-storage-plugins" -version = "2.0.1" +version = "2.1.0" description = "This package provides a stable interface for interactions between Snakemake and its storage plugins." optional = false python-versions = ">=3.11,<4.0" files = [ - {file = "snakemake_interface_storage_plugins-2.0.1-py3-none-any.whl", hash = "sha256:52cc8dea37fe6a2ba5f07b0ce96b370eb8e6a48e295fbcdc27eee0fc5212409b"}, - {file = "snakemake_interface_storage_plugins-2.0.1.tar.gz", hash = "sha256:cd6c4b3018dabf672e9779799fefbf2fda38c96e701dd53004e1cafa847c0ba6"}, + {file = "snakemake_interface_storage_plugins-2.1.0-py3-none-any.whl", hash = "sha256:5a35cf2b79be588594e97b13615a765f9417c12f65050cb99188ba05ba16f9fe"}, + {file = "snakemake_interface_storage_plugins-2.1.0.tar.gz", hash = "sha256:78f99aff951843b7f98f8f7ce48c9402eb0e14fdda587b93d471a8127e2949b1"}, ] [package.dependencies] @@ -1611,4 +1611,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "dbb9d1615acbd416b520667f5dd42f112e0daf7ebbd9bd0305ba311e2059858a" +content-hash = "e5e16f7684ab5cb3cd44bd7cc509cc3c48710ebfdb6ee2b5b832acc8b83dc439" diff --git a/pyproject.toml b/pyproject.toml index e6e0802..35cd62a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.11" snakemake-interface-common = "1.14.3" -snakemake-interface-storage-plugins = "2.0.1" +snakemake-interface-storage-plugins = "^2.1.0" azure-storage-blob = "^12.19.0" azure-core = "^1.29.5" azure-identity = "^1.15.0" From 86058738f591990d4e2e6689f43a94d49fe84f1c Mon Sep 17 00:00:00 2001 From: jakevc Date: Fri, 24 Nov 2023 14:31:08 -0800 Subject: [PATCH 33/34] no need quotes --- .github/workflows/ci.yml | 16 ++++++++-------- .github/workflows/release-please.yml | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 000356b..a569d4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} - name: Install poetry run: pipx install poetry @@ -28,7 +28,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} cache: poetry - name: Install Dependencies using Poetry @@ -45,17 +45,17 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} - name: Install poetry - run: pipx install poetry + run: pip install poetry - name: Determine dependencies run: poetry lock - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} cache: poetry - name: Install Dependencies using Poetry @@ -80,17 +80,17 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} - name: Install poetry - run: pipx install poetry + run: pip install poetry - name: Determine dependencies run: poetry lock - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} cache: poetry - name: Install dependencies diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 3074df7..58fa224 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} - name: Install poetry run: pipx install poetry @@ -39,7 +39,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "${{ env.PYTHON_VERSION }}" + python-version: ${{ env.PYTHON_VERSION }} cache: poetry - name: Install Dependencies using Poetry From 5e2b176b605b257212c54594641c875f262e17a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Sun, 26 Nov 2023 15:27:42 +0100 Subject: [PATCH 34/34] Update snakemake_storage_plugin_az/__init__.py --- snakemake_storage_plugin_az/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snakemake_storage_plugin_az/__init__.py b/snakemake_storage_plugin_az/__init__.py index b7e53ba..946fe1f 100644 --- a/snakemake_storage_plugin_az/__init__.py +++ b/snakemake_storage_plugin_az/__init__.py @@ -347,9 +347,9 @@ async def inventory(self, cache: IOCacheStorageInterface): # bucket exists if not self.container_exists(): - cache.exists_remote[self.cache_key] = False + cache.exists_in_storage[self.cache_key] = False else: - cache.exists_remote[self.cache_key] = True + cache.exists_in_storage[self.cache_key] = True for o in self.container().list_blobs(): key = self.cache_key(self._local_suffix_from_key(o.name)) cache.mtime[key] = Mtime(storage=o.last_modified.timestamp())