diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b721070..f3d362f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,12 @@ jobs: registry: private.docker.xenit.eu username: ${{ secrets.CLOUDSMITH_USER }} password: ${{ secrets.CLOUDSMITH_APIKEY }} + - name: Build docker image + uses: gradle/gradle-build-action@v2.3.3 + env: + BRANCH_NAME: ${{ github.ref_name }} + with: + arguments: buildDockerImage - name: Publish docker image if: ${{ startsWith(github.ref, 'refs/heads/master') }} uses: gradle/gradle-build-action@v2.3.3 diff --git a/build.gradle b/build.gradle index 7009ef7..365be54 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ plugins { ext { base_img = 'open-source.docker.xenit.eu/oracle-python' base_img_version = 'main-2.1.1' - img_version = '2.0.0' + img_version = '2.1.0' } createDockerFile { diff --git a/src/main/docker/docker_root/swarmclean.py b/src/main/docker/docker_root/swarmclean.py index 92399c0..0482add 100755 --- a/src/main/docker/docker_root/swarmclean.py +++ b/src/main/docker/docker_root/swarmclean.py @@ -13,6 +13,7 @@ import records from dataclasses import dataclass, asdict import humanfriendly +import binascii sys.path.insert(0,sys.path[0]+'/castorsdk') import scspHeaders @@ -351,7 +352,7 @@ def list_bucket_contents_filtered(self, filter_function, max_batch_size): objects = response.json() if not objects: - return object_list + return { 'list': object_list, 'size': batch_size } for object in objects: swarm_object = SwarmObject(**object) @@ -456,7 +457,16 @@ def __init__(self, args): def isDeletionCandidate(self, swarm_object): if args.filter_method == 'alfresco_db': - result = self.alfresco_db.query_single_value("select count(*) from alf_content_url where content_url like :object_name", {'object_name': f"%/{swarm_object.name}"}) == 0 + content_url_short = swarm_object.name[-12:] + content_url_crc = binascii.crc32(bytes(f"swarm://{self.args.swarm_domain}/{swarm_object.name}", 'ascii')) + # table has an index on content_url_short + content_url_crc + result = self.alfresco_db.query_single_value( + "select count(*) from alf_content_url where content_url_short = :content_url_short and content_url_crc = :content_url_crc", + { + 'content_url_short': content_url_short, + 'content_url_crc': content_url_crc + } + ) == 0 elif args.filter_method == 'regex': result = self.filterRegex.match(swarm_object.name) logging.trace(f"filter { swarm_object.name }: { bool(result) } - size { humanfriendly.format_size(swarm_object.bytes, binary=True) }")