Skip to content

Commit

Permalink
Merge pull request #4 from xenit-eu/MNTSUP-165
Browse files Browse the repository at this point in the history
MNTSUP-165 - speed up content_url query
  • Loading branch information
wimfabri authored Oct 19, 2023
2 parents 53e1a78 + 5b4e720 commit 21cd008
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ jobs:
registry: private.docker.xenit.eu
username: ${{ secrets.CLOUDSMITH_USER }}
password: ${{ secrets.CLOUDSMITH_APIKEY }}
- name: Build docker image
uses: gradle/gradle-build-action@v2
env:
BRANCH_NAME: ${{ github.ref_name }}
with:
arguments: buildDockerImage
- name: Publish docker image
if: ${{ startsWith(github.ref, 'refs/heads/master') }}
uses: gradle/gradle-build-action@v2
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ plugins {
ext {
base_img = 'open-source.docker.xenit.eu/oracle-python'
base_img_version = 'main-2.1.1'
img_version = '2.0.0'
img_version = '2.1.0'
}

createDockerFile {
Expand Down
14 changes: 12 additions & 2 deletions src/main/docker/docker_root/swarmclean.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import records
from dataclasses import dataclass, asdict
import humanfriendly
import binascii

sys.path.insert(0,sys.path[0]+'/castorsdk')
import scspHeaders
Expand Down Expand Up @@ -351,7 +352,7 @@ def list_bucket_contents_filtered(self, filter_function, max_batch_size):
objects = response.json()

if not objects:
return object_list
return { 'list': object_list, 'size': batch_size }

for object in objects:
swarm_object = SwarmObject(**object)
Expand Down Expand Up @@ -456,7 +457,16 @@ def __init__(self, args):

def isDeletionCandidate(self, swarm_object):
if args.filter_method == 'alfresco_db':
result = self.alfresco_db.query_single_value("select count(*) from alf_content_url where content_url like :object_name", {'object_name': f"%/{swarm_object.name}"}) == 0
content_url_short = swarm_object.name[-12:]
content_url_crc = binascii.crc32(bytes(f"swarm://{self.args.swarm_domain}/{swarm_object.name}", 'ascii'))
# table has an index on content_url_short + content_url_crc
result = self.alfresco_db.query_single_value(
"select count(*) from alf_content_url where content_url_short = :content_url_short and content_url_crc = :content_url_crc",
{
'content_url_short': content_url_short,
'content_url_crc': content_url_crc
}
) == 0
elif args.filter_method == 'regex':
result = self.filterRegex.match(swarm_object.name)
logging.trace(f"filter { swarm_object.name }: { bool(result) } - size { humanfriendly.format_size(swarm_object.bytes, binary=True) }")
Expand Down

0 comments on commit 21cd008

Please sign in to comment.