support uploading translations artifacts #1141

Draft
wants to merge 5 commits into base: master

12 changes: 6 additions & 6 deletions addonscript/requirements/base.txt
@@ -1007,9 +1007,9 @@ rsa==4.9 \
--hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \
--hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21
# via python-jose
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1025,9 +1025,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
12 changes: 6 additions & 6 deletions balrogscript/requirements/base.txt
@@ -1022,9 +1022,9 @@ rpds-py==0.23.1 \
# via
# jsonschema
# referencing
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1040,9 +1040,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
24 changes: 12 additions & 12 deletions beetmoverscript/requirements/base.txt
@@ -130,13 +130,13 @@ binaryornot==0.4.4 \
--hash=sha256:359501dfc9d40632edc9fac890e19542db1a287bbcfa58175b66658392018061 \
--hash=sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4
# via cookiecutter
boto3==1.37.0 \
--hash=sha256:01015b38017876d79efd7273f35d9a4adfba505237159621365bed21b9b65eca \
--hash=sha256:03bd8c93b226f07d944fd6b022e11a307bff94ab6a21d51675d7e3ea81ee8424
boto3==1.37.1 \
--hash=sha256:4320441f904435a1b85e6ecb81793192e522c737cc9ed6566014e29f0a11cb22 \
--hash=sha256:96d18f7feb0c1fcb95f8837b74b6c8880e1b4e35ce5f8a8f8cb243a090c278ed
# via -r requirements/base.in
botocore==1.37.0 \
--hash=sha256:b129d091a8360b4152ab65327186bf4e250de827c4a9b7ddf40a72b1acf1f3c1 \
--hash=sha256:d01661f38c0edac87424344cdf4169f3ab9bc1bf1b677c8b230d025eb66c54a3
botocore==1.37.1 \
--hash=sha256:b194db8fb2a0ffba53568c364ae26166e7eec0445496b2ac86a6e142f3dd982f \
--hash=sha256:c1db1bfc5d8c6b3b6d1ca6794f605294b4264e82a7e727b88e0fef9c2b9fbb9c
# via
# boto3
# s3transfer
@@ -1196,9 +1196,9 @@ s3transfer==0.11.2 \
--hash=sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f \
--hash=sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc
# via boto3
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1213,9 +1213,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
2 changes: 2 additions & 0 deletions beetmoverscript/src/beetmoverscript/constants.py
@@ -87,6 +87,8 @@

ARTIFACT_REGISTRY_ACTIONS = ("import-from-gcs-to-artifact-registry",)

TRANSLATIONS_ACTIONS = ("upload-translations-artifacts",)

# XXX this is a fairly clunky way of specifying which files to copy from
# candidates to releases -- let's find a nicer way of doing this.
# XXX if we keep this, let's make it configurable? overridable in config?
94 changes: 94 additions & 0 deletions beetmoverscript/src/beetmoverscript/data/upload_translations_artifacts_task_schema.json
@@ -0,0 +1,94 @@
{
"title": "Taskcluster beetmover upload translations artifacts task schema",
"type": "object",
"properties": {
"dependencies": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "string"
}
},
"payload": {
"type": "object",
"properties": {
"dryrun": {
"type": "boolean"
},
"releaseProperties": {
"type": "object",
"properties": {
"appName": {
"type": "string"
}
},
"required": [
"appName"
]
},
"upstreamArtifacts": {
"type": "array",
"items": {
"type": "object",
"properties": {
"taskType": {
"type": "string"
},
"taskId": {
"type": "string"
},
"paths": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "string"
}
}
},
"required": ["taskId", "taskType", "paths"]
},
"minItems": 1,
"uniqueItems": true
},
"artifactMap": {
"type": "array",
"items": {
"type": "object",
"properties": {
"taskId": {
"type": "string"
},
"paths": {
"type": "object",
"minItems": 1,
"uniqueItems": true,
"properties": {
"destinations": {
"type": "array",
"minItems": 1,
"items": {
"type": "string"
}
}
}
}
},
"required": [
"taskId",
"paths"
]
},
"minItems": 1,
"uniqueItems": true
},
"maxRunTime": {
"type": "number"
}
},
"required": ["dryrun", "releaseProperties", "upstreamArtifacts", "artifactMap"]
}
},
"required": ["payload", "dependencies"]
}
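
As a sanity check, here is a minimal sketch of a task that should validate against the new schema, using the `jsonschema` library. The task ID, app name, paths, and destinations are all invented, and the schema filename assumes the one registered in `script.py` below.

```python
import json

from jsonschema import validate

with open("upload_translations_artifacts_task_schema.json") as f:
    schema = json.load(f)

task = {
    "dependencies": ["aaaaaaaaaaaaaaaaaaaaaa"],
    "payload": {
        "dryrun": True,
        "releaseProperties": {"appName": "translations"},
        "upstreamArtifacts": [
            {
                "taskId": "aaaaaaaaaaaaaaaaaaaaaa",
                "taskType": "train",
                "paths": ["public/build/model.npz"],
            }
        ],
        "artifactMap": [
            {
                "taskId": "aaaaaaaaaaaaaaaaaaaaaa",
                # Globs here are resolved to concrete paths at runtime; see
                # get_concrete_artifact_map_from_globbed in script.py below.
                "paths": {"*": {"destinations": ["models/en-de/"]}},
            }
        ],
    },
}

validate(instance=task, schema=schema)  # raises jsonschema.ValidationError if malformed
```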
4 changes: 2 additions & 2 deletions beetmoverscript/src/beetmoverscript/gcloud.py
@@ -105,8 +105,8 @@ def setup_gcs_credentials(raw_creds):
async def upload_to_gcs(context, target_path, path, expiry=None):
product = get_product_name(context.task, context.config)
mime_type = mimetypes.guess_type(path)[0]
if not mime_type:
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
# if not mime_type:
# raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
bucket_name = get_bucket_name(context, product, "gcloud")

bucket = Bucket(context.gcs_client, name=bucket_name)
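
For context on the disabled guard: `mimetypes.guess_type()` resolves purely by file extension, and translations artifacts often use extensions the standard table does not know, which would have made the old check fail otherwise valid uploads. A quick illustration (the filenames are assumptions about typical translations outputs):

```python
import mimetypes

# A well-known extension resolves fine:
print(mimetypes.guess_type("checksums.txt")[0])    # text/plain

# ...but typical model/corpus artifacts do not:
print(mimetypes.guess_type("model.en-de.npz")[0])  # None
print(mimetypes.guess_type("corpus.en.zst")[0])    # None on current CPython
```

With the check gone, `mime_type` may be `None`, in which case GCS presumably falls back to its default content type (`application/octet-stream`).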
143 changes: 143 additions & 0 deletions beetmoverscript/src/beetmoverscript/script.py
@@ -2,17 +2,22 @@
"""Beetmover script"""

import asyncio
import copy
import fnmatch
import logging
import mimetypes
import os
import os.path
import re
import sys
from collections import defaultdict
from multiprocessing.pool import ThreadPool

import aiohttp
import boto3
from botocore.exceptions import ClientError
from redo import retry
from scriptworker import artifacts as scriptworker_artifacts
from scriptworker import client
from scriptworker.exceptions import (
ScriptWorkerRetryException,
@@ -273,6 +278,142 @@ async def push_to_maven(context):
)


# TODO: maybe call upstreamArtifactPaths something different?
def get_concrete_artifact_map_from_globbed(upstreamArtifactPaths, artifactMap, strip_prefixes=["public/build", "public/logs"]):
# Sanity check inputs. Each file in upstreamArtifactPaths should match either:
# - One non "*" glob in artifactMap
# - A non "*" glob and "*" (in which case the former takes precedence)
# - "*" only
# TODO: maybe move this sanity check out elsewhere?
# Additionally, each destination should only have one matching artifact
# (i.e. nothing should be overwritten).

# upstreamArtifactPaths here is in the form of:
# {
# "taskId1": [
# "/path/to/file",
# "/path/to/file2",
# ],
# "taskId2": [
# "/path/to/file",
# "/path/to/file2",
# ],
# }
# artifactMap is in the form of:
# [
# {
# "taskId": "taskId1",
# "paths": {
# "*": {
# "destinations": [
# "dest1",
# ]
# }
# },
# }
# ]

concreteArtifactMap = []
errors = []

for map_ in artifactMap:
concretePaths = {}

full_glob_destinations = map_["paths"].get("*", {}).get("destinations")
other_paths = copy.deepcopy(map_["paths"])
if "*" in other_paths:
del other_paths["*"]

for taskId, artifacts in upstreamArtifactPaths.items():
if map_["taskId"] != taskId:
continue

for artifact in artifacts:
retained_artifact_path = artifact
for sp in strip_prefixes:
if sp in artifact:
retained_artifact_path = retained_artifact_path[retained_artifact_path.find(sp) + len(sp) :]
destinations = []
# We need to look at non-'*' paths separately from '*' paths.

for input_path, output in other_paths.items():
# Skip any input paths that don't match the artifact name.
if "*" in input_path:
if not fnmatch.fnmatch(retained_artifact_path, input_path):
continue
else:
if input_path != artifact:
continue

# If there are already destinations, we've already seen this artifact,
# and we have a clash.
if destinations:
errors.append(f"'{artifact}' matched multiple concrete paths")
else:
destinations.extend(output["destinations"])

# If we have destinations for the "*" glob, any artifacts that
# aren't accounted for by any of the `other_paths` will go to
# the "*" destinations.
if full_glob_destinations and not destinations:
destinations.extend(full_glob_destinations)

if destinations:
concretePaths[artifact] = {"destinations": []}
for d in destinations:
if not d.endswith(retained_artifact_path):
d = os.path.normpath(f"{d}{retained_artifact_path}")
concretePaths[artifact]["destinations"].append(d)

concreteArtifactMap.append(
{
"paths": concretePaths,
"taskId": map_["taskId"],
}
)

if errors:
raise ScriptWorkerTaskException(*errors)

return concreteArtifactMap
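
To make the precedence rules concrete, here is a small worked example with invented task IDs, local paths, and destinations:

```python
upstream_paths = {
    "task1": [
        "/work/cot/task1/public/build/model.npz",
        "/work/cot/task1/public/logs/live.log",
    ]
}
artifact_map = [
    {
        "taskId": "task1",
        "paths": {
            # The non-"*" glob wins for anything it matches...
            "*.log": {"destinations": ["logs/"]},
            # ...and "*" catches whatever is left over.
            "*": {"destinations": ["artifacts/"]},
        },
    }
]

concrete = get_concrete_artifact_map_from_globbed(upstream_paths, artifact_map)
# concrete[0]["paths"] now maps each local file to concrete destinations
# (the "public/build"/"public/logs" prefixes are stripped before appending):
# {
#     "/work/cot/task1/public/build/model.npz": {"destinations": ["artifacts/model.npz"]},
#     "/work/cot/task1/public/logs/live.log": {"destinations": ["logs/live.log"]},
# }
```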


def ensure_no_overwrites_in_artifact_map(artifactMap):
dest_counts = defaultdict(int)
for map_ in artifactMap:
for output_path in map_["paths"].values():
for dest in output_path["destinations"]:
dest_counts[dest] += 1

errors = []
for dest, count in dest_counts.items():
if count > 1:
errors.append(f"'{dest}' would be written to more than once")

if errors:
raise ScriptWorkerTaskException(*errors)

return False
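
And the overwrite guard in action, with a hypothetical clash where two different inputs map to the same destination:

```python
clashing = [
    {
        "taskId": "task1",
        "paths": {
            "/work/a/model.npz": {"destinations": ["models/model.npz"]},
            "/work/b/model.npz": {"destinations": ["models/model.npz"]},
        },
    }
]

# Raises ScriptWorkerTaskException:
#   'models/model.npz' would be written to more than once
ensure_no_overwrites_in_artifact_map(clashing)
```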


async def upload_translations_artifacts(context):
dryrun = context.task["payload"]["dryrun"]
artifactMap = context.task["payload"]["artifactMap"]

# Ignore any failed artifacts; we'll take whatever we can get. All artifacts are considered optional.
# TODO: call ensure_no_overwrites_in_artifact_map somewhere; here, or in get_concrete_artifact_map_from_globbed?
upstreamArtifactPaths = scriptworker_artifacts.get_upstream_artifacts_full_paths_per_task_id(context)[0]
concreteArtifactMap = get_concrete_artifact_map_from_globbed(upstreamArtifactPaths, artifactMap)

for map_ in concreteArtifactMap:
for input_path, outputs in map_["paths"].items():
if dryrun:
log.info(f"Would've uploaded {input_path} to {outputs['destinations']}")
else:
log.info(f"Uploading {input_path} to {outputs['destinations']}")
await retry_upload(context, outputs["destinations"], input_path)


# copy_beets {{{1
def copy_beets(context, from_keys_checksums, to_keys_checksums):
creds = get_credentials(context, "aws")
@@ -334,6 +475,7 @@ def list_bucket_objects(context, s3_resource, prefix):
"direct-push-to-bucket": direct_push_to_bucket,
"push-to-maven": push_to_maven,
"import-from-gcs-to-artifact-registry": import_from_gcs_to_artifact_registry,
"upload-translations-artifacts": upload_translations_artifacts,
}


@@ -731,6 +873,7 @@ def main(config_path=None):
"maven_schema_file": os.path.join(data_dir, "maven_beetmover_task_schema.json"),
"artifactMap_schema_file": os.path.join(data_dir, "artifactMap_beetmover_task_schema.json"),
"import_from_gcs_to_artifact_registry_schema_file": os.path.join(data_dir, "import_from_gcs_to_artifact_registry_task_schema.json"),
"upload_translations_artifacts": os.path.join(data_dir, "upload_translations_artifacts_task_schema.json"),
}

# There are several task schema. Validation occurs in async_main