support uploading translations artifacts #1141

Draft
wants to merge 5 commits into base: master

12 changes: 6 additions & 6 deletions addonscript/requirements/base.txt
@@ -1007,9 +1007,9 @@ rsa==4.9 \
--hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \
--hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21
# via python-jose
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1025,9 +1025,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
12 changes: 6 additions & 6 deletions balrogscript/requirements/base.txt
@@ -1022,9 +1022,9 @@ rpds-py==0.23.1 \
# via
# jsonschema
# referencing
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1040,9 +1040,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
24 changes: 12 additions & 12 deletions beetmoverscript/requirements/base.txt
@@ -130,13 +130,13 @@ binaryornot==0.4.4 \
--hash=sha256:359501dfc9d40632edc9fac890e19542db1a287bbcfa58175b66658392018061 \
--hash=sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4
# via cookiecutter
boto3==1.37.0 \
--hash=sha256:01015b38017876d79efd7273f35d9a4adfba505237159621365bed21b9b65eca \
--hash=sha256:03bd8c93b226f07d944fd6b022e11a307bff94ab6a21d51675d7e3ea81ee8424
boto3==1.37.1 \
--hash=sha256:4320441f904435a1b85e6ecb81793192e522c737cc9ed6566014e29f0a11cb22 \
--hash=sha256:96d18f7feb0c1fcb95f8837b74b6c8880e1b4e35ce5f8a8f8cb243a090c278ed
# via -r requirements/base.in
botocore==1.37.0 \
--hash=sha256:b129d091a8360b4152ab65327186bf4e250de827c4a9b7ddf40a72b1acf1f3c1 \
--hash=sha256:d01661f38c0edac87424344cdf4169f3ab9bc1bf1b677c8b230d025eb66c54a3
botocore==1.37.1 \
--hash=sha256:b194db8fb2a0ffba53568c364ae26166e7eec0445496b2ac86a6e142f3dd982f \
--hash=sha256:c1db1bfc5d8c6b3b6d1ca6794f605294b4264e82a7e727b88e0fef9c2b9fbb9c
# via
# boto3
# s3transfer
@@ -1196,9 +1196,9 @@ s3transfer==0.11.2 \
--hash=sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f \
--hash=sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc
# via boto3
scriptworker==60.7.0 \
--hash=sha256:9ebbae8c81ad27fa62d5c8f0b8d9b8e8792fee2d2f6b3fb9d711953327c70e4e \
--hash=sha256:a3286420970afbee519dd61c04dab83973483163c987666f239276aa6e823e0c
scriptworker==60.7.1 \
--hash=sha256:a5c0e6e87ecc4df9ca1eccd7bb246e3f892b33ac2852d02825e7de417d24d9d1 \
--hash=sha256:ed2dcad392744654ec36d210ebc943f40d219d457769ac3b6123775309a4e037
# via -r requirements/base.in
six==1.17.0 \
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
@@ -1213,9 +1213,9 @@ slugid==2.0.0 \
# via
# taskcluster
# taskcluster-taskgraph
taskcluster==82.0.0 \
--hash=sha256:6b31d731aead974c980fd85dd7367167cf0b34e1ab32a3ad3cf4897544f3ed03 \
--hash=sha256:adb73f4a92b06442208ab80f2e768afb601188bbfbf6e895b68226770fbde459
taskcluster==82.0.1 \
--hash=sha256:399096c79ded948048e215a50a4e02937aaee8ef1c9a655a2c2b49a4cea452b3 \
--hash=sha256:f5d09cba026ee0f902aa04e662a9c6079ddb78f999661af9b956c607d12033a2
# via
# -r requirements/base.in
# scriptworker
2 changes: 2 additions & 0 deletions beetmoverscript/src/beetmoverscript/constants.py
@@ -87,6 +87,8 @@

ARTIFACT_REGISTRY_ACTIONS = ("import-from-gcs-to-artifact-registry",)

TRANSLATIONS_ACTIONS = ("upload-translations-artifacts",)

# XXX this is a fairly clunky way of specifying which files to copy from
# candidates to releases -- let's find a nicer way of doing this.
# XXX if we keep this, let's make it configurable? overridable in config?
94 changes: 94 additions & 0 deletions beetmoverscript/src/beetmoverscript/data/upload_translations_artifacts_task_schema.json
@@ -0,0 +1,94 @@
{
"title": "Taskcluster beetmover upload translations artifacts task schema",
"type": "object",
"properties": {
"dependencies": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "string"
}
},
"payload": {
"type": "object",
"properties": {
"dryrun": {
"type": "boolean"
},
"releaseProperties": {
"type": "object",
"properties": {
"appName": {
"type": "string"
}
},
"required": [
"appName"
]
},
"upstreamArtifacts": {
"type": "array",
"items": {
"type": "object",
"properties": {
"taskType": {
"type": "string"
},
"taskId": {
"type": "string"
},
"paths": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "string"
}
}
},
"required": ["taskId", "taskType", "paths"]
},
"minItems": 1,
"uniqueItems": true
},
"artifactMap": {
"type": "array",
"items": {
"type": "object",
"properties": {
"taskId": {
"type": "string"
},
"paths": {
"type": "object",
"minItems": 1,
"uniqueItems": true,
"properties": {
"destinations": {
"type": "array",
"minItems": 1,
"items": {
"type": "string"
}
}
}
}
},
"required": [
"taskId",
"paths"
]
},
"minItems": 1,
"uniqueItems": true
},
"maxRunTime": {
"type": "number"
}
},
"required": ["dryrun", "releaseProperties", "upstreamArtifacts", "artifactMap"]
}
},
"required": ["payload", "dependencies"]
}
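
As a sanity check, here is a minimal sketch of a task that should validate against the new schema, using the `jsonschema` library. The task ID, app name, paths, and destinations are all invented, and the schema filename assumes the one registered in `script.py` below.

```python
import json

from jsonschema import validate

with open("upload_translations_artifacts_task_schema.json") as f:
    schema = json.load(f)

task = {
    "dependencies": ["aaaaaaaaaaaaaaaaaaaaaa"],
    "payload": {
        "dryrun": True,
        "releaseProperties": {"appName": "translations"},
        "upstreamArtifacts": [
            {
                "taskId": "aaaaaaaaaaaaaaaaaaaaaa",
                "taskType": "train",
                "paths": ["public/build/model.npz"],
            }
        ],
        "artifactMap": [
            {
                "taskId": "aaaaaaaaaaaaaaaaaaaaaa",
                # Globs here are resolved to concrete paths at runtime; see
                # get_concrete_artifact_map_from_globbed in script.py below.
                "paths": {"*": {"destinations": ["models/en-de/"]}},
            }
        ],
    },
}

validate(instance=task, schema=schema)  # raises jsonschema.ValidationError if malformed
```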
4 changes: 2 additions & 2 deletions beetmoverscript/src/beetmoverscript/gcloud.py
@@ -105,8 +105,8 @@ def setup_gcs_credentials(raw_creds):
async def upload_to_gcs(context, target_path, path, expiry=None):
product = get_product_name(context.task, context.config)
mime_type = mimetypes.guess_type(path)[0]
if not mime_type:
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
# if not mime_type:
# raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
bucket_name = get_bucket_name(context, product, "gcloud")

bucket = Bucket(context.gcs_client, name=bucket_name)
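
For context on the disabled guard: `mimetypes.guess_type()` resolves purely by file extension, and translations artifacts often use extensions the standard table does not know, which would have made the old check fail otherwise valid uploads. A quick illustration (the filenames are assumptions about typical translations outputs):

```python
import mimetypes

# A well-known extension resolves fine:
print(mimetypes.guess_type("checksums.txt")[0])    # text/plain

# ...but typical model/corpus artifacts do not:
print(mimetypes.guess_type("model.en-de.npz")[0])  # None
print(mimetypes.guess_type("corpus.en.zst")[0])    # None on current CPython
```

With the check gone, `mime_type` may be `None`, in which case GCS presumably falls back to its default content type (`application/octet-stream`).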
143 changes: 143 additions & 0 deletions beetmoverscript/src/beetmoverscript/script.py
@@ -2,17 +2,22 @@
"""Beetmover script"""

import asyncio
import copy
import fnmatch
import logging
import mimetypes
import os
import os.path
import re
import sys
from collections import defaultdict
from multiprocessing.pool import ThreadPool

import aiohttp
import boto3
from botocore.exceptions import ClientError
from redo import retry
from scriptworker import artifacts as scriptworker_artifacts
from scriptworker import client
from scriptworker.exceptions import (
ScriptWorkerRetryException,
@@ -273,6 +278,142 @@ async def push_to_maven(context):
)


# TODO: maybe call upstreamArtifactPaths something different?
def get_concrete_artifact_map_from_globbed(upstreamArtifactPaths, artifactMap, strip_prefixes=["public/build", "public/logs"]):
# Sanity check inputs. Each file in upstreamArtifactPaths should match either:
# - One non "*" glob in artifactMap
# - A non "*" glob and "*" (in which case the former takes precedence)
# - "*" only
# TODO: maybe move this sanity check out elsewhere?
# Additionally, each destination should only have one matching artifact
# (i.e. nothing should be overwritten).

# upstreamArtifactPaths here is in the form of:
# {
# "taskId1": [
# "/path/to/file",
# "/path/to/file2",
# ],
# "taskId2": [
# "/path/to/file",
# "/path/to/file2",
# ],
# }
# artifactMap is in the form of:
# [
# {
# "taskId": "taskId1",
# "paths": {
# "*": {
# "destinations": [
# "dest1",
# ]
# }
# },
# }
# ]

concreteArtifactMap = []
errors = []

for map_ in artifactMap:
concretePaths = {}

full_glob_destinations = map_["paths"].get("*", {}).get("destinations")
other_paths = copy.deepcopy(map_["paths"])
if "*" in other_paths:
del other_paths["*"]

for taskId, artifacts in upstreamArtifactPaths.items():
if map_["taskId"] != taskId:
continue

for artifact in artifacts:
retained_artifact_path = artifact
for sp in strip_prefixes:
if sp in artifact:
retained_artifact_path = retained_artifact_path[retained_artifact_path.find(sp) + len(sp) :]
destinations = []
# We need to look at non-'*' paths separately from '*' paths.

for input_path, output in other_paths.items():
# Skip any input paths that don't match the artifact name.
if "*" in input_path:
if not fnmatch.fnmatch(retained_artifact_path, input_path):
continue
else:
if input_path != artifact:
continue

# If there are already destinations, we've already seen this artifact,
# and we have a clash.
if destinations:
errors.append(f"'{artifact}' matched multiple concrete paths")
else:
destinations.extend(output["destinations"])

# If we have destinations for the "*" glob, any artifacts that
# aren't accounted for by any of the `other_paths` will go to
# the "*" destinations.
if full_glob_destinations and not destinations:
destinations.extend(full_glob_destinations)

if destinations:
concretePaths[artifact] = {"destinations": []}
for d in destinations:
if not d.endswith(retained_artifact_path):
d = os.path.normpath(f"{d}{retained_artifact_path}")
concretePaths[artifact]["destinations"].append(d)

concreteArtifactMap.append(
{
"paths": concretePaths,
"taskId": map_["taskId"],
}
)

if errors:
raise ScriptWorkerTaskException(*errors)

return concreteArtifactMap
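
To make the precedence rules concrete, here is a small worked example with invented task IDs, local paths, and destinations:

```python
upstream_paths = {
    "task1": [
        "/work/cot/task1/public/build/model.npz",
        "/work/cot/task1/public/logs/live.log",
    ]
}
artifact_map = [
    {
        "taskId": "task1",
        "paths": {
            # The non-"*" glob wins for anything it matches...
            "*.log": {"destinations": ["logs/"]},
            # ...and "*" catches whatever is left over.
            "*": {"destinations": ["artifacts/"]},
        },
    }
]

concrete = get_concrete_artifact_map_from_globbed(upstream_paths, artifact_map)
# concrete[0]["paths"] now maps each local file to concrete destinations
# (the "public/build"/"public/logs" prefixes are stripped before appending):
# {
#     "/work/cot/task1/public/build/model.npz": {"destinations": ["artifacts/model.npz"]},
#     "/work/cot/task1/public/logs/live.log": {"destinations": ["logs/live.log"]},
# }
```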


def ensure_no_overwrites_in_artifact_map(artifactMap):
dest_counts = defaultdict(int)
for map_ in artifactMap:
for output_path in map_["paths"].values():
for dest in output_path["destinations"]:
dest_counts[dest] += 1

errors = []
for dest, count in dest_counts.items():
if count > 1:
errors.append(f"'{dest}' would be written to more than once")

if errors:
raise ScriptWorkerTaskException(*errors)

return False
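
And the overwrite guard in action, with a hypothetical clash where two different inputs map to the same destination:

```python
clashing = [
    {
        "taskId": "task1",
        "paths": {
            "/work/a/model.npz": {"destinations": ["models/model.npz"]},
            "/work/b/model.npz": {"destinations": ["models/model.npz"]},
        },
    }
]

# Raises ScriptWorkerTaskException:
#   'models/model.npz' would be written to more than once
ensure_no_overwrites_in_artifact_map(clashing)
```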


async def upload_translations_artifacts(context):
dryrun = context.task["payload"]["dryrun"]
artifactMap = context.task["payload"]["artifactMap"]

# Ignore any failed artifacts; we'll take whatever we can get. All artifacts are considered optional.
# TODO: call ensure_no_overwrites_in_artifact_map somewhere; here, or in get_concrete_artifact_map_from_globbed?
upstreamArtifactPaths = scriptworker_artifacts.get_upstream_artifacts_full_paths_per_task_id(context)[0]
concreteArtifactMap = get_concrete_artifact_map_from_globbed(upstreamArtifactPaths, artifactMap)

for map_ in concreteArtifactMap:
for input_path, outputs in map_["paths"].items():
if dryrun:
log.info(f"Would've uploaded {input_path} to {outputs['destinations']}")
else:
log.info(f"Uploading {input_path} to {outputs['destinations']}")
await retry_upload(context, outputs["destinations"], input_path)


# copy_beets {{{1
def copy_beets(context, from_keys_checksums, to_keys_checksums):
creds = get_credentials(context, "aws")
@@ -334,6 +475,7 @@ def list_bucket_objects(context, s3_resource, prefix):
"direct-push-to-bucket": direct_push_to_bucket,
"push-to-maven": push_to_maven,
"import-from-gcs-to-artifact-registry": import_from_gcs_to_artifact_registry,
"upload-translations-artifacts": upload_translations_artifacts,
}


@@ -731,6 +873,7 @@ def main(config_path=None):
"maven_schema_file": os.path.join(data_dir, "maven_beetmover_task_schema.json"),
"artifactMap_schema_file": os.path.join(data_dir, "artifactMap_beetmover_task_schema.json"),
"import_from_gcs_to_artifact_registry_schema_file": os.path.join(data_dir, "import_from_gcs_to_artifact_registry_task_schema.json"),
"upload_translations_artifacts": os.path.join(data_dir, "upload_translations_artifacts_task_schema.json"),
}

# There are several task schema. Validation occurs in async_main