Skip to content

Commit

Permalink
Added a Go application to wrap the rclone process
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewpatto committed Nov 28, 2023
1 parent 98724d6 commit c80cda5
Show file tree
Hide file tree
Showing 28 changed files with 3,802 additions and 723 deletions.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,46 @@ A service that can be installed into an Elsa Data environment
and which enables parallel file copying out into a
destination bucket in the same region.

NOTE: this is a general purpose "S3 file copy" tool - so might
be useful outside of Elsa Data. It can certainly be invoked
directly as a Steps function independent of Elsa Data (all
that Elsa Data does is set up the input CSVs and then invoke
the Steps function itself).

## Development

On check-out (once only) (note that `pre-commit` is presumed installed externally)

```shell
pre-commit install
```

For package installation (note that `pnpm` is presumed installed externally)

```shell
pnpm install
```

Edit the packages and deploy to dev

```shell
(in the dev folder)
pnpm run deploy
```

## Testing

Because this service is very dependent on the behaviour of AWS Steps
(using distributed maps) - it was too complex to set up a "local" test
that would actually test many of the pieces likely to fail.

Instead, development is done and the CDK project is deployed to a "dev" account (noting
that this sets the minimum dev cadence for trying changes
to minutes rather than seconds).

There is then a test script - that creates sample objects - and launches
test invocations.

## Input

```json
Expand Down
13 changes: 11 additions & 2 deletions dev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,18 @@
"private": true,
"version": "0.0.0",
"description": "Manual CDK deployment for development",
"scripts": {
"deploy": "pnpm -w run build && cdk deploy",
"destroy": "pnpm -w run build && cdk destroy",
"test": "ts-node --prefer-ts-exts test.ts",
"test-quick": "ts-node --prefer-ts-exts test.ts"
},
"dependencies": {
"aws-cdk": "2.93.0",
"aws-cdk-lib": "2.93.0",
"@aws-sdk/client-s3": "3.450.0",
"@aws-sdk/client-servicediscovery": "3.450.0",
"@aws-sdk/client-sfn": "3.450.0",
"aws-cdk": "2.108.1",
"aws-cdk-lib": "2.108.1",
"aws-copy-out-sharer": "link:../packages/aws-copy-out-sharer"
},
"devDependencies": {}
Expand Down
148 changes: 122 additions & 26 deletions dev/test.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,122 @@
import { CopyOutStack } from "aws-copy-out-sharer";
import { SubnetType } from "aws-cdk-lib/aws-ec2";
import { App } from "aws-cdk-lib";

// Development-only CDK app: instantiates a single CopyOutStack with
// development settings, pinned to one fixed account and region.
const app = new App();

// human readable description attached to the stack
const description =
"Bulk copy-out service for Elsa Data - an application for controlled genomic data sharing";

// used both as the construct id of the stack and as the value of its "umccr-org:Stack" tag
const devId = "ElsaDataDevCopyOutStack";

new CopyOutStack(app, devId, {
// the stack can only be deployed to 'dev'
env: {
account: "843407916570",
region: "ap-southeast-2",
},
tags: {
"umccr-org:Product": "ElsaData",
"umccr-org:Stack": devId,
},
description: description,
isDevelopment: true,
// NOTE(review): presumably the name of an already deployed infrastructure stack
// that this stack looks up - confirm against CopyOutStack
infrastructureStackName: "ElsaDataDevInfrastructureStack",
infrastructureSubnetSelection: SubnetType.PRIVATE_WITH_EGRESS,
});
import { randomBytes } from "crypto";
import { S3Client, PutObjectCommand, StorageClass } from "@aws-sdk/client-s3";
import { SFNClient, StartExecutionCommand } from "@aws-sdk/client-sfn";
import {
DiscoverInstancesCommand,
ServiceDiscoveryClient,
} from "@aws-sdk/client-servicediscovery";

/**
 * The single setting that must be adjusted before running these tests: the name of
 * a bucket that you have full access to. Ideally that bucket has a lifecycle rule
 * that automatically expires objects after 1 day.
 */
const TEST_BUCKET = "elsa-data-tmp";

// AWS SDK clients - each constructed with an empty configuration object
const discoveryClient = new ServiceDiscoveryClient({});
const s3Client = new S3Client({});
const sfnClient = new SFNClient({});

// a random (32 hex character) prefix generated per run, from which the
// source and destination folder names are derived
const testFolder = randomBytes(16).toString("hex");
const testFolderSrc = `${testFolder}-src`;
const testFolderDest = `${testFolder}-dest`;

/**
 * Writes a CSV listing of objects into the test bucket. The listing has one
 * row per key, in the form `bucket,"key"`, where the bucket is always
 * TEST_BUCKET.
 *
 * @param key the S3 key (within TEST_BUCKET) at which the listing is stored
 * @param keys the object keys to list, in the order the rows should appear
 */
async function makeObjectListCsv(key: string, keys: string[]): Promise<void> {
  const content = keys
    // a double quote inside a quoted CSV field must be doubled (RFC 4180),
    // otherwise a key containing `"` would produce a malformed row
    .map((k) => `${TEST_BUCKET},"${k.replace(/"/g, '""')}"\n`)
    .join("");

  await s3Client.send(
    new PutObjectCommand({
      Bucket: TEST_BUCKET,
      Key: key,
      Body: content,
    }),
  );
}

/**
 * Creates a test object in the test bucket with content of the requested size.
 *
 * @param key the S3 key (within TEST_BUCKET) of the object to create
 * @param sizeInBytes the exact length of the object content, in bytes
 * @param storageClass the S3 storage class to create the object with
 */
async function makeTestObject(
  key: string,
  sizeInBytes: number,
  storageClass: StorageClass = "STANDARD",
): Promise<void> {
  await s3Client.send(
    new PutObjectCommand({
      Bucket: TEST_BUCKET,
      Key: key,
      // honour the requested size - the buffer is filled by repeating the
      // marker text until it is exactly sizeInBytes long
      Body: Buffer.alloc(sizeInBytes, "Hello S3!"),
      StorageClass: storageClass,
    }),
  );
}

/**
 * Populates the source test folder: uploads three small objects (two STANDARD,
 * one GLACIER) and then a listing of those objects for the copy to consume.
 */
async function createTestData() {
  // [object key, storage class] pairs - in the order they are uploaded and listed
  const objectsToCreate: Array<[string, StorageClass]> = [
    [`${testFolderSrc}/1.bin`, StorageClass.STANDARD],
    [`${testFolderSrc}/2.bin`, StorageClass.STANDARD],
    [`${testFolderSrc}/3.bin`, StorageClass.GLACIER],
  ];

  // uploads are deliberately done one at a time
  for (const [objectKey, objectStorageClass] of objectsToCreate) {
    await makeTestObject(objectKey, 1000, objectStorageClass);
  }

  const listingKey = `${testFolderSrc}/objects-to-copy.tsv`;
  const listedKeys = objectsToCreate.map(([objectKey]) => objectKey);

  await makeObjectListCsv(listingKey, listedKeys);
}

/**
 * Test entry point. Discovers the CopyOut state machine via CloudMap, creates
 * the source test objects and their listing, and then starts a single
 * execution of the state machine that copies them to the destination folder.
 *
 * Any failure is printed and results in a non-zero process exit code.
 */
(async () => {
  console.log(`Src objects = ${TEST_BUCKET}:${testFolderSrc}`);
  console.log(`Dest objects = ${TEST_BUCKET}:${testFolderDest}`);

  const stepsDiscover = await discoveryClient.send(
    new DiscoverInstancesCommand({
      NamespaceName: "elsa-data",
      ServiceName: "CopyOut",
    }),
  );

  // we insist on exactly one registered instance so that there is no
  // ambiguity about which state machine is being tested
  const instances = stepsDiscover?.Instances ?? [];

  if (instances.length !== 1)
    throw new Error(
      "Did not discover the expected number of CopyOut instances in the Elsa Data CloudMap",
    );

  const attributes = instances[0]?.Attributes;

  if (!attributes)
    throw new Error(
      "Did not discover state machine settings in CopyOut CloudMap",
    );

  const stateMachineArn = attributes["stateMachineArn"];

  // fail here with a clear message rather than passing an undefined ARN to Steps
  if (!stateMachineArn)
    throw new Error(
      "Did not discover a stateMachineArn attribute in CopyOut CloudMap",
    );

  await createTestData();

  await sfnClient.send(
    new StartExecutionCommand({
      stateMachineArn: stateMachineArn,
      input: JSON.stringify({
        sourceFilesCsvBucket: TEST_BUCKET,
        sourceFilesCsvKey: `${testFolderSrc}/objects-to-copy.tsv`,
        destinationBucket: TEST_BUCKET,
        destinationKey: testFolderDest,
        maxItemsPerBatch: 1,
      }),
    }),
  );
})().catch((e: unknown) => {
  // report the failure explicitly rather than relying on the runtime's
  // handling of unhandled promise rejections
  console.error(e);
  process.exitCode = 1;
});
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ type CanWriteLambdaStepProps = {
vpcSubnetSelection: SubnetType;

requiredRegion: string;

/**
* If true, will allow this CanWrite lambda to test a bucket that is
* in the same account. Otherwise, and by default, the CanWrite lambda
* is set up so that it cannot test a bucket in the same account in which
* it is installed. This is a security mechanism, as writes to buckets in the
* same account are allowed implicitly, which is dangerous. This should only
* be set to true for development/testing.
*/
allowWriteToThisAccount?: boolean;
};

/**
Expand All @@ -28,12 +38,16 @@ export class CanWriteLambdaStepConstruct extends Construct {

const canWriteLambda = new NodejsFunction(this, "CanWriteFunction", {
vpc: props.vpc,
entry: join(__dirname, "can-write-lambda", "can-write-lambda.js"),
entry: join(__dirname, "can-write-lambda", "can-write-lambda.ts"),
// by specifying the precise runtime - the bundler knows exactly what packages are already in
// the base image - and for us can skip bundling @aws-sdk
// if we need to move this forward to node 18+ - then we may need to revisit this
// if we need to move this forward beyond node 18 - then we may need to revisit this
runtime: Runtime.NODEJS_18_X,
handler: "handler",
bundling: {
externalModules: ["aws-sdk"],
minify: false,
},
vpcSubnets: {
subnetType: props.vpcSubnetSelection,
},
Expand All @@ -46,15 +60,17 @@ export class CanWriteLambdaStepConstruct extends Construct {
effect: Effect.ALLOW,
actions: ["s3:PutObject"],
resources: ["*"],
conditions: {
// yes - that's right - we want to give this lambda the ability to attempt the writes anywhere
// EXCEPT where we are deployed
// (under the assumption that buckets outside our account must be giving us explicit write permission,
// whilst within our account we get implicit access - in this case we don't want that ability)
StringNotEquals: {
"s3:ResourceAccount": [Stack.of(this).account],
},
},
// yes - that's right - we want to give this lambda the ability to attempt the writes anywhere
// EXCEPT where we are deployed
// (under the assumption that buckets outside our account must be giving us explicit write permission,
// whilst within our account we get implicit access - in this case we don't want that ability)
conditions: props.allowWriteToThisAccount
? undefined
: {
StringNotEquals: {
"s3:ResourceAccount": [Stack.of(this).account],
},
},
}),
);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
const { PutObjectCommand, S3Client } = require("@aws-sdk/client-s3");
import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3";
import { AccessDeniedError, WrongRegionError } from "./errors";

export const handler = async (event) => {
function WrongRegionError(message) {
this.name = "WrongRegionError";
this.message = message;
}
WrongRegionError.prototype = new Error();

function AccessDeniedError(message) {
this.name = "AccessDeniedError";
this.message = message;
}
AccessDeniedError.prototype = new Error();
/**
 * The shape of the event that the handler is invoked with.
 */
interface InvokeEvent {
// NOTE(review): presumably the AWS region that the destination bucket is
// expected to be in - confirm against the handler body
requiredRegion: string;
// NOTE(review): presumably the name of the bucket that the handler attempts
// its test write to - confirm against the handler body
destinationBucket: string;
}

export async function handler(event: InvokeEvent) {
console.log(event.requiredRegion);
console.log(event.destinationBucket);

Expand All @@ -27,8 +21,8 @@ export const handler = async (event) => {
Body: "A file created by Elsa Data copy out to ensure correct permissions",
});

const response = await client.send(putCommand);
} catch (e) {
await client.send(putCommand);
} catch (e: any) {
if (e.Code === "PermanentRedirect")
throw new WrongRegionError(
"S3 Put failed because bucket was in the wrong region",
Expand All @@ -39,7 +33,7 @@ export const handler = async (event) => {

throw e;
}
};
}

/*handler({
requiredRegion: "ap-southeast-2",
Expand Down
15 changes: 15 additions & 0 deletions packages/aws-copy-out-sharer/construct/can-write-lambda/errors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * An Error subclass whose `name` is "WrongRegionError", so that the failure
 * can be recognised by name as well as by `instanceof`.
 */
export class WrongRegionError extends Error {
  /**
   * @param message a human readable description of the failure
   */
  constructor(message: string) {
    // hand the message to Error itself - setting it afterwards leaves it
    // out of the already captured stack trace header
    super(message);
    this.name = "WrongRegionError";
  }
}

/**
 * An Error subclass whose `name` is "AccessDeniedError", so that the failure
 * can be recognised by name as well as by `instanceof`.
 */
export class AccessDeniedError extends Error {
  /**
   * @param message a human readable description of the failure
   */
  constructor(message: string) {
    // hand the message to Error itself - setting it afterwards leaves it
    // out of the already captured stack trace header
    super(message);
    this.name = "AccessDeniedError";
  }
}
Loading

0 comments on commit c80cda5

Please sign in to comment.