Better testing and creates a CSV of copy stats
andrewpatto committed Dec 5, 2023
1 parent ece33d2 commit f0da9f6
Showing 11 changed files with 341 additions and 151 deletions.
114 changes: 54 additions & 60 deletions dev/test.ts
@@ -10,6 +10,7 @@ import {
TEST_BUCKET_OBJECT_PREFIX,
TEST_BUCKET_WORKING_PREFIX,
} from "./constants";
+import { makeObjectDictionaryCsv, makeTestObject } from "./tests-util";

const discoveryClient = new ServiceDiscoveryClient({});
const s3Client = new S3Client({});
@@ -18,31 +19,6 @@ const sfnClient = new SFNClient({});
// generate a unique run folder for this execution of the entire test suite
const uniqueFolder = randomBytes(8).toString("hex");

-/**
- * Put a list of objects as a CSV into an object.
- *
- * @param absoluteCsvKey the key of the CSV in the working folder
- * @param keysBucket the source bucket of the objects
- * @param keys the keys of the objects
- */
-async function makeObjectListCsv(
-absoluteCsvKey: string,
-keysBucket: string,
-keys: string[],
-) {
-let content = "";
-for (const k of keys) {
-content += `${keysBucket},"${k}"\n`;
-}
-const response = await s3Client.send(
-new PutObjectCommand({
-Bucket: TEST_BUCKET,
-Key: absoluteCsvKey,
-Body: content,
-}),
-);
-}

function getPaths(testNumber: number) {
const tsvName = `${testNumber}-objects-to-copy.tsv`;

@@ -53,7 +29,7 @@ function getPaths(testNumber: number) {
testFolderObjectsTsvRelative: `${uniqueFolder}/${tsvName}`,
testFolderObjectsTsvAbsolute: `${TEST_BUCKET_WORKING_PREFIX}${uniqueFolder}/${tsvName}`,

-testFolderSrc: `${TEST_BUCKET_OBJECT_PREFIX}${uniqueFolder}/${testNumber}-src`,
+testFolderSrc: `${TEST_BUCKET_OBJECT_PREFIX}${uniqueFolder}/${testNumber}-src/`,
testFolderDest: `${TEST_BUCKET_OBJECT_PREFIX}${uniqueFolder}/${testNumber}-dest/`,
};
}
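
The trailing slash added to testFolderSrc matters for the tests below, which append bare file names straight onto it. A quick illustration (paths shortened):

// with the trailing "/":  `${testFolderSrc}existing-a.bin` -> ".../1-src/existing-a.bin"
// without it, the folder and file name would fuse: ".../1-srcexisting-a.bin"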
@@ -74,14 +50,12 @@ async function doTest1(stateMachineArn: string) {
};

for (const [n, stor] of Object.entries(sourceObjects)) {
-await makeTestObject(n, 256 * 1024, stor);
+await makeTestObject(TEST_BUCKET, n, 256 * 1024, stor);
}

-await makeObjectListCsv(
-testFolderObjectsTsvAbsolute,
-TEST_BUCKET,
-Object.keys(sourceObjects),
-);
+await makeObjectDictionaryCsv(TEST_BUCKET, testFolderObjectsTsvAbsolute, {
+TEST_BUCKET: Object.keys(sourceObjects),
+});

await sfnClient.send(
new StartExecutionCommand({
@@ -98,15 +72,54 @@

async function doTest2(stateMachineArn: string) {
const {
+testFolderSrc,
testFolderDest,
testFolderObjectsTsvAbsolute,
testFolderObjectsTsvRelative,
} = getPaths(1);

-await makeObjectListCsv(testFolderObjectsTsvAbsolute, "umccr-10g-data-dev", [
-"HG00096/HG00096.hard-filtered.vcf.gz",
-"HG00097/HG00097.hard-filtered.vcf.gz",
-]);
+// we are going to make objects that are in both the src *and* destination
+// this will let us test our "checksum skipping"
+
+// same name and same content
+await makeTestObject(
+TEST_BUCKET,
+`${testFolderSrc}existing-a.bin`,
+256 * 1024,
+);
+await makeTestObject(
+TEST_BUCKET,
+`${testFolderDest}existing-a.bin`,
+256 * 1024,
+);
+
+// same name and different content - the result should be that rclone *does* copy this
+await makeTestObject(
+TEST_BUCKET,
+`${testFolderSrc}existing-b.bin`,
+64 * 1024,
+);
+await makeTestObject(
+TEST_BUCKET,
+`${testFolderDest}existing-b.bin`,
+64 * 1024,
+"STANDARD",
+1,
+);
+
+await makeObjectDictionaryCsv(TEST_BUCKET, testFolderObjectsTsvAbsolute, {
+"umccr-10g-data-dev": [
+"HG00096/HG00096.hard-filtered.vcf.gz",
+"HG00097/HG00097.hard-filtered.vcf.gz",
+// this does not exist
+"HG000XX/HG000XX.hard-filtered.vcf.gz",
+],
+"not-a-bucket-that-exists": ["a-file-that-also-does-not-exist.bam"],
+[TEST_BUCKET]: [
+`${testFolderSrc}existing-a.bin`,
+`${testFolderSrc}existing-b.bin`,
+],
+});

await sfnClient.send(
new StartExecutionCommand({
@@ -115,7 +128,7 @@ async function doTest2(stateMachineArn: string) {
sourceFilesCsvKey: testFolderObjectsTsvRelative,
destinationBucket: TEST_BUCKET,
destinationPrefixKey: testFolderDest,
-maxItemsPerBatch: 1,
+maxItemsPerBatch: 2,
}),
}),
);
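
My reading of what this test should produce for each staged source, given rclone's checksum skipping (an expectation sketch, not code from the commit):

// hypothetical summary of expected per-source outcomes for test 2
const expectedOutcomes = {
  "existing-a.bin": "skipped - same size and checksum already at the destination",
  "existing-b.bin": "copied - same size but different content, so checksums differ",
  "HG000XX/HG000XX.hard-filtered.vcf.gz": "error - source key does not exist",
  "a-file-that-also-does-not-exist.bam": "error - source bucket does not exist",
};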
@@ -133,14 +146,12 @@ async function doTest3(stateMachineArn: string) {
};

for (const [n, stor] of Object.entries(sourceObjects)) {
-await makeTestObject(n, 1000, stor);
+await makeTestObject(TEST_BUCKET, n, 1000, stor);
}

-await makeObjectListCsv(
-`${testFolderSrc}/objects-to-copy.tsv`,
-TEST_BUCKET,
-Object.keys(sourceObjects),
-);
+//await makeObjectDictionaryCsv(TEST_BUCKET, testFolderObjectsTsvAbsolute, {
+// TEST_BUCKET: Object.keys(sourceObjects),
+//});

await sfnClient.send(
new StartExecutionCommand({
@@ -155,23 +166,6 @@
);
}

-async function makeTestObject(
-key: string,
-sizeInBytes: number,
-storageClass: StorageClass = "STANDARD",
-) {
-const response = await s3Client.send(
-new PutObjectCommand({
-Bucket: TEST_BUCKET,
-Key: key,
-Body: Buffer.alloc(sizeInBytes, 13),
-StorageClass: storageClass,
-}),
-);
-}
-
-async function createTestData() {}

(async () => {
console.log(`Working folder = ${TEST_BUCKET}:${uniqueFolder}`);

68 changes: 68 additions & 0 deletions dev/tests-util.ts
@@ -0,0 +1,68 @@
import { PutObjectCommand, S3Client, StorageClass } from "@aws-sdk/client-s3";

const s3Client = new S3Client({});

/**
 * Put a dictionary of objects as a two-column CSV into an S3 object.
 *
 * @param csvBucket the bucket in which to store the CSV
 * @param csvAbsoluteKey the key of the CSV in the working folder
 * @param objects a dictionary of bucket -> keys[]
 */
export async function makeObjectDictionaryCsv(
csvBucket: string,
csvAbsoluteKey: string,
objects: Record<string, string[]>,
) {
let content = "";

// for each bucket
for (const b of Object.keys(objects)) {
// for each key
for (const k of objects[b]) content += `${b},"${k}"\n`;
}

// now save the CSV to S3
const response = await s3Client.send(
new PutObjectCommand({
Bucket: csvBucket,
Key: csvAbsoluteKey,
Body: content,
}),
);
}
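
For example, a hypothetical call (bucket and key names made up) serialises one CSV row per bucket/key pair:

await makeObjectDictionaryCsv("my-working-bucket", "working/objects-to-copy.csv", {
  "bucket-a": ["file1.bin", "nested/file2.bin"],
  "bucket-b": ["file3.bin"],
});
// uploads an object whose body is:
// bucket-a,"file1.bin"
// bucket-a,"nested/file2.bin"
// bucket-b,"file3.bin"

Note that a plain TEST_BUCKET: property in an object literal (as in doTest1 above) produces the literal string "TEST_BUCKET" in the bucket column; using the variable's value requires the computed-key form [TEST_BUCKET]:, as doTest2 does.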

/**
 * Makes an S3 object of a certain size and storage class,
 * filled with a single repeated content byte
*
* @param bucket the bucket of the object
* @param key the key of the object
* @param sizeInBytes the size in bytes of the object to make
* @param storageClass the storage class for the object, defaults to STANDARD
* @param forceContentByte force a content byte if the default needs to be overridden
* @returns the byte value that is the content of the created file
*/
export async function makeTestObject(
bucket: string,
key: string,
sizeInBytes: number,
storageClass: StorageClass = "STANDARD",
forceContentByte: number | undefined = undefined,
) {
const contentByte =
forceContentByte === undefined ? sizeInBytes % 256 : forceContentByte;
const response = await s3Client.send(
new PutObjectCommand({
Bucket: bucket,
Key: key,
// rather than fill every file with 0s, we fill with a value that
// depends on the size - no particular reason other than it lets us
// assert that content has been successfully copied by inspecting
// the destination content after the copy
Body: Buffer.alloc(sizeInBytes, contentByte),
StorageClass: storageClass,
}),
);
return contentByte;
}
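
Since the fill byte is sizeInBytes % 256, two objects of the same size get identical content unless forceContentByte overrides it. A worked example (hypothetical bucket and keys):

// 256 KiB: (256 * 1024) % 256 === 0, so the object is filled with byte 0
const a = await makeTestObject("my-bucket", "a.bin", 256 * 1024);
// 64 KiB would also fill with byte 0 - which is why doTest2 forces a content
// byte of 1 for the destination copy of existing-b.bin, so that its checksum
// differs from the same-sized source object
const b = await makeTestObject("my-bucket", "b.bin", 64 * 1024, "STANDARD", 1);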
(Go source of the rclone batch runner; file name not rendered in this view)
@@ -122,6 +122,9 @@ func main() {
"--stats", "10000h",
// normally no bandwidth limiting ("0") - but can institute bandwidth limit if asked
"--bwlimit", If(debugBandwidthOk, debugBandwidth, "0"),
+// because we are transferring between S3 - which has a consistent idea of checksums
+// at src and destination - we enable this option
+"--checksum",
"copy", source, destination)

// we are only interested in stderr
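
Assembled, the invocation this builds looks roughly like the following (my reconstruction from the visible arguments; the actual source and destination remote strings come from the runner's inputs):

// rclone --stats 10000h --bwlimit 0 --checksum copy <source> <destination>

By default rclone skips a file when size and modification time match at the destination; with --checksum it compares size and hash (MD5 in the S3 backend) instead, which is what should let the identical doTest2 object be skipped and the same-size-but-different-content object be re-copied. The remaining hunks in this file just lower-case the error strings, in line with the Go convention that error strings are not capitalized.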
@@ -225,13 +228,13 @@ func main() {
case 143:
results[which] = map[string]any{
"errors": 1,
"lastError": "Interrupted by SIGTERM",
"lastError": "interrupted by SIGTERM",
"source": source}
resultErrorCount++
default:
results[which] = map[string]any{
"errors": 1,
"lastError": fmt.Sprintf("Exit of rclone with code %v but no JSON statistics block generated", runExitErr.ExitCode()),
"lastError": fmt.Sprintf("exit of rclone with code %v but no JSON statistics block generated", runExitErr.ExitCode()),
"systemError": fmt.Sprintf("%#v", runExitErr),
"source": source}
resultErrorCount++
Expand All @@ -245,7 +248,7 @@ func main() {
// create a fake "compatible" stats block
results[which] = map[string]any{
"errors": 1,
"lastError": "Skipped due to previous SIGTERM received",
"lastError": "skipped due to previous SIGTERM received",
"source": source}
resultErrorCount++
}

Some generated files are not rendered by default.

(package.json for the copy-stats summarisation Lambda; file name not rendered in this view)
@@ -4,6 +4,7 @@
"main": "summarise-copy-lambda.ts",
"dependencies": {
"@aws-sdk/client-s3": "3.405.0",
"@types/aws-lambda": "8.10.93"
"@types/aws-lambda": "8.10.93",
"csv-stringify": "6.4.4"
}
}
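
The new csv-stringify dependency fits the commit title ("creates a CSV of copy stats"): presumably summarise-copy-lambda.ts now writes the per-source rclone stats blocks out as a CSV. A minimal sketch of that idea - the helper name, result shape, and column choice are my assumptions, not the Lambda's actual code:

import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3";
import { stringify } from "csv-stringify/sync";

// assumed shape of one rclone stats block produced by the Go runner
interface CopyResult {
  source: string;
  errors: number;
  lastError?: string;
}

const s3 = new S3Client({});

// hypothetical helper: persist the collected results as a CSV object in S3
export async function writeCopyStatsCsv(
  bucket: string,
  key: string,
  results: CopyResult[],
) {
  const body = stringify(
    results.map((r) => [r.source, r.errors, r.lastError ?? ""]),
    { header: true, columns: ["source", "errors", "lastError"] },
  );
  await s3.send(new PutObjectCommand({ Bucket: bucket, Key: key, Body: body }));
}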
