Skip to content

Commit

Permalink
feat(aws-glue): adds AWS Glue view L2 construct
Browse files Browse the repository at this point in the history
  • Loading branch information
steffeng committed Mar 11, 2022
1 parent d91b2e2 commit d13d5f5
Show file tree
Hide file tree
Showing 7 changed files with 1,257 additions and 1 deletion.
85 changes: 85 additions & 0 deletions packages/@aws-cdk/aws-glue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,88 @@ new glue.Table(this, 'MyTable', {
| array(itemType: Type) | Function | An array of some other type |
| map(keyType: Type, valueType: Type) | Function | A map of some primitive key type to any value type |
| struct(columns: Column[]) | Function | Nested structure containing individually named and typed columns |

## View

A Glue view is stored as a table of type `VIRTUAL_VIEW`. It is defined by an SQL statement and by a list of columns, which are declared the same way as the columns of a `Table`. A minimal view definition looks like this:

```ts
new glue.View(stack, "MyView", {
database: myDatabase,
tableName: "my_view",
columns: [{
name: "x",
type: glue.Schema.INTEGER
}],
statement: "SELECT 1 x"
});
```

### Placeholders in statements

A view's SQL statement may refer to resources, such as tables, whose names you want to substitute at deploy time. Take the following view as an example:

```ts
new glue.View(stack, "MyView", {
database: myDatabase,
tableName: "my_view",
columns: [{
name: "x",
type: glue.Schema.INTEGER
}],
statement: "SELECT x from table_a UNION SELECT x from table_b"
});
```

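Now imagine that the table names can't be hardcoded. Reference each name in the statement as `${name}` and supply its value through the optional `placeHolders` property. The placeholder `${database}` is always available; it resolves to the name of the view's database and cannot be overridden: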

```ts
new glue.View(stack, "MyView", {
database: myDatabase,
tableName: "my_view",
columns: [{
name: "x",
type: glue.Schema.INTEGER
}],
statement: "SELECT x from ${table1} UNION SELECT x from ${table2}",
placeHolders: {
table1: table1.tableName,
table2: table2.tableName
}
});
```

### View statements in external files

A view's SQL statement can become large and complex, which makes it hard to author and validate inside CDK code. In that case, you can load the SQL statement from an external file.

Take the following example in a file named `myView.sql`:

```sql
SELECT x from ${table1}
UNION
SELECT x from ${table2}
```

You can include the statement and replace the placeholders like this:

```ts
import * as fs from 'fs';
import * as path from 'path';

new glue.View(stack, "MyView", {
database: myDatabase,
tableName: "my_view",
columns: [{
name: "x",
type: glue.Schema.INTEGER
}],
statement: fs
.readFileSync(path.join(__dirname, 'myView.sql'))
.toString(),
placeHolders: {
table1: table1.tableName,
table2: table2.tableName,
},
});
```
3 changes: 2 additions & 1 deletion packages/@aws-cdk/aws-glue/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ export * from './job-executable';
export * from './code';
export * from './schema';
export * from './security-configuration';
export * from './table';
export * from './table';
export * from './view';
149 changes: 149 additions & 0 deletions packages/@aws-cdk/aws-glue/lib/view.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import { Fn, Resource } from '@aws-cdk/core';
import { Construct } from 'constructs';
import { IDatabase } from './database';
import { CfnTable } from './glue.generated';
import { Column, Schema } from './schema';
import { ITable } from './table';

/**
* Properties for defining a Glue `View`.
*/
export interface ViewProps {
/**
* Name of the view. It is used as the name of the underlying Glue table.
*/
readonly tableName: string;

/**
* Description of the view.
*
* @default - `<tableName> generated by CDK`
*/
readonly description?: string;

/**
* Database in which to create the view.
*/
readonly database: IDatabase;

/**
* Columns of the view.
*/
readonly columns: Column[];

/**
* SQL statement that defines the view. It may contain `${name}` placeholders.
*/
readonly statement: string;

/**
* Values for the `${name}` placeholders used in the statement, keyed by placeholder name.
*
* The `database` placeholder is always set to the database name of `database`;
* a value supplied for it here is overwritten.
*
* @default - only the `database` placeholder is defined
*/
readonly placeHolders?: { [key: string]: string };
}

/**
 * Presto spellings for the Glue/Athena column types that are named differently
 * in Presto, keyed by the `inputString` of the matching `Schema` type.
 * Types without an entry here are passed through unchanged by the renderer.
 */
const prestoTypes: Record<string, string> = {
  [Schema.STRING.inputString]: 'varchar',
  [Schema.BINARY.inputString]: 'varbinary',
  [Schema.FLOAT.inputString]: 'real',
  [Schema.TINY_INT.inputString]: 'tinyint',
  [Schema.SMALL_INT.inputString]: 'smallint',
  [Schema.INTEGER.inputString]: 'integer',
  [Schema.BIG_INT.inputString]: 'bigint',
};

/**
 * A view in the AWS Glue data catalog, implemented as a Glue table of type
 * `VIRTUAL_VIEW` that carries a Presto view definition.
 *
 * @resource AWS::Glue::Table
 */
export class View extends Resource implements ITable {
  /**
   * Name of this view, i.e. the name of the underlying Glue table.
   */
  public readonly tableName: string;

  /**
   * ARN of the underlying Glue table.
   */
  public readonly tableArn: string;

  /**
   * @summary Creates a Table construct that represents a view in the AWS Glue data catalogue.
   *
   * The view definition (statement, catalog, columns and schema) is serialized
   * to JSON, passed through `Fn.sub` together with the placeholders, and
   * base64-encoded into the table's `viewOriginalText`. Because the whole JSON
   * document goes through `Fn.sub`, every `${...}` sequence in the statement,
   * in a column name or in a column comment is treated as a substitution
   * variable.
   *
   * @param scope The scope creating construct (usually `this`).
   * @param id The construct's id.
   * @param props Properties.
   */
  constructor(scope: Construct, id: string, props: ViewProps) {
    super(scope, id, {
      physicalName: props.tableName,
    });

    // User-supplied placeholders go first so that the built-in `database`
    // entry always takes precedence over a user value of the same name.
    const placeHolders: { [key: string]: string } = {
      ...props.placeHolders,
      database: props.database.databaseName,
    };

    // Property order is significant: it determines the serialized JSON text.
    const viewOriginalText = {
      originalSql: props.statement,
      catalog: 'awsdatacatalog',
      columns: this.renderColumns(props.columns, true),
      // Resolved by Fn.sub below through the built-in `database` placeholder.
      schema: '${database}',
    };

    const encodedDefinition = Fn.base64(Fn.sub(JSON.stringify(viewOriginalText), placeHolders));

    const tableResource = new CfnTable(this, 'Table', {
      catalogId: props.database.catalogId,
      databaseName: props.database.databaseName,
      tableInput: {
        name: this.physicalName,
        description: props.description || `${props.tableName} generated by CDK`,
        parameters: { presto_view: true },
        storageDescriptor: {
          columns: this.renderColumns(props.columns),
          serdeInfo: {},
        },
        partitionKeys: [],
        tableType: 'VIRTUAL_VIEW',
        viewOriginalText: '/* Presto View: ' + encodedDefinition + ' */',
      },
    });
    this.node.defaultChild = tableResource;

    this.tableName = this.getResourceNameAttribute(tableResource.ref);
    this.tableArn = this.stack.formatArn({
      service: 'glue',
      resource: 'table',
      resourceName: `${props.database.databaseName}/${this.tableName}`,
    });
  }

  /**
   * Converts the column definitions into the plain objects used by Glue.
   *
   * @param columns the columns
   * @param usePrestoTypes if `true`, replaces every data type that has an entry in
   * `prestoTypes` with its Presto name; all other types are emitted unchanged
   * @returns one `{ name, type, comment }` object per column, in input order
   */
  private renderColumns(columns: Column[], usePrestoTypes = false) {
    return columns.map((column) => {
      const glueType = column.type.inputString;
      // Own-property check: the `in` operator would also match names inherited
      // from Object.prototype (e.g. `constructor`) and yield a non-string type.
      const hasPrestoType = usePrestoTypes && Object.prototype.hasOwnProperty.call(prestoTypes, glueType);
      return {
        name: column.name,
        type: hasPrestoType ? prestoTypes[glueType] : glueType,
        comment: column.comment,
      };
    });
  }
}
12 changes: 12 additions & 0 deletions packages/@aws-cdk/aws-glue/test/combinedView.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Test fixture for a view statement loaded from an external file.
-- UNION ALL of two tables whose names are injected through the sourceTable and
-- targetTable placeholders.
-- The cutoff is the current time minus 15 minutes and minus 1 hour, truncated to
-- the hour and formatted as YYYYMMDDHH; it is compared with the concatenation of
-- the year, month, day and hour columns.
-- Rows at or after the cutoff come from the source table, rows before it from
-- the target table.
SELECT
*
, "$path" "file"
FROM
${sourceTable}
WHERE ("concat"("year", "month", "day", "hour") >= "date_format"("date_trunc"('hour', ((current_timestamp - INTERVAL '15' MINUTE) - INTERVAL '1' HOUR)), '%Y%m%d%H'))
UNION ALL SELECT
*
, "$path" "file"
FROM
${targetTable}
WHERE ("concat"("year", "month", "day", "hour") < "date_format"("date_trunc"('hour', ((current_timestamp - INTERVAL '15' MINUTE) - INTERVAL '1' HOUR)), '%Y%m%d%H'))
Loading

0 comments on commit d13d5f5

Please sign in to comment.