
Commit

update docs and improve the validation
hgiasac committed Feb 4, 2025
1 parent 2466b9b commit df75343
Showing 12 changed files with 109 additions and 32 deletions.
14 changes: 7 additions & 7 deletions README.md
@@ -8,22 +8,22 @@ This connector is built using the [Go Data Connector SDK](https://github.com/has

### Supported storage services

At this moment, the connector supports S3 Compatible Storage services.

| Service              | Supported |
| -------------------- | --------- |
-| AWS S3               | ✅        |
-| MinIO                | ✅        |
+| AWS S3               | ✅ (\*)   |
| Google Cloud Storage | ✅        |
-| Cloudflare R2        | ✅        |
-| DigitalOcean Spaces  | ✅        |
| Azure Blob Storage   | ✅        |
+| MinIO                | ✅ (\*)   |
+| Cloudflare R2        | ✅ (\*)   |
+| DigitalOcean Spaces  | ✅ (\*)   |

+(\*): Supports Amazon S3 Compatible Cloud Storage providers. The connector uses the [MinIO Go Client SDK](https://github.com/minio/minio-go) behind the scenes.

## Get Started

Follow the [Quick Start Guide](https://hasura.io/docs/3.0/getting-started/overview/) in Hasura DDN docs. At the `Connect to data` step, choose the `hasura/storage` data connector from the dropdown and follow the interactive prompts to set required environment variables.

The connector is built upon the MinIO Go Client SDK, so it supports most of the methods in the [API interface](https://min.io/docs/minio/linux/developers/go/API.html).
AWS S3 environment variables are the default settings in the interactive prompt. If you want to use other storage providers, you need to manually edit the configuration.yaml file and add the required environment variable mappings to the subgraph definition.
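
For orientation, here is a minimal sketch of the underlying MinIO Go Client SDK (the endpoint, credentials, bucket, and object name are placeholder values, not settings from this repository):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
	// Placeholder endpoint and credentials for a local MinIO instance.
	client, err := minio.New("localhost:9000", &minio.Options{
		Creds:  credentials.NewStaticV4("minio-access-key", "minio-secret-key", ""),
		Secure: false,
	})
	if err != nil {
		log.Fatal(err)
	}

	// StatObject is the kind of SDK method that backs the connector's
	// object metadata lookups, such as the size check before a download.
	info, err := client.StatObject(context.Background(), "my-bucket", "hello.txt", minio.StatObjectOptions{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s is %d bytes\n", info.Key, info.Size)
}
```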

## Documentation

2 changes: 1 addition & 1 deletion connector-definition/configuration.yaml
@@ -19,6 +19,6 @@ clients:
    allowedBuckets: []
concurrency:
  query: 5
-  mutation: 3
+  mutation: 1
runtime:
  maxDownloadSizeMBs: 10
17 changes: 10 additions & 7 deletions connector/connector_test.go
@@ -14,17 +14,20 @@ import (
func TestConnector(t *testing.T) {
	setConnectorTestEnv(t)

-	logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
-		Level: slog.LevelDebug,
-	}))
+	for i, dir := range []string{"01-setup", "02-get", "03-cleanup"} {
+		var serverOptions []connector.ServeOption
+
+		if i == 0 {
+			logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
+				Level: slog.LevelDebug,
+			}))
+			serverOptions = append(serverOptions, connector.WithLogger(logger))
+		}
+
-	for _, dir := range []string{"01-setup", "02-get", "03-cleanup"} {
		ndctest.TestConnector(t, &Connector{}, ndctest.TestConnectorOptions{
			Configuration: "../tests/configuration",
			TestDataDir:   filepath.Join("testdata", dir),
-			ServerOptions: []connector.ServeOption{
-				connector.WithLogger(logger),
-			},
+			ServerOptions: serverOptions,
		})
	}
}
2 changes: 2 additions & 0 deletions connector/functions/object.go
@@ -92,6 +92,8 @@ func FunctionStorageObject(ctx context.Context, state *types.State, args *common

// FunctionDownloadStorageObject returns a stream of the object data. Most of the common errors occur when reading the stream.
func FunctionDownloadStorageObject(ctx context.Context, state *types.State, args *common.GetStorageObjectArguments) (*scalar.Bytes, error) {
+	args.Base64Encoded = true
+
	reader, err := downloadStorageObject(ctx, state, args)
	if err != nil {
		return nil, err
56 changes: 56 additions & 0 deletions connector/query_test.go
@@ -7,8 +7,10 @@ import (
"io"
"net/http"
"path/filepath"
"strings"
"testing"

"github.com/hasura/ndc-sdk-go/connector"
"github.com/hasura/ndc-sdk-go/ndctest"
"github.com/hasura/ndc-sdk-go/schema"
"gotest.tools/v3/assert"
@@ -129,3 +131,57 @@
		})
	}
}

+func TestMaxDownloadSizeValidation(t *testing.T) {
+	setConnectorTestEnv(t)
+
+	server, err := connector.NewServer(&Connector{}, &connector.ServerOptions{
+		Configuration: "../tests/configuration",
+	}, connector.WithoutRecovery())
+	assert.NilError(t, err)
+
+	httpServer := server.BuildTestServer()
+	defer httpServer.Close()
+
+	getQueryBody := func(name string) string {
+		return fmt.Sprintf(`{
+			"arguments": {
+				"clientId": {
+					"type": "literal",
+					"value": "minio"
+				},
+				"bucket": {
+					"type": "literal",
+					"value": "dummy-bucket-0"
+				},
+				"object": {
+					"type": "literal",
+					"value": "movies/2000s/movies.json"
+				}
+			},
+			"collection": "%s",
+			"collection_relationships": {},
+			"query": {
+				"fields": {
+					"__value": {
+						"column": "__value",
+						"type": "column"
+					}
+				}
+			}
+		}`, name)
+	}
+
+	for _, name := range []string{"downloadStorageObject", "downloadStorageObjectText"} {
+		t.Run(name, func(t *testing.T) {
+			resp, err := http.DefaultClient.Post(httpServer.URL+"/query", "application/json", strings.NewReader(getQueryBody(name)))
+			assert.NilError(t, err)
+			assert.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
+			var respBody schema.ErrorResponse
+			assert.NilError(t, json.NewDecoder(resp.Body).Decode(&respBody))
+			assert.Equal(t, respBody.Message, "file size >= 2 MB is not allowed to be downloaded directly. Please use presignedGetObject function for large files")
+			resp.Body.Close()
+		})
+	}
+}
3 changes: 2 additions & 1 deletion connector/storage/common/arguments.go
@@ -192,7 +192,8 @@ type GetStorageObjectOptions struct {
	VersionID  *string `json:"versionId"`
	PartNumber *int    `json:"partNumber"`
	// Options to be included for the object information.
-	Include StorageObjectIncludeOptions `json:"-"`
+	Include       StorageObjectIncludeOptions `json:"-"`
+	Base64Encoded bool                        `json:"-"`
}

// StorageCopyDestOptions represents options specified by user for CopyObject/ComposeObject APIs.
9 changes: 8 additions & 1 deletion connector/storage/object.go
@@ -81,7 +81,14 @@ func (m *Manager) GetObject(ctx context.Context, bucketInfo common.StorageBucket
		return nil, schema.UnprocessableContentError("cannot download directory: "+objectName, nil)
	}

-	if objectStat.Size == nil || *objectStat.Size >= (m.runtime.MaxDownloadSizeMBs*1024*1024) {
+	maxDownloadSize := m.runtime.MaxDownloadSizeMBs * 1024 * 1024
+
+	// encoding the file content to base64 increases the size by about 33%
+	if opts.Base64Encoded {
+		maxDownloadSize = maxDownloadSize * 2 / 3
+	}
+
+	if objectStat.Size == nil || *objectStat.Size >= maxDownloadSize {
		return nil, schema.UnprocessableContentError(fmt.Sprintf("file size >= %d MB is not allowed to be downloaded directly. Please use presignedGetObject function for large files", m.runtime.MaxDownloadSizeMBs), nil)
	}

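As a sanity check on the `2/3` factor above, a standalone sketch (not part of the connector) of base64's size overhead:

```go
package main

import (
	"encoding/base64"
	"fmt"
)

func main() {
	// base64 encodes every 3 raw bytes as 4 ASCII characters, a ~33% expansion.
	raw := make([]byte, 3*1024*1024) // 3 MiB of raw data
	encoded := base64.StdEncoding.EncodeToString(raw)
	fmt.Println(len(encoded)) // 4194304, i.e. 4 MiB

	// Scaling the raw-size limit by 2/3 therefore leaves headroom; the exact
	// inverse of the 4/3 expansion would be 3/4.
	const maxDownloadSizeMBs int64 = 10
	fmt.Println(maxDownloadSizeMBs * 1024 * 1024 * 2 / 3) // 6990506 raw bytes allowed
}
```
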
2 changes: 1 addition & 1 deletion connector/types/configuration.go
@@ -43,7 +43,7 @@ func (c Configuration) Validate() error {
// ConcurrencySettings represents settings for concurrent webhook executions to remote servers.
type ConcurrencySettings struct {
	// Maximum number of concurrent executions if there are many query variables.
-	Query    int `json:"query" jsonschema:"min=1,default=10" yaml:"query"`
+	Query    int `json:"query" jsonschema:"min=1,default=5" yaml:"query"`
	// Maximum number of concurrent executions if there are many mutation operations.
	Mutation int `json:"mutation" jsonschema:"min=1,default=1" yaml:"mutation"`
}
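
How the connector applies these limits is not shown in this commit; as a hedged illustration, a concurrency cap like this is commonly enforced with golang.org/x/sync/errgroup:

```go
package main

import (
	"fmt"

	"golang.org/x/sync/errgroup"
)

func main() {
	queryConcurrency := 5 // mirrors ConcurrencySettings.Query

	var g errgroup.Group
	g.SetLimit(queryConcurrency) // at most 5 goroutines run at once

	for i := 0; i < 20; i++ {
		i := i // capture the loop variable (pre-Go 1.22 semantics)
		g.Go(func() error {
			fmt.Println("processing query variable set", i)
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		fmt.Println("error:", err)
	}
}
```
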
13 changes: 13 additions & 0 deletions docs/configuration.md
@@ -221,3 +221,16 @@ You must configure the endpoint URL along with [Access Key ID and Secret Access
#### DigitalOcean Spaces

See [Spaces API Reference Documentation](https://docs.digitalocean.com/reference/api/spaces-api/).

+## Runtime Settings
+
+| Name                 | Description                                                               | Default |
+| -------------------- | ------------------------------------------------------------------------- | ------- |
+| `maxDownloadSizeMBs` | Limit the max download size in MBs for `downloadStorageObject*` functions | `10`    |
+
+## Concurrency Settings
+
+| Name       | Description                                                                    | Default |
+| ---------- | ------------------------------------------------------------------------------ | ------- |
+| `query`    | Max number of concurrent threads when fetching remote relationships in a query | `5`     |
+| `mutation` | Max number of concurrent commands if the mutation request has many operations  | `1`     |
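
A sketch of how these keys could be read into Go types (the `RuntimeSettings` shape here is assumed from the keys above; only `ConcurrencySettings` is confirmed by this commit):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Assumed shape; the real connector defines its own runtime settings type.
type RuntimeSettings struct {
	MaxDownloadSizeMBs int64 `yaml:"maxDownloadSizeMBs"`
}

// Mirrors connector/types/configuration.go in this commit.
type ConcurrencySettings struct {
	Query    int `yaml:"query"`
	Mutation int `yaml:"mutation"`
}

func main() {
	raw := []byte(`
runtime:
  maxDownloadSizeMBs: 10
concurrency:
  query: 5
  mutation: 1
`)

	var cfg struct {
		Runtime     RuntimeSettings     `yaml:"runtime"`
		Concurrency ConcurrencySettings `yaml:"concurrency"`
	}
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cfg) // {Runtime:{MaxDownloadSizeMBs:10} Concurrency:{Query:5 Mutation:1}}
}
```
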
19 changes: 7 additions & 12 deletions docs/objects.md
@@ -68,6 +68,10 @@ query GetSignedDownloadURL {

The response is a base64-encoded string. The client must decode the string to get the raw content.
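
For example, a minimal client-side decode in Go (the response payload here is illustrative):

```go
package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative GraphQL response body for the query below.
	respBody := []byte(`{"data":{"downloadStorageObject":"SGVsbG8gd29ybGQhCg=="}}`)

	var resp struct {
		Data struct {
			DownloadStorageObject string `json:"downloadStorageObject"`
		} `json:"data"`
	}
	if err := json.Unmarshal(respBody, &resp); err != nil {
		panic(err)
	}

	raw, err := base64.StdEncoding.DecodeString(resp.Data.DownloadStorageObject)
	if err != nil {
		panic(err)
	}
	fmt.Print(string(raw)) // Hello world!
}
```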

+> [!NOTE]
+> The connector limits the maximum download size via the `runtime.maxDownloadSizeMBs` setting to avoid excessive memory use. The GraphQL engine on Hasura Cloud also limits the maximum response size from connectors, so the acceptable file size is about 30 MB at most.
+> Note that the file content is encoded as a base64 string, which increases the response size by about 33%. If the maximum download size is 30 MB, the actual allowed raw size is only about 20 MB.
```gql
query DownloadObject {
  downloadStorageObject(object: "hello.txt")
}
```

@@ -84,6 +88,9 @@ query DownloadObject {

Use the `downloadStorageObjectText` query if you are confident that the object content is plain text.

+> [!NOTE]
+> The connector limits the maximum download size via the `runtime.maxDownloadSizeMBs` setting to avoid excessive memory use. The GraphQL engine on Hasura Cloud also limits the maximum response size from connectors, so the acceptable file size is about 30 MB at most.
```gql
query DownloadObjectText {
  downloadStorageObjectText(object: "hello.txt")
}
```

@@ -98,18 +105,6 @@ query DownloadObjectText {

### List Objects

-> [!NOTE]
-> The pagination information is optional. It depends on whether the storage provider's API supports this feature. The pagination method is cursor-based.
-
-| Service              | Pagination |
-| -------------------- | ---------- |
-| AWS S3               |            |
-| Google Cloud Storage |            |
-| Azure Blob Storage   |            |
-| MinIO                |            |
-| Cloudflare R2        |            |
-| DigitalOcean Spaces  |            |

```graphql
query ListObjects {
  storageObjects(where: { object: { _starts_with: "hello" } }) {
```
2 changes: 1 addition & 1 deletion jsonschema/configuration.schema.json
@@ -424,7 +424,7 @@
"properties": {
"query": {
"type": "integer",
"default": 10
"default": 5
},
"mutation": {
"type": "integer",
2 changes: 1 addition & 1 deletion tests/configuration/configuration.yaml
@@ -1,6 +1,6 @@
# yaml-language-server: $schema=../../jsonschema/configuration.schema.json
runtime:
-  maxDownloadSizeMBs: 10
+  maxDownloadSizeMBs: 2
clients:
  - id: minio
    type: s3
