Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/htsget random access - PLACEHOLDER #820

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0ec3ed8
First step of implementation of backend random/ranged access
pontus Feb 29, 2024
83400ca
Linter fixes
pontus Mar 1, 2024
24287db
Run integration tests for s3seekable
pontus Mar 1, 2024
5298960
Verify requested content-range is used
pontus Mar 1, 2024
1a96eef
Add/correct some performance optimisations
pontus Mar 9, 2024
26efde3
Make it possible to configure archive for seekable s3
pontus Mar 9, 2024
1c0d0c7
Cosmetic improvements and allow seeking to the end
pontus Mar 11, 2024
fb93e65
Return EOF when reading at the end
pontus Mar 11, 2024
ae1b0a2
Remove unneeded code
pontus Mar 11, 2024
0212bea
Only accept seekable readers to SeekableMultiReader
pontus Mar 11, 2024
d677679
Cosmetic improvements and allow seeking to the end
pontus Mar 11, 2024
822606a
Fix test for more data available in SeekableMultiReader
pontus Mar 11, 2024
d39abfc
Adapt to SeekableMultiReader instantiation being able to fail
pontus Mar 11, 2024
5882777
Updated test for seekable s3 and seekableMultiReader
pontus Mar 11, 2024
17fcb35
Give more time for stability
pontus Mar 11, 2024
af37d19
Lint fix
pontus Mar 11, 2024
b30072a
Lint fix
pontus Mar 11, 2024
b6d2a5c
Remove spurious log and empty lines
pontus Mar 11, 2024
ff082c9
Allow waiting for prefetch as a proper code would
pontus Mar 11, 2024
2710591
feat: use seekable reader in file download
MalinAhlberg Apr 9, 2024
2d14c7d
feat: set client additional bytes
MalinAhlberg Apr 10, 2024
0db289a
fix: make sure bytes ranges are inclusive
MalinAhlberg Apr 10, 2024
9f0c95b
refactor: move seeker to function
MalinAhlberg Apr 10, 2024
0c4e92c
tests: make sure byte ranges are inclusive
MalinAhlberg Apr 10, 2024
b6a4561
fix: add header size to encrypted file size
MalinAhlberg Apr 11, 2024
46cfcab
Remove server public key and rearrange calculate coordinates
dbampalikis Apr 15, 2024
ae8b09c
refactor: clean up code
MalinAhlberg Apr 16, 2024
7f270dc
tests: adjust testing of coords calculation and instatiate headers
MalinAhlberg Apr 16, 2024
d862b0c
refactor: rename caluclateCoords function
MalinAhlberg Apr 16, 2024
f4daaa5
Simplify TLS config
jbygdell Apr 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/functionality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:

strategy:
matrix:
storagetype: [s3, posix, s3notls]
storagetype: [s3, posix, s3notls, s3seekable]
go-version: ['1.21']
fail-fast: false
steps:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

sed -i 's/=s3/=posix/g' dev_utils/env.download
sed -i 's/ARCHIVE_TYPE=.*/ARCHIVE_TYPE=posix/g' dev_utils/env.download

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

pip3 install s3cmd

cd dev_utils || exit 1

# Make buckets if they don't exist already
s3cmd -c s3cmd.conf mb s3://archive || true

# # Upload test file
s3cmd -c s3cmd.conf put archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 s3://archive/4293c9a7-dc50-46db-b79a-27ddc0dad1c6
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

sed -i 's/ARCHIVE_TYPE=.*/ARCHIVE_TYPE=s3seekable/g' dev_utils/env.download

7 changes: 1 addition & 6 deletions sda-download/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,7 @@ func Setup() *http.Server {
// Configure TLS settings
log.Info("(3/5) Configuring TLS")
cfg := &tls.Config{
MinVersion: tls.VersionTLS12,
CurvePreferences: []tls.CurveID{tls.CurveP521, tls.CurveP384, tls.CurveP256},
PreferServerCipherSuites: true,
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
},
MinVersion: tls.VersionTLS12,
}

// Configure web server
Expand Down
126 changes: 79 additions & 47 deletions sda-download/api/sda/sda.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,23 +272,13 @@ func Download(c *gin.Context) {
return
}

switch c.Param("type") {
case "encrypted":
// calculate coordinates
start, end, err = calculateEncryptedCoords(start, end, c.GetHeader("Range"), fileDetails)
if err != nil {
log.Errorf("Byte range coordinates invalid! %v", err)

return
}
if start > 0 {
// reading from an offset in encrypted file is not yet supported
log.Errorf("Start coordinate for encrypted files not implemented! %v", start)
c.String(http.StatusBadRequest, "Start coordinate for encrypted files not implemented!")
start, end, err = calculateCoords(start, end, c.GetHeader("Range"), fileDetails, c.Param("type"))
if err != nil {
log.Errorf("Byte range coordinates invalid! %v", err)

return
}
default:
return
}
if c.Param("type") != "encrypted" {
// set the content-length for unencrypted files
if start == 0 && end == 0 {
c.Header("Content-Length", fmt.Sprint(fileDetails.DecryptedSize))
Expand Down Expand Up @@ -325,35 +315,39 @@ func Download(c *gin.Context) {
// set the user and server public keys that is send from htsget
log.Debugf("Got to setting the headers: %s", c.GetHeader("client-public-key"))
c.Header("Client-Public-Key", c.GetHeader("Client-Public-Key"))
c.Header("Server-Public-Key", c.GetHeader("Server-Public-Key"))
}

if c.Request.Method == http.MethodHead {

// Create headers for htsget, containing size of the crypt4gh header
reencKey := c.GetHeader("Client-Public-Key")
headerSize := bytes.NewReader(fileDetails.Header).Size()
// Size of the header in the archive
c.Header("Server-Additional-Bytes", fmt.Sprint(headerSize))
if reencKey != "" {
newHeader, _ := reencryptHeader(fileDetails.Header, reencKey)
headerSize = bytes.NewReader(newHeader).Size()
// Size of the header if the file is re-encrypted before downloading
c.Header("Client-Additional-Bytes", fmt.Sprint(headerSize))
}
if c.Param("type") == "encrypted" {
c.Header("Content-Length", fmt.Sprint(fileDetails.ArchiveSize))

// set the length of the crypt4gh header for htsget
c.Header("Server-Additional-Bytes", fmt.Sprint(bytes.NewReader(fileDetails.Header).Size()))
// TODO figure out if client crypt4gh header will have other size
// c.Header("Client-Additional-Bytes", ...)
// Update the content length to match the encrypted file size
c.Header("Content-Length", fmt.Sprint(int(headerSize)+fileDetails.ArchiveSize))
}

return
}

// Prepare the file for streaming, encrypted or decrypted
var fileStream io.Reader

var fileStream io.ReadSeeker
hr := bytes.NewReader(fileDetails.Header)

switch c.Param("type") {
case "encrypted":
// The key provided in the header should be base64 encoded
reencKey := c.GetHeader("Client-Public-Key")
if strings.HasPrefix(c.GetHeader("User-Agent"), "htsget") {
reencKey = c.GetHeader("Server-Public-Key")
}
if reencKey == "" {
c.String(http.StatusBadRequest, "c4gh public key is mmissing from the header")
c.String(http.StatusBadRequest, "c4gh public key is missing from the header")

return
}
Expand All @@ -369,27 +363,31 @@ func Download(c *gin.Context) {
}
log.Debugf("Reencrypted c4gh file header = %v", newHeader)
newHr := bytes.NewReader(newHeader)
fileStream = io.MultiReader(newHr, file)

fileStream, err = storage.SeekableMultiReader(newHr, file)
if err != nil {
log.Errorf("Construct SeekableMultiReader for file: %v", err)
c.String(http.StatusInternalServerError, "file stream error")

return
}

default:
encryptedFileReader := io.MultiReader(bytes.NewReader(fileDetails.Header), file)
c4ghfileStream, err := streaming.NewCrypt4GHReader(encryptedFileReader, *config.Config.App.Crypt4GHKey, nil)
defer c4ghfileStream.Close()

fileStream, err = storage.SeekableMultiReader(hr, file)
if err != nil {
log.Errorf("could not prepare file for streaming, %s", err)
log.Errorf("Construct SeekableMultiReader for file: %v", err)
c.String(http.StatusInternalServerError, "file stream error")

return
}
if start != 0 {
// We don't want to read from start, skip ahead to where we should be
_, err = c4ghfileStream.Seek(start, 0)
if err != nil {
log.Errorf("error occurred while finding sending start: %v", err)
c.String(http.StatusInternalServerError, "an error occurred")
c4ghfileStream, err := streaming.NewCrypt4GHReader(fileStream, *config.Config.App.Crypt4GHKey, nil)
defer c4ghfileStream.Close()
if err != nil {
log.Errorf("could not prepare file for streaming, %s", err)
c.String(http.StatusInternalServerError, "file stream error")

return
}
return
}
fileStream = c4ghfileStream
}
Expand All @@ -403,8 +401,31 @@ func Download(c *gin.Context) {
}
}

var seekStart = func(fileStream io.ReadSeeker, start, end int64) (int64, int64, error) {
if start != 0 {

// We don't want to read from start, skip ahead to where we should be
_, err := fileStream.Seek(start, 0)
if err != nil {

return 0, 0, fmt.Errorf("error occurred while finding sending start: %v", err)
}
// adjust end to reflect that the file start has been moved
end -= start
start = 0

}

return start, end, nil
}

// used from: https://github.com/neicnordic/crypt4gh/blob/master/examples/reader/main.go#L48C1-L113C1
var sendStream = func(reader io.Reader, writer http.ResponseWriter, start, end int64) error {
var sendStream = func(reader io.ReadSeeker, writer http.ResponseWriter, start, end int64) error {

start, end, err := seekStart(reader, start, end)
if err != nil {
return err
}

// Calculate how much we should read (if given)
togo := end - start
Expand Down Expand Up @@ -451,10 +472,12 @@ var sendStream = func(reader io.Reader, writer http.ResponseWriter, start, end i
}

// Calculates the start and end coordinats to use. If a range is set in HTTP headers,
// it will be used as is. If not, the functions parameters will be used,
// and adjusted to match the data block boundaries of the encrypted file.
var calculateEncryptedCoords = func(start, end int64, htsget_range string, fileDetails *database.FileDownload) (int64, int64, error) {
// it will be used as is. If not, the functions parameters will be used.
// If in encrypted mode, the parameters will be adjusted to match the data block boundaries.
var calculateCoords = func(start, end int64, htsget_range string, fileDetails *database.FileDownload, encryptedType string) (int64, int64, error) {
log.Warnf("calculate")
if htsget_range != "" {
log.Warnf("calculate non empty")
startEnd := strings.Split(strings.TrimPrefix(htsget_range, "bytes="), "-")
if len(startEnd) > 1 {
a, err := strconv.ParseInt(startEnd[0], 10, 64)
Expand All @@ -469,9 +492,17 @@ var calculateEncryptedCoords = func(start, end int64, htsget_range string, fileD
return 0, 0, fmt.Errorf("endCoordinate must be greater than startCoordinate")
}

return a, b, nil
// Byte ranges are inclusive; +1 so that the last byte is included

return a, b + 1, nil
}
}

// For unencrypted files, return the coordinates as is
if encryptedType != "encrypted" {
return start, end, nil
}

// Adapt end coordinate to follow the crypt4gh block boundaries
headlength := bytes.NewReader(fileDetails.Header)
bodyEnd := int64(fileDetails.ArchiveSize)
Expand All @@ -484,4 +515,5 @@ var calculateEncryptedCoords = func(start, end int64, htsget_range string, fileD
}

return start, headlength.Size() + bodyEnd, nil

}
39 changes: 25 additions & 14 deletions sda-download/api/sda/sda_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"bytes"
"errors"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strconv"
"testing"

"github.com/gin-gonic/gin"
Expand Down Expand Up @@ -477,9 +480,14 @@ func TestDownload_Fail_OpenFile(t *testing.T) {
return fileDetails, nil
}

// Mock request and response holders
// Mock request and response holders and initialize headers
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
req := &http.Request{
URL: &url.URL{},
Header: make(http.Header),
}
c.Request = req

// Test the outcomes of the handler
Download(c)
Expand All @@ -506,7 +514,7 @@ func TestDownload_Fail_OpenFile(t *testing.T) {

}

func TestEncrypted_Coords(t *testing.T) {
func Test_CalucalateCoords(t *testing.T) {
var to, from int64
from, to = 0, 1000
fileDetails := &database.FileDownload{
Expand All @@ -515,40 +523,43 @@ func TestEncrypted_Coords(t *testing.T) {
Header: make([]byte, 124),
}

// htsget_range should be used first and as is
// htsget_range should be used first and its end position should be increased by one
headerSize := bytes.NewReader(fileDetails.Header).Size()
fullSize := headerSize + int64(fileDetails.ArchiveSize)
start, end, err := calculateEncryptedCoords(from, to, "bytes=10-20", fileDetails)
var endPos int64
endPos = 20
start, end, err := calculateCoords(from, to, "bytes=10-"+strconv.FormatInt(endPos, 10), fileDetails, "default")
assert.Equal(t, start, int64(10))
assert.Equal(t, end, int64(20))
assert.Equal(t, end, endPos+1)
assert.NoError(t, err)

// end should be greater than or equal to inputted end
_, end, err = calculateEncryptedCoords(from, to, "", fileDetails)
_, end, err = calculateCoords(from, to, "", fileDetails, "encrypted")
assert.GreaterOrEqual(t, end, from)
assert.NoError(t, err)

// end should not be smaller than a header
_, end, err = calculateEncryptedCoords(from, headerSize-10, "", fileDetails)
_, end, err = calculateCoords(from, headerSize-10, "", fileDetails, "encrypted")
assert.GreaterOrEqual(t, end, headerSize)
assert.NoError(t, err)

// end should not be larger than file length + header
_, end, err = calculateEncryptedCoords(from, fullSize+1900, "", fileDetails)
_, end, err = calculateCoords(from, fullSize+1900, "", fileDetails, "encrypted")
assert.Equal(t, fullSize, end)
assert.NoError(t, err)

// range 0-0 should give whole file
start, end, err = calculateEncryptedCoords(0, 0, "", fileDetails)
// param range 0-0 should give whole file
start, end, err = calculateCoords(0, 0, "", fileDetails, "encrypted")
assert.Equal(t, end-start, fullSize)
assert.NoError(t, err)

// range 0-0 with range in the header should return the range size
_, end, err = calculateEncryptedCoords(0, 0, "bytes=0-1000", fileDetails)
assert.Equal(t, end, int64(1000))
// byte range 0-1000 should return the range size, end coord inclusive
endPos = 1000
_, end, err = calculateCoords(0, 0, "bytes=0-"+strconv.FormatInt(endPos, 10), fileDetails, "encrypted")
assert.Equal(t, end, endPos+1)
assert.NoError(t, err)

// range in the header should return error if values are not numbers
_, _, err = calculateEncryptedCoords(0, 0, "bytes=start-end", fileDetails)
_, _, err = calculateCoords(0, 0, "bytes=start-end", fileDetails, "encrypted")
assert.Error(t, err)
}
14 changes: 11 additions & 3 deletions sda-download/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

const POSIX = "posix"
const S3 = "s3"
const S3seekable = "s3seekable"

// availableMiddlewares list the options for middlewares
// empty string "" is an alias for default, for when the config key is not set, or it's empty
Expand Down Expand Up @@ -176,7 +177,7 @@ func NewConfig() (*Map, error) {
"db.host", "db.user", "db.password", "db.database", "c4gh.filepath", "c4gh.passphrase", "oidc.configuration.url",
}

if viper.GetString("archive.type") == S3 {
if viper.GetString("archive.type") == S3 || viper.GetString("archive.type") == S3seekable {
requiredConfVars = append(requiredConfVars, []string{"archive.url", "archive.accesskey", "archive.secretkey", "archive.bucket"}...)
} else if viper.GetString("archive.type") == POSIX {
requiredConfVars = append(requiredConfVars, []string{"archive.location"}...)
Expand Down Expand Up @@ -305,10 +306,17 @@ func (c *Map) configureOIDC() error {
// configArchive provides configuration for the archive storage
// we default to POSIX unless S3 specified
func (c *Map) configArchive() {
if viper.GetString("archive.type") == S3 {

switch viper.GetString("archive.type") {
case S3:
c.Archive.Type = S3
c.Archive.S3 = configS3Storage("archive")
} else {

case S3seekable:
c.Archive.Type = S3seekable
c.Archive.S3 = configS3Storage("archive")

default:
c.Archive.Type = POSIX
c.Archive.Posix.Location = viper.GetString("archive.location")
}
Expand Down
Loading
Loading