Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gw): support UnixFS 1.5 on gateway responses as Last-Modified #659

Merged
merged 4 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The following emojis are used to highlight certain changes:
### Changed

- `chunker` refactored to reduce overall memory use by reducing heap fragmentation [#649](https://github.com/ipfs/boxo/pull/649)
- `gateway` deserialized responses will have `Last-Modified` set to value from optional UnixFS 1.5 modification time field (if present in DAG) and a matching `If-Modified-Since` will return `304 Not Modified` (UnixFS 1.5 files only) [#659](https://github.com/ipfs/boxo/pull/659)

### Removed

Expand Down
12 changes: 12 additions & 0 deletions gateway/backend_blocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@
return md, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := f.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

if d, ok := f.(files.Directory); ok {
dir, err := uio.NewDirectoryFromNode(bb.dagService, nd)
if err != nil {
Expand Down Expand Up @@ -231,6 +237,12 @@
return ContentPathMetadata{}, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := fileNode.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

Check warning on line 244 in gateway/backend_blocks.go

View check run for this annotation

Codecov / codecov/patch

gateway/backend_blocks.go#L241-L244

Added lines #L241 - L244 were not covered by tests

sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
Expand Down
3 changes: 2 additions & 1 deletion gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ type ContentPathMetadata struct {
PathSegmentRoots []cid.Cid
LastSegment path.ImmutablePath
LastSegmentRemainder []string
ContentType string // Only used for UnixFS requests
ContentType string // Only used for UnixFS requests
ModTime time.Time // Optional, non-zero values may be present in UnixFS 1.5 DAGs
}

// ByteRange describes a range request within a UnixFS file. "From" and "To" mostly
Expand Down
160 changes: 160 additions & 0 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,60 @@ func TestHeaders(t *testing.T) {
test(dagCborResponseFormat, dagCborPath)
})

// We have UnixFS1.5 tests in TestHeadersUnixFSModeModTime, here we test default behavior (DAG without modtime)
t.Run("If-Modified-Since is noop against DAG without optional UnixFS 1.5 mtime", func(t *testing.T) {
test := func(responseFormat string, path string) {
t.Run(responseFormat, func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
require.Empty(t, lastModified)

// Make second request with If-Modified-Since far in past and expect normal response
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", "Mon, 13 Jun 2000 22:18:32 GMT")
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
})
}

test("", dirPath)
test("text/html", dirPath)
test(carResponseFormat, dirPath)
test(rawResponseFormat, dirPath)
test(tarResponseFormat, dirPath)

test("", hamtFilePath)
test("text/html", hamtFilePath)
test(carResponseFormat, hamtFilePath)
test(rawResponseFormat, hamtFilePath)
test(tarResponseFormat, hamtFilePath)

test("", filePath)
test("text/html", filePath)
test(carResponseFormat, filePath)
test(rawResponseFormat, filePath)
test(tarResponseFormat, filePath)

test("", dagCborPath)
test("text/html", dagCborPath+"/")
test(carResponseFormat, dagCborPath)
test(rawResponseFormat, dagCborPath)
test(dagJsonResponseFormat, dagCborPath)
test(dagCborResponseFormat, dagCborPath)
})

t.Run("X-Ipfs-Roots contains expected values", func(t *testing.T) {
test := func(responseFormat string, path string, roots string) {
t.Run(responseFormat, func(t *testing.T) {
Expand Down Expand Up @@ -495,6 +549,112 @@ func TestHeaders(t *testing.T) {
})
}

// Testing a DAG with (optional) UnixFS1.5 modification time
func TestHeadersUnixFSModeModTime(t *testing.T) {
t.Parallel()

ts, _, root := newTestServerAndNode(t, "unixfs-dir-with-mode-mtime.car")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ created with Kubo 0.30.0-rc1

var (
rootCID = root.String() // "bafybeidbcy4u6y55gsemlubd64zk53xoxs73ifd6rieejxcr7xy46mjvky"
filePath = "/ipfs/" + rootCID + "/file1"
dirPath = "/ipfs/" + rootCID + "/dir1/"
)

t.Run("If-Modified-Since matching UnixFS 1.5 modtime returns Not Modified", func(t *testing.T) {
test := func(responseFormat string, path string, entityType string, supported bool) {
t.Run(fmt.Sprintf("%s/%s support=%t", responseFormat, entityType, supported), func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
if supported {
assert.NotEmpty(t, lastModified)
} else {
assert.Empty(t, lastModified)
lastModified = "Mon, 13 Jun 2022 22:18:32 GMT" // manually set value for use in next steps
}

ifModifiedSinceTime, err := time.Parse(time.RFC1123, lastModified)
require.NoError(t, err)
oneHourBefore := ifModifiedSinceTime.Add(-1 * time.Hour).Truncate(time.Second)
oneHourAfter := ifModifiedSinceTime.Add(1 * time.Hour).Truncate(time.Second)
oneHourBeforeStr := oneHourBefore.Format(time.RFC1123)
oneHourAfterStr := oneHourAfter.Format(time.RFC1123)
lastModifiedStr := ifModifiedSinceTime.Format(time.RFC1123)

// Make second request with If-Modified-Since and value read from response to first request
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", lastModifiedStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 on exact match, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}

// Make third request with If-Modified-Since 1h before value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourBeforeStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
// always return 200 with body because mtime from unixfs is after value from If-Modified-Since
// so it counts as an update
assert.Equal(t, http.StatusOK, res.StatusCode)

// Make third request with If-Modified-Since 1h after value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourAfterStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 because mtime from unixfs is before value from If-Modified-Since
// so no update, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}
})
}

file, dir := "file", "directory"
// supported on file-based web responses
test("", filePath, file, true)
test("text/html", filePath, file, true)

// not supported on other formats
// we may implement support for If-Modified-Since for below request types
// if users raise the need, but If-None-Match is way better
test(carResponseFormat, filePath, file, false)
test(rawResponseFormat, filePath, file, false)
test(tarResponseFormat, filePath, file, false)

test("", dirPath, dir, false)
test("text/html", dirPath, dir, false)
test(carResponseFormat, dirPath, dir, false)
test(rawResponseFormat, dirPath, dir, false)
test(tarResponseFormat, dirPath, dir, false)
})
}

func TestGoGetSupport(t *testing.T) {
ts, _, root := newTestServerAndNode(t, "fixtures.car")

Expand Down
79 changes: 75 additions & 4 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,11 @@
return
}

// Detect when If-Modified-Since HTTP header + UnixFS 1.5 allow returning HTTP 304 Not Modified.
if i.handleIfModifiedSince(w, r, rq) {
return
}

Check warning on line 306 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L305-L306

Added lines #L305 - L306 were not covered by tests

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "", jsonResponseFormat, cborResponseFormat:
Expand Down Expand Up @@ -410,18 +415,25 @@
}

if lastMod.IsZero() {
// Otherwise, we set Last-Modified to the current time to leverage caching heuristics
// If no lastMod, set Last-Modified to the current time to leverage caching heuristics
// built into modern browsers: https://github.com/ipfs/kubo/pull/8074#pullrequestreview-645196768
modtime = time.Now()
} else {
// set Last-Modified to a meaningful value e.g. one read from dag-pb (UnixFS 1.5, mtime field)
// or the last time DNSLink / IPNS Record was modified / resoved or cache

Check warning on line 423 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L422-L423

Added lines #L422 - L423 were not covered by tests
modtime = lastMod
}

} else {
w.Header().Set("Cache-Control", immutableCacheControl)
modtime = noModtime // disable Last-Modified

// TODO: consider setting Last-Modified if UnixFS V1.5 ever gets released
// with metadata: https://github.com/ipfs/kubo/issues/6920
if lastMod.IsZero() {
// (noop) skip Last-Modified on immutable response
modtime = noModtime
} else {
// set Last-Modified to value read from dag-pb (UnixFS 1.5, mtime field)
modtime = lastMod
}
}

return modtime
Expand Down Expand Up @@ -507,6 +519,21 @@
w.Header().Set("X-Ipfs-Roots", rootCidList)
}

// lastModifiedMatch returns true if we can respond with HTTP 304 Not Modified
// It compares If-Modified-Since with logical modification time read from DAG
// (e.g. UnixFS 1.5 modtime, if present)
func lastModifiedMatch(ifModifiedSinceHeader string, lastModified time.Time) bool {
if ifModifiedSinceHeader == "" || lastModified.IsZero() {
return false
}
ifModifiedSinceTime, err := time.Parse(time.RFC1123, ifModifiedSinceHeader)
if err != nil {
return false
}

Check warning on line 532 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L529-L532

Added lines #L529 - L532 were not covered by tests
// ignoring fractional seconds (as HTTP dates don't include fractional seconds)
return !lastModified.Truncate(time.Second).After(ifModifiedSinceTime)

Check warning on line 534 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L534

Added line #L534 was not covered by tests
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Match string
// including the wildcard one.
Expand Down Expand Up @@ -745,6 +772,50 @@
return false
}

func (i *handler) handleIfModifiedSince(w http.ResponseWriter, r *http.Request, rq *requestData) bool {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ this is based on what we already had in handleIfNoneMatch to support If-None-Match, which operates on Etags.

This one is executed only if If-Modified-Since is present in request, which usually is less popular than more deterministic If-None-Match.

Still, we add is here as it costs us nothing to avoid returning payload and returning HTTP 304 Not Modified instead.

// Detect when If-Modified-Since HTTP header allows returning HTTP 304 Not Modified
ifModifiedSince := r.Header.Get("If-Modified-Since")
if ifModifiedSince == "" {
return false
}

// Resolve path to be able to read pathMetadata.ModTime
pathMetadata, err := i.backend.ResolvePath(r.Context(), rq.immutablePath)
if err != nil {
var forwardedPath path.ImmutablePath
var continueProcessing bool
if isWebRequest(rq.responseFormat) {
forwardedPath, continueProcessing = i.handleWebRequestErrors(w, r, rq.mostlyResolvedPath(), rq.immutablePath, rq.contentPath, err, rq.logger)
if continueProcessing {
pathMetadata, err = i.backend.ResolvePath(r.Context(), forwardedPath)
}

Check warning on line 791 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L785-L791

Added lines #L785 - L791 were not covered by tests
}
if !continueProcessing || err != nil {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(rq.contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return true
}

Check warning on line 797 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L793-L797

Added lines #L793 - L797 were not covered by tests
}

// Currently we only care about optional mtime from UnixFS 1.5 (dag-pb)
// but other sources of this metadata could be added in the future
lastModified := pathMetadata.ModTime
if lastModifiedMatch(ifModifiedSince, lastModified) {
w.WriteHeader(http.StatusNotModified)
return true
}

Check warning on line 806 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L804-L806

Added lines #L804 - L806 were not covered by tests

// Check if the resolvedPath is an immutable path.
_, err = path.NewImmutablePath(pathMetadata.LastSegment)
if err != nil {
i.webError(w, r, err, http.StatusInternalServerError)
return true
}

Check warning on line 813 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L811-L813

Added lines #L811 - L813 were not covered by tests

rq.pathMetadata = &pathMetadata
return false
}

// check if request was for one of known explicit formats,
// or should use the default, implicit Web+UnixFS behaviors.
func isWebRequest(responseFormat string) bool {
Expand Down
8 changes: 8 additions & 0 deletions gateway/handler_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h

setIpfsRootsHeader(w, rq, &pathMetadata)

// On deserialized responses, we prefer Last-Modified from pathMetadata
// (mtime in UnixFS 1.5 DAG). This also applies to /ipns/, because value
// from dag-pb, if present, is more meaningful than lastMod inferred from
// namesys.
if !pathMetadata.ModTime.IsZero() {
rq.lastMod = pathMetadata.ModTime
}

resolvedPath := pathMetadata.LastSegment
switch mc.Code(resolvedPath.RootCid().Prefix().Codec) {
case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor:
Expand Down
Binary file added gateway/testdata/unixfs-dir-with-mode-mtime.car
Binary file not shown.
Loading