From 08f200aa8eef7d227c7dee14fa8d1ed2012d26a0 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 28 Aug 2024 02:07:43 +0200 Subject: [PATCH] feat(gw): support UnixFS 1.5 on deserialized responses as Last-Modified (#659) bare minimum support for Last-Modified and If-Modified-Since if mtime present in dag-pb --- CHANGELOG.md | 1 + gateway/backend_blocks.go | 12 ++ gateway/gateway.go | 3 +- gateway/gateway_test.go | 160 ++++++++++++++++++ gateway/handler.go | 79 ++++++++- gateway/handler_defaults.go | 8 + .../testdata/unixfs-dir-with-mode-mtime.car | Bin 0 -> 1037 bytes 7 files changed, 258 insertions(+), 5 deletions(-) create mode 100644 gateway/testdata/unixfs-dir-with-mode-mtime.car diff --git a/CHANGELOG.md b/CHANGELOG.md index 07527a74e..d0c5f4f67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The following emojis are used to highlight certain changes: ### Changed - `chunker` refactored to reduce overall memory use by reducing heap fragmentation [#649](https://github.com/ipfs/boxo/pull/649) +- `gateway` deserialized responses will have `Last-Modified` set to value from optional UnixFS 1.5 modification time field (if present in DAG) and a matching `If-Modified-Since` will return `304 Not Modified` (UnixFS 1.5 files only) [#659](https://github.com/ipfs/boxo/pull/659) ### Removed diff --git a/gateway/backend_blocks.go b/gateway/backend_blocks.go index 42440dfcd..d62d3d876 100644 --- a/gateway/backend_blocks.go +++ b/gateway/backend_blocks.go @@ -156,6 +156,12 @@ func (bb *BlocksBackend) Get(ctx context.Context, path path.ImmutablePath, range return md, nil, err } + // Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5) + mtime := f.ModTime() + if !mtime.IsZero() { + md.ModTime = mtime + } + if d, ok := f.(files.Directory); ok { dir, err := uio.NewDirectoryFromNode(bb.dagService, nd) if err != nil { @@ -231,6 +237,12 @@ func (bb *BlocksBackend) Head(ctx context.Context, path path.ImmutablePath) (Con 
return ContentPathMetadata{}, nil, err } + // Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5) + mtime := fileNode.ModTime() + if !mtime.IsZero() { + md.ModTime = mtime + } + sz, err := fileNode.Size() if err != nil { return ContentPathMetadata{}, nil, err diff --git a/gateway/gateway.go b/gateway/gateway.go index dccdaf792..d79689925 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -209,7 +209,8 @@ type ContentPathMetadata struct { PathSegmentRoots []cid.Cid LastSegment path.ImmutablePath LastSegmentRemainder []string - ContentType string // Only used for UnixFS requests + ContentType string // Only used for UnixFS requests + ModTime time.Time // Optional, non-zero values may be present in UnixFS 1.5 DAGs } // ByteRange describes a range request within a UnixFS file. "From" and "To" mostly diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 3c1ab3f72..e4a9935ac 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -256,6 +256,60 @@ func TestHeaders(t *testing.T) { test(dagCborResponseFormat, dagCborPath) }) + // We have UnixFS1.5 tests in TestHeadersUnixFSModeModTime, here we test default behavior (DAG without modtime) + t.Run("If-Modified-Since is noop against DAG without optional UnixFS 1.5 mtime", func(t *testing.T) { + test := func(responseFormat string, path string) { + t.Run(responseFormat, func(t *testing.T) { + // Make regular request and read Last-Modified + url := ts.URL + path + req := mustNewRequest(t, http.MethodGet, url, nil) + req.Header.Add("Accept", responseFormat) + res := mustDoWithoutRedirect(t, req) + _, err := io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, http.StatusOK, res.StatusCode) + lastModified := res.Header.Get("Last-Modified") + require.Empty(t, lastModified) + + // Make second request with If-Modified-Since far in past and expect normal response + req = mustNewRequest(t, http.MethodGet, 
url, nil) + req.Header.Add("Accept", responseFormat) + req.Header.Add("If-Modified-Since", "Mon, 13 Jun 2000 22:18:32 GMT") + res = mustDoWithoutRedirect(t, req) + _, err = io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, http.StatusOK, res.StatusCode) + }) + } + + test("", dirPath) + test("text/html", dirPath) + test(carResponseFormat, dirPath) + test(rawResponseFormat, dirPath) + test(tarResponseFormat, dirPath) + + test("", hamtFilePath) + test("text/html", hamtFilePath) + test(carResponseFormat, hamtFilePath) + test(rawResponseFormat, hamtFilePath) + test(tarResponseFormat, hamtFilePath) + + test("", filePath) + test("text/html", filePath) + test(carResponseFormat, filePath) + test(rawResponseFormat, filePath) + test(tarResponseFormat, filePath) + + test("", dagCborPath) + test("text/html", dagCborPath+"/") + test(carResponseFormat, dagCborPath) + test(rawResponseFormat, dagCborPath) + test(dagJsonResponseFormat, dagCborPath) + test(dagCborResponseFormat, dagCborPath) + }) + t.Run("X-Ipfs-Roots contains expected values", func(t *testing.T) { test := func(responseFormat string, path string, roots string) { t.Run(responseFormat, func(t *testing.T) { @@ -495,6 +549,112 @@ func TestHeaders(t *testing.T) { }) } +// Testing a DAG with (optional) UnixFS1.5 modification time +func TestHeadersUnixFSModeModTime(t *testing.T) { + t.Parallel() + + ts, _, root := newTestServerAndNode(t, "unixfs-dir-with-mode-mtime.car") + var ( + rootCID = root.String() // "bafybeidbcy4u6y55gsemlubd64zk53xoxs73ifd6rieejxcr7xy46mjvky" + filePath = "/ipfs/" + rootCID + "/file1" + dirPath = "/ipfs/" + rootCID + "/dir1/" + ) + + t.Run("If-Modified-Since matching UnixFS 1.5 modtime returns Not Modified", func(t *testing.T) { + test := func(responseFormat string, path string, entityType string, supported bool) { + t.Run(fmt.Sprintf("%s/%s support=%t", responseFormat, entityType, supported), func(t *testing.T) { + // Make regular request and read 
Last-Modified + url := ts.URL + path + req := mustNewRequest(t, http.MethodGet, url, nil) + req.Header.Add("Accept", responseFormat) + res := mustDoWithoutRedirect(t, req) + _, err := io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, http.StatusOK, res.StatusCode) + lastModified := res.Header.Get("Last-Modified") + if supported { + assert.NotEmpty(t, lastModified) + } else { + assert.Empty(t, lastModified) + lastModified = "Mon, 13 Jun 2022 22:18:32 GMT" // manually set value for use in next steps + } + + ifModifiedSinceTime, err := time.Parse(time.RFC1123, lastModified) + require.NoError(t, err) + oneHourBefore := ifModifiedSinceTime.Add(-1 * time.Hour).Truncate(time.Second) + oneHourAfter := ifModifiedSinceTime.Add(1 * time.Hour).Truncate(time.Second) + oneHourBeforeStr := oneHourBefore.Format(time.RFC1123) + oneHourAfterStr := oneHourAfter.Format(time.RFC1123) + lastModifiedStr := ifModifiedSinceTime.Format(time.RFC1123) + + // Make second request with If-Modified-Since and value read from response to first request + req = mustNewRequest(t, http.MethodGet, url, nil) + req.Header.Add("Accept", responseFormat) + req.Header.Add("If-Modified-Since", lastModifiedStr) + res = mustDoWithoutRedirect(t, req) + _, err = io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + if supported { + // 304 on exact match, can skip body + assert.Equal(t, http.StatusNotModified, res.StatusCode) + } else { + assert.Equal(t, http.StatusOK, res.StatusCode) + } + + // Make third request with If-Modified-Since 1h before value read from response to first request + // and expect HTTP 200 + req = mustNewRequest(t, http.MethodGet, url, nil) + req.Header.Add("Accept", responseFormat) + req.Header.Add("If-Modified-Since", oneHourBeforeStr) + res = mustDoWithoutRedirect(t, req) + _, err = io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + // always return 200 with body because mtime 
from unixfs is after value from If-Modified-Since + // so it counts as an update + assert.Equal(t, http.StatusOK, res.StatusCode) + + // Make fourth request with If-Modified-Since 1h after value read from response to first request + // and expect HTTP 304 when supported (HTTP 200 otherwise) + req = mustNewRequest(t, http.MethodGet, url, nil) + req.Header.Add("Accept", responseFormat) + req.Header.Add("If-Modified-Since", oneHourAfterStr) + res = mustDoWithoutRedirect(t, req) + _, err = io.Copy(io.Discard, res.Body) + require.NoError(t, err) + defer res.Body.Close() + if supported { + // 304 because mtime from unixfs is before value from If-Modified-Since + // so no update, can skip body + assert.Equal(t, http.StatusNotModified, res.StatusCode) + } else { + assert.Equal(t, http.StatusOK, res.StatusCode) + } + }) + } + + file, dir := "file", "directory" + // supported on file-based web responses + test("", filePath, file, true) + test("text/html", filePath, file, true) + + // not supported on other formats + // we may implement support for If-Modified-Since for below request types + // if users raise the need, but If-None-Match is way better + test(carResponseFormat, filePath, file, false) + test(rawResponseFormat, filePath, file, false) + test(tarResponseFormat, filePath, file, false) + + test("", dirPath, dir, false) + test("text/html", dirPath, dir, false) + test(carResponseFormat, dirPath, dir, false) + test(rawResponseFormat, dirPath, dir, false) + test(tarResponseFormat, dirPath, dir, false) + }) +} + func TestGoGetSupport(t *testing.T) { ts, _, root := newTestServerAndNode(t, "fixtures.car") diff --git a/gateway/handler.go b/gateway/handler.go index 4360d2163..d6e1b7054 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -300,6 +300,11 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { return } + // Detect when If-Modified-Since HTTP header + UnixFS 1.5 allow returning HTTP 304 Not Modified. 
+ if i.handleIfModifiedSince(w, r, rq) { + return + } + // Support custom response formats passed via ?format or Accept HTTP header switch responseFormat { case "", jsonResponseFormat, cborResponseFormat: @@ -410,18 +415,25 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath } if lastMod.IsZero() { - // Otherwise, we set Last-Modified to the current time to leverage caching heuristics + // If no lastMod, set Last-Modified to the current time to leverage caching heuristics // built into modern browsers: https://github.com/ipfs/kubo/pull/8074#pullrequestreview-645196768 modtime = time.Now() } else { + // set Last-Modified to a meaningful value e.g. one read from dag-pb (UnixFS 1.5, mtime field) + // or the last time DNSLink / IPNS Record was modified / resolved or cached modtime = lastMod } + } else { w.Header().Set("Cache-Control", immutableCacheControl) - modtime = noModtime // disable Last-Modified - // TODO: consider setting Last-Modified if UnixFS V1.5 ever gets released - // with metadata: https://github.com/ipfs/kubo/issues/6920 + if lastMod.IsZero() { + // (noop) skip Last-Modified on immutable response + modtime = noModtime + } else { + // set Last-Modified to value read from dag-pb (UnixFS 1.5, mtime field) + modtime = lastMod + } } return modtime @@ -507,6 +519,21 @@ func setIpfsRootsHeader(w http.ResponseWriter, rq *requestData, md *ContentPathM w.Header().Set("X-Ipfs-Roots", rootCidList) } +// lastModifiedMatch returns true if we can respond with HTTP 304 Not Modified +// It compares If-Modified-Since with logical modification time read from DAG +// (e.g. 
UnixFS 1.5 modtime, if present) +func lastModifiedMatch(ifModifiedSinceHeader string, lastModified time.Time) bool { + if ifModifiedSinceHeader == "" || lastModified.IsZero() { + return false + } + ifModifiedSinceTime, err := time.Parse(time.RFC1123, ifModifiedSinceHeader) + if err != nil { + return false + } + // ignoring fractional seconds (as HTTP dates don't include fractional seconds) + return !lastModified.Truncate(time.Second).After(ifModifiedSinceTime) +} + // etagMatch evaluates if we can respond with HTTP 304 Not Modified // It supports multiple weak and strong etags passed in If-None-Match string // including the wildcard one. @@ -745,6 +772,50 @@ func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, rq * return false } +func (i *handler) handleIfModifiedSince(w http.ResponseWriter, r *http.Request, rq *requestData) bool { + // Detect when If-Modified-Since HTTP header allows returning HTTP 304 Not Modified + ifModifiedSince := r.Header.Get("If-Modified-Since") + if ifModifiedSince == "" { + return false + } + + // Resolve path to be able to read pathMetadata.ModTime + pathMetadata, err := i.backend.ResolvePath(r.Context(), rq.immutablePath) + if err != nil { + var forwardedPath path.ImmutablePath + var continueProcessing bool + if isWebRequest(rq.responseFormat) { + forwardedPath, continueProcessing = i.handleWebRequestErrors(w, r, rq.mostlyResolvedPath(), rq.immutablePath, rq.contentPath, err, rq.logger) + if continueProcessing { + pathMetadata, err = i.backend.ResolvePath(r.Context(), forwardedPath) + } + } + if !continueProcessing || err != nil { + err = fmt.Errorf("failed to resolve %s: %w", debugStr(rq.contentPath.String()), err) + i.webError(w, r, err, http.StatusInternalServerError) + return true + } + } + + // Currently we only care about optional mtime from UnixFS 1.5 (dag-pb) + // but other sources of this metadata could be added in the future + lastModified := pathMetadata.ModTime + if lastModifiedMatch(ifModifiedSince, 
lastModified) { + w.WriteHeader(http.StatusNotModified) + return true + } + + // Check if the resolvedPath is an immutable path. + _, err = path.NewImmutablePath(pathMetadata.LastSegment) + if err != nil { + i.webError(w, r, err, http.StatusInternalServerError) + return true + } + + rq.pathMetadata = &pathMetadata + return false +} + // check if request was for one of known explicit formats, // or should use the default, implicit Web+UnixFS behaviors. func isWebRequest(responseFormat string) bool { diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 78e5af952..ee4b130ee 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -94,6 +94,14 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h setIpfsRootsHeader(w, rq, &pathMetadata) + // On deserialized responses, we prefer Last-Modified from pathMetadata + // (mtime in UnixFS 1.5 DAG). This also applies to /ipns/, because value + // from dag-pb, if present, is more meaningful than lastMod inferred from + // namesys. 
+ if !pathMetadata.ModTime.IsZero() { + rq.lastMod = pathMetadata.ModTime + } + resolvedPath := pathMetadata.LastSegment switch mc.Code(resolvedPath.RootCid().Prefix().Codec) { case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor: diff --git a/gateway/testdata/unixfs-dir-with-mode-mtime.car b/gateway/testdata/unixfs-dir-with-mode-mtime.car new file mode 100644 index 0000000000000000000000000000000000000000..feeb66ad2b2b6bfdacebc7298dd80c0eaf1d7f71 GIT binary patch literal 1037 zcmcColvgveUmC@qhJJ zrbSK@VoAv?GL+yK(ub%&EK+%Nf=a!1(#Oy9Z?1V8?>JrGRN3EQ!uAVcX>7ViF<|vZ z5_6bf7Q9?tr@Zmk60heE0(51H0^dddQ155ff4I)b{Oi~MDH^N58jK|ZVHRlRoY*Td z`6bu07q2@w&L59lE9@X$x!wMo@AEU)I`&+T5@Jou%t*rt7q2Nr?a`U9&Py0 z?Acc?#8On3J~IyT-|%f#sCD>p4rWA4p&i|_F- z>Hs^-NFos7stF)h*@L`J$TBWT4i+I^Jw1J(&y4gz;cg5Jx<8=c1_vEMyP=-?#=8AR zPOhlc1@SXJJukY}9&405KQ%G+^L@YkfXx$^9EHTGF*HuW@zVx!q$$Xegfw&Ub1+FT zSS$g>>L<9<2qrtI(~3=Vg}j1GB$k9-;N|LUSTwi0S9HnRnv&l?HN)c_Qzk*2W`yjt Xb#SK<(v0l1C2(DYQaG}%9Uxr*RLZ56 literal 0 HcmV?d00001