Implement downloading archived item + QA runs as multi-WACZ #1933

Merged 18 commits on Jul 25, 2024

Changes from 8 commits

25 changes: 25 additions & 0 deletions backend/btrixcloud/basecrawls.py
@@ -8,6 +8,7 @@

import asyncio
from fastapi import HTTPException, Depends
from fastapi.responses import StreamingResponse

from .models import (
CrawlFile,
@@ -792,6 +793,20 @@ async def get_all_crawl_search_values(
"firstSeeds": list(first_seeds),
}

async def download_crawl_as_single_wacz(self, crawl_id: str, org: Organization):
"""Download all WACZs in archived item as streaming nested WACZ"""
crawl = await self.get_crawl_out(crawl_id, org)

if not crawl.resources:
raise HTTPException(status_code=400, detail="no_crawl_resources")

resp = await self.storage_ops.download_streaming_wacz(org, crawl.resources)

headers = {"Content-Disposition": f'attachment; filename="{crawl_id}.wacz"'}
return StreamingResponse(
resp, headers=headers, media_type="application/wacz+zip"
)


# ============================================================================
def init_base_crawls_api(app, user_dep, *args):
@@ -891,6 +906,16 @@ async def get_base_crawl_admin(crawl_id, user: User = Depends(user_dep)):
async def get_crawl_out(crawl_id, org: Organization = Depends(org_viewer_dep)):
return await ops.get_crawl_out(crawl_id, org)

@app.get(
"/orgs/{oid}/all-crawls/{crawl_id}/download",
tags=["all-crawls"],
response_model=bytes,
)
async def download_base_crawl_as_single_wacz(
crawl_id: str, org: Organization = Depends(org_viewer_dep)
):
return await ops.download_crawl_as_single_wacz(crawl_id, org)

@app.patch("/orgs/{oid}/all-crawls/{crawl_id}", tags=["all-crawls"])
async def update_crawl(
update: UpdateCrawl, crawl_id: str, org: Organization = Depends(org_crawl_dep)
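For context, a minimal client-side sketch of how the new `/orgs/{oid}/all-crawls/{crawl_id}/download` endpoint might be exercised; the base URL, org ID, item ID, and token below are placeholders, and the streaming pattern mirrors the tests added later in this PR:

```python
import requests

API_PREFIX = "https://app.example.com/api"    # placeholder base URL
ORG_ID = "<org-uuid>"                          # placeholder organization ID
CRAWL_ID = "<crawl-id>"                        # placeholder archived item ID
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder access token

# Stream the nested multi-WACZ to disk without buffering it all in memory
url = f"{API_PREFIX}/orgs/{ORG_ID}/all-crawls/{CRAWL_ID}/download"
with requests.get(url, headers=HEADERS, stream=True) as r:
    r.raise_for_status()
    with open("archived-item.wacz", "wb") as fh:
        for chunk in r.iter_content(chunk_size=64 * 1024):
            fh.write(chunk)
```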
8 changes: 8 additions & 0 deletions backend/btrixcloud/crawls.py
@@ -1158,6 +1158,14 @@ async def get_crawl_admin(crawl_id, user: User = Depends(user_dep)):
async def get_crawl_out(crawl_id, org: Organization = Depends(org_viewer_dep)):
return await ops.get_crawl_out(crawl_id, org, "crawl")

@app.get(
"/orgs/{oid}/crawls/{crawl_id}/download", tags=["crawls"], response_model=bytes
)
async def download_crawl_as_single_wacz(
crawl_id: str, org: Organization = Depends(org_viewer_dep)
):
return await ops.download_crawl_as_single_wacz(crawl_id, org)

# QA APIs
# ---------------------
@app.get(
10 changes: 10 additions & 0 deletions backend/btrixcloud/uploads.py
@@ -407,6 +407,16 @@ async def get_upload_replay_admin(crawl_id, user: User = Depends(user_dep)):
async def get_upload_replay(crawl_id, org: Organization = Depends(org_viewer_dep)):
return await ops.get_crawl_out(crawl_id, org, "upload")

@app.get(
"/orgs/{oid}/uploads/{crawl_id}/download",
tags=["uploads"],
response_model=bytes,
)
async def download_upload_as_single_wacz(
crawl_id: str, org: Organization = Depends(org_viewer_dep)
):
return await ops.download_crawl_as_single_wacz(crawl_id, org)

@app.patch("/orgs/{oid}/uploads/{crawl_id}", tags=["uploads"])
async def update_uploads_api(
update: UpdateUpload, crawl_id: str, org: Organization = Depends(org_crawl_dep)
36 changes: 36 additions & 0 deletions backend/test/test_run_crawl.py
@@ -6,6 +6,10 @@
import re
import csv
import codecs
from tempfile import TemporaryFile
from zipfile import ZipFile, ZIP_STORED

import pytest

from .conftest import API_PREFIX, HOST_PREFIX
from .test_collections import UPDATED_NAME as COLLECTION_NAME
@@ -242,6 +246,38 @@ def test_verify_wacz():
assert len(pages.strip().split("\n")) == 4


@pytest.mark.parametrize(
"type_path",
[
# crawls endpoint
("crawls"),
# all-crawls endpoint
("all-crawls"),
],
)
def test_download_wacz_crawls(
admin_auth_headers, default_org_id, admin_crawl_id, type_path
):
with TemporaryFile() as fh:
with requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/{type_path}/{admin_crawl_id}/download",
headers=admin_auth_headers,
stream=True,
) as r:
assert r.status_code == 200
for chunk in r.iter_content():
fh.write(chunk)

fh.seek(0)
with ZipFile(fh, "r") as zip_file:
contents = zip_file.namelist()

assert len(contents) >= 2
for filename in contents:
assert filename.endswith(".wacz") or filename == "datapackage.json"
assert zip_file.getinfo(filename).compress_type == ZIP_STORED


def test_update_crawl(
admin_auth_headers,
default_org_id,
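As a companion to the test above, a small sketch of inspecting a downloaded multi-WACZ locally; the `resources` layout of `datapackage.json` is assumed from the data package convention and is not asserted by this PR:

```python
import json
from zipfile import ZipFile

# Inspect a multi-WACZ downloaded via the new endpoint (placeholder filename)
with ZipFile("archived-item.wacz") as zf:
    print(zf.namelist())  # member WACZ files plus datapackage.json
    with zf.open("datapackage.json") as fh:
        pkg = json.load(fh)
    # Assumed: a "resources" list describing the member WACZs
    for resource in pkg.get("resources", []):
        print(resource.get("path") or resource.get("name"))
```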
23 changes: 23 additions & 0 deletions backend/test/test_uploads.py
@@ -1,7 +1,9 @@
import requests
import os
import time
from tempfile import TemporaryFile
from urllib.parse import urljoin
from zipfile import ZipFile, ZIP_STORED

import pytest

@@ -329,6 +331,27 @@ def test_update_upload_metadata(admin_auth_headers, default_org_id, upload_id):
assert data["collectionIds"] == UPDATED_COLLECTION_IDS


def test_download_wacz_uploads(admin_auth_headers, default_org_id, upload_id):
with TemporaryFile() as fh:
with requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/download",
headers=admin_auth_headers,
stream=True,
) as r:
assert r.status_code == 200
for chunk in r.iter_content():
fh.write(chunk)

fh.seek(0)
with ZipFile(fh, "r") as zip_file:
contents = zip_file.namelist()

assert len(contents) == 2
for filename in contents:
assert filename.endswith(".wacz") or filename == "datapackage.json"
assert zip_file.getinfo(filename).compress_type == ZIP_STORED


def test_delete_stream_upload(
admin_auth_headers, crawler_auth_headers, default_org_id, upload_id
):
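The ZIP_STORED assertions in both test files reflect that the member WACZs are stored uncompressed inside the outer archive. Below is a purely illustrative sketch of that layout using the standard library; the server builds the archive by streaming from object storage, so this is not the actual implementation, and the datapackage.json fields are assumptions:

```python
import json
from zipfile import ZipFile, ZIP_STORED

# Illustrative only: pack local WACZ files into a nested multi-WACZ
member_waczs = ["crawl-part-1.wacz", "crawl-part-2.wacz"]  # placeholder inputs
datapackage = {
    "profile": "data-package",  # assumed field
    "resources": [{"name": path, "path": path} for path in member_waczs],
}

with ZipFile("combined.wacz", "w", compression=ZIP_STORED) as zf:
    for path in member_waczs:
        # Member WACZs are already compressed, so store them as-is
        zf.write(path, arcname=path)
    zf.writestr("datapackage.json", json.dumps(datapackage, indent=2))
```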
frontend/src/pages/org/archived-item-detail.ts
@@ -275,7 +275,21 @@ export class ArchivedItemDetail extends TailwindElement {
]);
break;
case "files":
sectionContent = this.renderPanel(msg("Files"), this.renderFiles());
sectionContent = this.renderPanel(
html` ${this.renderTitle(msg("Files"))}
<sl-tooltip content=${msg("Download all files as a single WACZ")}>
<sl-button
href=${`/api/orgs/${this.orgId}/all-crawls/${this.crawlId}/download?auth_bearer=${authToken}`}
download
size="small"
variant="primary"
>
<sl-icon slot="prefix" name="cloud-download"></sl-icon>
${msg("Download")}
</sl-button>
</sl-tooltip>`,
this.renderFiles(),
);
break;
case "logs":
sectionContent = this.renderPanel(
@@ -558,6 +572,8 @@ export class ArchivedItemDetail extends TailwindElement {
private renderMenu() {
if (!this.crawl) return;

const authToken = this.authState!.headers.Authorization.split(" ")[1];

return html`
<sl-dropdown placement="bottom-end" distance="4" hoist>
<sl-button slot="trigger" size="small" caret
@@ -609,6 +625,19 @@
<sl-icon name="tags" slot="prefix"></sl-icon>
${msg("Copy Tags")}
</sl-menu-item>
${when(
finishedCrawlStates.includes(this.crawl.state),
() => html`
<sl-divider></sl-divider>
<btrix-menu-item-link
href=${`/api/orgs/${this.orgId}/all-crawls/${this.crawlId}/download?auth_bearer=${authToken}`}
download
>
<sl-icon name="cloud-download" slot="prefix"></sl-icon>
${msg("Download Item")}
</btrix-menu-item-link>
`,
)}
${when(
this.isCrawler && !isActive(this.crawl.state),
() => html`
@@ -618,7 +647,7 @@
@click=${() => void this.deleteCrawl()}
>
<sl-icon name="trash3" slot="prefix"></sl-icon>
${msg("Delete Crawl")}
${msg("Delete Item")}
</sl-menu-item>
`,
)}
28 changes: 19 additions & 9 deletions frontend/src/pages/org/archived-items.ts
@@ -603,23 +603,19 @@ export class CrawlsList extends TailwindElement {
?showStatus=${this.itemType !== null}
>
<btrix-table-cell slot="actionCell" class="px-1">
<btrix-overflow-dropdown
@click=${(e: MouseEvent) => {
// Prevent navigation to detail view
e.preventDefault();
e.stopImmediatePropagation();
}}
>
<btrix-overflow-dropdown>
<sl-menu>${this.renderMenuItems(item)}</sl-menu>
</btrix-overflow-dropdown>
</btrix-table-cell>
</btrix-archived-item-list-item>
`;

private readonly renderMenuItems = (item: ArchivedItem) =>
private readonly renderMenuItems = (item: ArchivedItem) => {
// HACK shoelace doesn't currently have a way to override non-hover
// color without resetting the --sl-color-neutral-700 variable
html`
const authToken = this.authState!.headers.Authorization.split(" ")[1];

return html`
${when(
this.isCrawler,
() => html`
@@ -664,6 +660,19 @@
<sl-icon name="tags" slot="prefix"></sl-icon>
${msg("Copy Tags")}
</sl-menu-item>
${when(
finishedCrawlStates.includes(item.state),
() => html`
<sl-divider></sl-divider>
<btrix-menu-item-link
href=${`/api/orgs/${this.orgId}/all-crawls/${item.id}/download?auth_bearer=${authToken}`}
download
>
<sl-icon name="cloud-download" slot="prefix"></sl-icon>
${msg("Download Item")}
</btrix-menu-item-link>
`,
)}
${when(
this.isCrawler && !isActive(item.state),
() => html`
Expand All @@ -678,6 +687,7 @@ export class CrawlsList extends TailwindElement {
`,
)}
`;
};

private readonly renderStatusMenuItem = (state: CrawlState) => {
const { icon, label } = CrawlStatus.getContent(state);