Fix nightly tests: modify kubectl exec syntax for creating new minio bucket (#2097)

Fixes #2096

For an example of a failing test run, see:
https://github.com/webrecorder/browsertrix/actions/runs/11121185534/job/30899729448

---------
Co-authored-by: Ilya Kreymer <[email protected]>
tw4l authored Oct 22, 2024
1 parent 1b1819b commit f7426cc
Showing 4 changed files with 56 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/k3d-nightly-ci.yaml
@@ -79,7 +79,7 @@ jobs:
         run: kubectl wait --for=condition=ready pod --all --timeout=240s

       - name: Create Extra Test Buckets
-        run: kubectl exec -i deployment/local-minio -c minio mkdir /data/replica-0
+        run: kubectl exec -i deployment/local-minio -c minio -- mkdir /data/replica-0

       - name: Run Tests
         run: pytest -vv ./backend/test_nightly/test_*.py
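The fix itself is the `--` separator: kubectl treats everything after `--` as the command to run inside the container rather than as its own arguments, and recent kubectl releases require the explicit separator for `kubectl exec`. A minimal sketch of the same step driven from Python (a hypothetical helper, assuming kubectl is on PATH and the chart's local-minio deployment exists) might look like:

```python
import subprocess


def create_replica_bucket(path: str = "/data/replica-0") -> None:
    """Hypothetical helper mirroring the workflow step above."""
    subprocess.run(
        [
            "kubectl", "exec", "-i", "deployment/local-minio",
            "-c", "minio",
            "--",  # everything after this runs inside the minio container
            "mkdir", path,
        ],
        check=True,  # raise if kubectl or mkdir fails
    )


if __name__ == "__main__":
    create_replica_bucket()
```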
51 changes: 31 additions & 20 deletions backend/btrixcloud/crawlconfigs.py
@@ -592,8 +592,9 @@ async def stats_recompute_last(self, cid: UUID, size: int, inc_crawls: int = 1):
         update_query: dict[str, object] = {}

         running_crawl = await self.get_running_crawl(cid)
-        # only look up last finished crawl if no crawls running, otherwise
-        # lastCrawl* stats are already for running crawl
+
+        # If crawl is running, lastCrawl* stats are already for running crawl,
+        # so there's nothing to update other than size and crawl count
         if not running_crawl:
             match_query = {
                 "cid": cid,
@@ -603,26 +604,36 @@ async def stats_recompute_last(self, cid: UUID, size: int, inc_crawls: int = 1):
             last_crawl = await self.crawls.find_one(
                 match_query, sort=[("finished", pymongo.DESCENDING)]
             )
-        else:
-            last_crawl = None

-        if last_crawl:
-            last_crawl_finished = last_crawl.get("finished")
-
-            update_query["lastCrawlId"] = str(last_crawl.get("_id"))
-            update_query["lastCrawlStartTime"] = last_crawl.get("started")
-            update_query["lastStartedBy"] = last_crawl.get("userid")
-            update_query["lastStartedByName"] = last_crawl.get("userName")
-            update_query["lastCrawlTime"] = last_crawl_finished
-            update_query["lastCrawlState"] = last_crawl.get("state")
-            update_query["lastCrawlSize"] = sum(
-                file_.get("size", 0) for file_ in last_crawl.get("files", [])
-            )
-            update_query["lastCrawlStopping"] = False
-            update_query["isCrawlRunning"] = False
+            # Update to reflect last crawl
+            if last_crawl:
+                last_crawl_finished = last_crawl.get("finished")
+
+                update_query["lastCrawlId"] = str(last_crawl.get("_id"))
+                update_query["lastCrawlStartTime"] = last_crawl.get("started")
+                update_query["lastStartedBy"] = last_crawl.get("userid")
+                update_query["lastStartedByName"] = last_crawl.get("userName")
+                update_query["lastCrawlTime"] = last_crawl_finished
+                update_query["lastCrawlState"] = last_crawl.get("state")
+                update_query["lastCrawlSize"] = sum(
+                    file_.get("size", 0) for file_ in last_crawl.get("files", [])
+                )
+                update_query["lastCrawlStopping"] = False
+                update_query["isCrawlRunning"] = False

-            if last_crawl_finished:
-                update_query["lastRun"] = last_crawl_finished
+                if last_crawl_finished:
+                    update_query["lastRun"] = last_crawl_finished
+            # If no last crawl exists and no running crawl, reset stats
+            else:
+                update_query["lastCrawlId"] = None
+                update_query["lastCrawlStartTime"] = None
+                update_query["lastStartedBy"] = None
+                update_query["lastStartedByName"] = None
+                update_query["lastCrawlTime"] = None
+                update_query["lastCrawlState"] = None
+                update_query["lastCrawlSize"] = 0
+                update_query["lastRun"] = None
+                update_query["isCrawlRunning"] = False

         result = await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}},
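The hunk ends just as `update_query` is passed to `find_one_and_update` on the workflow's config document. As a standalone sketch of how such a call is typically made with an async MongoDB driver such as Motor, using a hypothetical connection string and collection name rather than the project's actual wiring:

```python
import asyncio
from typing import Any, Optional
from uuid import UUID

import pymongo
from motor.motor_asyncio import AsyncIOMotorClient


async def apply_stats_update(cid: UUID, update_query: dict[str, Any]) -> Optional[dict]:
    # Hypothetical connection/collection names, for illustration only.
    client = AsyncIOMotorClient("mongodb://localhost:27017", uuidRepresentation="standard")
    crawl_configs = client["example_db"]["crawl_configs"]

    # $set only touches the listed fields; setting a field to None clears it,
    # which is how the "reset stats" branch above takes effect.
    return await crawl_configs.find_one_and_update(
        {"_id": cid, "inactive": {"$ne": True}},
        {"$set": update_query},
        return_document=pymongo.ReturnDocument.AFTER,
    )


if __name__ == "__main__":
    updated = asyncio.run(apply_stats_update(UUID(int=0), {"lastCrawlId": None, "lastRun": None}))
    print(updated)
```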
3 changes: 3 additions & 0 deletions backend/test_nightly/test_crawlconfig_crawl_stats.py
@@ -1,4 +1,5 @@
 import requests
+import time

 from .conftest import API_PREFIX

@@ -70,6 +71,8 @@ def test_crawlconfig_crawl_stats(admin_auth_headers, default_org_id, crawl_config_id):
     data = r.json()
     assert data["deleted"]

+    time.sleep(10)
+
     # Verify crawl stats from /crawlconfigs
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{crawl_config_id}",
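The new `time.sleep(10)` gives the backend time to finish recomputing the workflow's lastCrawl* stats (presumably via the `stats_recompute_last` logic above) after the crawl is deleted, before the test re-reads them. An illustrative alternative, not part of this commit, would be to poll until the stats settle instead of sleeping a fixed interval; a sketch, assuming the response exposes fields like `crawlCount` and `lastCrawlId`:

```python
import time

import requests


def wait_for_stats_reset(api_prefix, org_id, config_id, headers, timeout=60):
    """Poll the crawlconfig until its crawl stats reflect the deletion (sketch)."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        r = requests.get(
            f"{api_prefix}/orgs/{org_id}/crawlconfigs/{config_id}",
            headers=headers,
        )
        data = r.json()
        # Field names here are assumptions for illustration.
        if data.get("crawlCount") == 0 and data.get("lastCrawlId") is None:
            return data
        time.sleep(2)
    raise TimeoutError("crawl stats were not recomputed in time")
```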
29 changes: 21 additions & 8 deletions backend/test_nightly/test_storage_quota.py
@@ -8,18 +8,19 @@
 from .utils import get_crawl_status


-STORAGE_QUOTA_KB = 5
-STORAGE_QUOTA_BYTES = STORAGE_QUOTA_KB * 1000
+STORAGE_QUOTA_MB_TO_INCREASE = 5
+STORAGE_QUOTA_BYTES_INC = STORAGE_QUOTA_MB_TO_INCREASE * 1000 * 1000

 config_id = None

+storage_quota = None

 def run_crawl(org_id, headers):
     crawl_data = {
         "runNow": True,
         "name": "Storage Quota",
         "config": {
-            "seeds": [{"url": "https://webrecorder.net/"}],
+            "seeds": [{"url": "https://specs.webrecorder.net/"}],
             "extraHops": 1,
         },
     }
@@ -34,10 +35,22 @@ def run_crawl(org_id, headers):


 def test_storage_quota(org_with_quotas, admin_auth_headers):
+    # Get current storage usage
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{org_with_quotas}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    bytes_stored = r.json()["bytesStored"]
+
+    global storage_quota
+    storage_quota = bytes_stored + STORAGE_QUOTA_BYTES_INC
+
+    # Set storage quota higher than bytesStored
     r = requests.post(
         f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
         headers=admin_auth_headers,
-        json={"storageQuota": STORAGE_QUOTA_BYTES},
+        json={"storageQuota": storage_quota},
     )
     assert r.status_code == 200
     assert r.json()["updated"]
@@ -49,9 +62,12 @@ def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_headers):
     crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers)
     time.sleep(1)

+    assert crawl_id
+
     while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
         "starting",
         "waiting_capacity",
+        "waiting_org_limit",
     ):
         time.sleep(2)

@@ -63,22 +79,18 @@ def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_headers):
     ):
         time.sleep(2)

     # Ensure that crawl was stopped by quota
     assert (
         get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
         == "stopped_storage_quota_reached"
     )

     time.sleep(10)

     # Ensure crawl storage went over quota
     r = requests.get(
         f"{API_PREFIX}/orgs/{org_with_quotas}",
         headers=admin_auth_headers,
     )
     data = r.json()
     bytes_stored = data["bytesStored"]
-    assert bytes_stored >= STORAGE_QUOTA_BYTES
+    assert bytes_stored >= storage_quota

     time.sleep(5)
