logging cleanup, log crawl stopping reason
ikreymer committed Jul 25, 2024
1 parent fb21fb3 commit ef5b2a2
Showing 2 changed files with 5 additions and 18 deletions.
22 changes: 5 additions & 17 deletions backend/btrixcloud/operator/crawls.py
@@ -360,7 +360,6 @@ async def _load_qa_configmap(self, params, children):
 
         params["name"] = name
         params["qa_source_replay_json"] = crawl_replay.json(include={"resources"})
-        print(params["qa_source_replay_json"])
         return self.load_from_yaml("qa_configmap.yaml", params)
 
     def _load_crawler(self, params, i, status, children):
@@ -587,20 +586,12 @@ async def can_start_new(self, crawl: CrawlSpec, data: MCSyncData, status):
 
         name = data.parent.get("metadata", {}).get("name")
 
-        # def metadata_key(val):
-        #     return val.get("metadata").get("creationTimestamp")
-
-        # all_crawljobs = sorted(data.related[CJS].values(), key=metadata_key)
-        # print(list(data.related[CJS].keys()))
-
         i = 0
         for crawl_sorted in data.related[CJS].values():
             if crawl_sorted.get("status", {}).get("state") in NON_RUNNING_STATES:
                 continue
 
-            # print(i, crawl_sorted.get("metadata").get("name"))
             if crawl_sorted.get("metadata").get("name") == name:
-                # print("found: ", name, "index", i)
                 if i < max_crawls:
                     return True
 
@@ -1212,7 +1203,6 @@ async def is_crawl_stopping(
         """check if crawl is stopping and set reason"""
         # if user requested stop, then enter stopping phase
         if crawl.stopping:
-            print("Graceful Stop: User requested stop")
             return "stopped_by_user"
 
         # check timeout if timeout time exceeds elapsed time
@@ -1224,29 +1214,25 @@
             ).total_seconds()
 
             if elapsed > crawl.timeout:
-                print(
-                    f"Graceful Stop: Crawl running time exceeded {crawl.timeout} second timeout"
-                )
                 return "time-limit"
 
         # crawl size limit
         if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
-            print(f"Graceful Stop: Maximum crawl size {crawl.max_crawl_size} hit")
             return "size-limit"
 
         # gracefully stop crawl if current running crawl sizes reach storage quota
         org = await self.org_ops.get_org_by_id(crawl.oid)
 
         if org.quotas.storageQuota:
             running_crawls_total_size = status.size
-            for crawl_jobs in data.related[CJS].values():
+            for crawl_job in data.related[CJS].values():
                 # if the job id matches current crawl job, then skip
                 # this job to avoid double-counting
                 # using the more up-to-date 'status.size' for this job
-                if crawl_jobs.get("spec", {}).get("id") == crawl.id:
+                if crawl_job.get("spec", {}).get("id") == crawl.id:
                     continue
 
-                crawl_status = crawl_jobs.get("status", {})
+                crawl_status = crawl_job.get("status", {})
                 if crawl_status:
                     running_crawls_total_size += crawl_status.get("size", 0)
 
@@ -1317,6 +1303,8 @@ async def update_crawl_state(
         if not status.stopReason:
             status.stopReason = await self.is_crawl_stopping(crawl, status, data)
             status.stopping = status.stopReason is not None
+            if status.stopping:
+                print(f"Crawl gracefully stopping: {status.stopReason}, id: {crawl.id}")
 
         # mark crawl as stopping
         if status.stopping:
1 change: 0 additions & 1 deletion backend/btrixcloud/pages.py
@@ -170,7 +170,6 @@ async def add_page_to_db(
             return
 
         compare = PageQACompare(**compare_dict)
-        print("Adding QA Run Data for Page", page_dict.get("url"), compare)
 
         await self.add_qa_run_for_page(page.id, oid, qa_run_id, compare)
 
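Taken together, the crawls.py hunks converge on one pattern: each limit check in is_crawl_stopping returns a reason string ("stopped_by_user", "time-limit", "size-limit") or None, and update_crawl_state logs once when a reason is first recorded, instead of printing inside every individual check. Below is a minimal standalone sketch of that pattern; the CrawlSpec and CrawlStatus classes are simplified stand-ins for the real btrixcloud models, included only so the sketch runs on its own.

    # Sketch of the stop-reason pattern this commit settles on, under the
    # assumption stated above: simplified stand-in models, not btrixcloud's.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CrawlSpec:
        id: str
        stopping: bool = False      # set when a user requests a graceful stop
        timeout: int = 0            # seconds; 0 = no time limit
        max_crawl_size: int = 0     # bytes; 0 = no size limit

    @dataclass
    class CrawlStatus:
        size: int = 0               # bytes crawled so far
        stopReason: Optional[str] = None
        stopping: bool = False

    def is_crawl_stopping(crawl: CrawlSpec, status: CrawlStatus, elapsed: float) -> Optional[str]:
        """Return why the crawl should stop, or None to keep running."""
        if crawl.stopping:
            return "stopped_by_user"
        if crawl.timeout and elapsed > crawl.timeout:
            return "time-limit"
        if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
            return "size-limit"
        return None

    def update_crawl_state(crawl: CrawlSpec, status: CrawlStatus, elapsed: float) -> None:
        # Log once, where the stop reason is first set, rather than inside
        # each individual check -- the shape of the change in this commit.
        if not status.stopReason:
            status.stopReason = is_crawl_stopping(crawl, status, elapsed)
            status.stopping = status.stopReason is not None
            if status.stopping:
                print(f"Crawl gracefully stopping: {status.stopReason}, id: {crawl.id}")

    # Example: a crawl that has exceeded its size limit
    update_crawl_state(
        CrawlSpec(id="crawl-123", max_crawl_size=1_000),
        CrawlStatus(size=2_000),
        elapsed=10.0,
    )
    # prints: Crawl gracefully stopping: size-limit, id: crawl-123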
