Commit

Merge branch 'main' into issue-1412-download-archived-item-btn

ikreymer committed Jul 24, 2024
2 parents 2632f12 + 24c8963 commit d296cb6
Showing 39 changed files with 795 additions and 516 deletions.
1 change: 1 addition & 0 deletions backend/.pylintrc
@@ -1,2 +1,3 @@
 [MASTER]
 extension-pkg-whitelist=pydantic
+ignore-paths=btrixcloud/migrations/migration_0028_page_files_errors.py
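
Note that pylint's ignore-paths entries are regular expressions matched against file paths, so a single broader pattern could skip every migration module at once; a hypothetical variant, not what this commit does:

[MASTER]
extension-pkg-whitelist=pydantic
# hypothetical: exclude all migration modules by regex
ignore-paths=btrixcloud/migrations/migration_.*\.py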
2 changes: 1 addition & 1 deletion backend/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/library/python:3.10-slim
+FROM docker.io/library/python:3.12-slim
 
 WORKDIR /app
4 changes: 3 additions & 1 deletion backend/btrixcloud/background_jobs.py
@@ -531,7 +531,9 @@ async def retry_background_job(
         """Retry background job"""
         return await ops.retry_background_job(job_id, org)
 
-    @app.post("/orgs/all/jobs/retryFailed", response_model=SuccessResponse)
+    @app.post(
+        "/orgs/all/jobs/retryFailed", response_model=SuccessResponse, tags=["jobs"]
+    )
     async def retry_all_failed_background_jobs(user: User = Depends(user_dep)):
         """Retry failed background jobs from all orgs"""
         if not user.is_superuser:
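
For context, FastAPI's tags argument only changes how the endpoint is grouped in the generated OpenAPI docs; routing and behavior are untouched. A minimal self-contained sketch (route and names are illustrative, not this repo's API):

from fastapi import FastAPI

app = FastAPI()

# "jobs" places this route under a "jobs" heading in the /docs UI
@app.post("/jobs/retryFailed", tags=["jobs"])
async def retry_failed() -> dict[str, bool]:
    return {"success": True}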
2 changes: 1 addition & 1 deletion backend/btrixcloud/basecrawls.py
@@ -714,7 +714,7 @@ async def delete_crawls_all_types(
         delete_list: DeleteCrawlList,
         org: Organization,
         user: Optional[User] = None,
-    ):
+    ) -> dict[str, bool]:
         """Delete uploaded crawls"""
         crawls: list[str] = []
         uploads: list[str] = []
27 changes: 13 additions & 14 deletions backend/btrixcloud/crawlconfigs.py
@@ -25,7 +25,6 @@
     CrawlConfigOut,
     CrawlConfigProfileOut,
     CrawlOut,
-    EmptyStr,
     UpdateCrawlConfig,
     Organization,
     User,
@@ -188,13 +187,13 @@ async def add_crawl_config(
         if not self.get_channel_crawler_image(config_in.crawlerChannel):
             raise HTTPException(status_code=404, detail="crawler_not_found")
 
-        profile_id = None
+        profileid = None
         if isinstance(config_in.profileid, UUID):
-            profile_id = config_in.profileid
+            profileid = config_in.profileid
 
         # ensure profile is valid, if provided
-        if profile_id:
-            await self.profiles.get_profile(profile_id, org)
+        if profileid:
+            await self.profiles.get_profile(profileid, org)
 
         now = dt_now()
         crawlconfig = CrawlConfig(
@@ -216,7 +215,7 @@ async def add_crawl_config(
             maxCrawlSize=config_in.maxCrawlSize,
             scale=config_in.scale,
             autoAddCollections=config_in.autoAddCollections,
-            profileid=profile_id,
+            profileid=profileid,
             crawlerChannel=config_in.crawlerChannel,
             crawlFilenameTemplate=config_in.crawlFilenameTemplate,
         )
@@ -262,7 +261,7 @@ async def add_crawl_config(
 
     async def add_new_crawl(
         self, crawl_id: str, crawlconfig: CrawlConfig, user: User, manual: bool
-    ):
+    ) -> None:
         """increments crawl count for this config and adds new crawl"""
 
         started = dt_now()
@@ -277,7 +276,7 @@ async def add_new_crawl(
 
         await asyncio.gather(inc, add, info)
 
-    async def inc_crawl_count(self, cid: UUID):
+    async def inc_crawl_count(self, cid: UUID) -> None:
         """inc crawl count for config"""
         await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}},
@@ -286,7 +285,7 @@ async def inc_crawl_count(self, cid: UUID):
 
     def check_attr_changed(
         self, crawlconfig: CrawlConfig, update: UpdateCrawlConfig, attr_name: str
-    ):
+    ) -> bool:
         """check if attribute is set and has changed. if not changed, clear it on the update"""
         if getattr(update, attr_name) is not None:
             if getattr(update, attr_name) != getattr(crawlconfig, attr_name):
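
The body of check_attr_changed is partially collapsed here; per its docstring, the idiom is: report True only when the update sets the attribute to a new value, and clear unchanged values from the update so they are not rewritten. A standalone sketch of that check-and-clear idiom (plain objects stand in for the Pydantic models):

from types import SimpleNamespace

def check_attr_changed(current, update, attr_name: str) -> bool:
    # set and different -> changed; set but equal -> clear it on the update
    if getattr(update, attr_name) is not None:
        if getattr(update, attr_name) != getattr(current, attr_name):
            return True
        setattr(update, attr_name, None)
    return False

current = SimpleNamespace(scale=1)
update = SimpleNamespace(scale=1)
assert check_attr_changed(current, update, "scale") is False
assert update.scale is None  # equal value was cleared from the update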
@@ -296,7 +295,7 @@ def check_attr_changed(
 
     async def update_crawl_config(
         self, cid: UUID, org: Organization, user: User, update: UpdateCrawlConfig
-    ) -> dict[str, bool]:
+    ) -> dict[str, bool | str]:
         # pylint: disable=too-many-locals
         """Update name, scale, schedule, and/or tags for an existing crawl config"""
 
@@ -368,11 +367,11 @@ async def update_crawl_config(
         query["modified"] = dt_now()
 
         # if empty str, just clear the profile
-        if isinstance(update.profileid, EmptyStr) or update.profileid == "":
+        if update.profileid == "":
             query["profileid"] = None
         # else, ensure its a valid profile
         elif update.profileid:
-            await self.profiles.get_profile(update.profileid, org)
+            await self.profiles.get_profile(cast(UUID, update.profileid), org)
             query["profileid"] = update.profileid
 
         if update.config is not None:
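
The convention in this hunk: profileid == "" detaches the profile, a UUID attaches one (after get_profile validates it), and None leaves the field untouched. A standalone sketch of the tri-state update pattern, with validation stubbed out:

from uuid import UUID, uuid4

def profile_update_query(profileid: UUID | str | None) -> dict:
    query: dict = {}
    if profileid == "":
        query["profileid"] = None  # empty string: clear the profile
    elif profileid:
        query["profileid"] = profileid  # UUID: set it (validated upstream)
    # None: omit the field entirely, leaving the stored value unchanged
    return query

assert profile_update_query("") == {"profileid": None}
assert profile_update_query(None) == {}
assert "profileid" in profile_update_query(uuid4())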
@@ -403,7 +402,7 @@ async def update_crawl_config(
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )
 
-        ret = {
+        ret: dict[str, bool | str] = {
             "updated": True,
             "settings_changed": changed,
             "metadata_changed": metadata_changed,
@@ -908,7 +907,7 @@ def get_warc_prefix(self, org: Organization, crawlconfig: CrawlConfig) -> str:
         name = crawlconfig.name
         if not name:
             if crawlconfig.config.seeds and len(crawlconfig.config.seeds):
-                url = crawlconfig.config.seeds[0].url
+                url = str(crawlconfig.config.seeds[0].url)
                 parts = urllib.parse.urlsplit(url)
                 name = parts.netloc
 
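The added str() call suggests seeds[0].url is now a URL object rather than a plain string (an assumption; it fits the Pydantic typing work elsewhere in this commit), and urlsplit needs a str. The netloc of the first seed then becomes the WARC filename prefix:

import urllib.parse

url = "https://example.com/some/page.html"
parts = urllib.parse.urlsplit(url)
print(parts.netloc)  # example.com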
