diff --git a/.github/workflows/javascript_tests.yml b/.github/workflows/javascript_tests.yml index 14f22bda8d5..12440d142d4 100644 --- a/.github/workflows/javascript_tests.yml +++ b/.github/workflows/javascript_tests.yml @@ -24,7 +24,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-node@v3 with: - node-version: '20' # Should match what's in our Dockerfile + # Should match what's in our Dockerfile + node-version: '20' # Also update the `key` in the `with` map, below - uses: actions/cache@v3 id: npm-cache with: @@ -35,7 +36,7 @@ jobs: # `node_modules` directly. path: 'node_modules' # Note the version number in this string -- update it when updating Node! - key: ${{ runner.os }}-node16-${{ hashFiles('**/package-lock.json') }} + key: ${{ runner.os }}-node20-${{ hashFiles('**/package-lock.json') }} - if: steps.npm-cache.outputs.cache-hit != 'true' run: npm install --no-audit - run: npm run lint diff --git a/.github/workflows/new_comment_digest.yml b/.github/workflows/new_comment_digest.yml new file mode 100644 index 00000000000..ce72caf6430 --- /dev/null +++ b/.github/workflows/new_comment_digest.yml @@ -0,0 +1,21 @@ +name: new_comment_digest +on: + schedule: # 08:30 daily + - cron: '30 8 * * *' + workflow_dispatch: +permissions: + contents: read + +jobs: + new_comment_digest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - run: pip install requests + - run: scripts/gh_scripts/issue_comment_bot.py 24 "$SLACK_CHANNEL" "$SLACK_TOKEN" + env: + SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL_ABC_TEAM_PLUS }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcaab85073e..df8797c63fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.1.14 hooks: - id: ruff diff --git a/conf/coverstore.yml 
b/conf/coverstore.yml index 2baaebdec59..44c538f481c 100644 --- a/conf/coverstore.yml +++ b/conf/coverstore.yml @@ -12,4 +12,5 @@ sentry: enabled: false # Dummy endpoint; where sentry logs are sent to dsn: 'https://examplePublicKey@o0.ingest.sentry.io/0' + traces_sample_rate: 1.0 environment: 'local' diff --git a/conf/solr/conf/managed-schema.xml b/conf/solr/conf/managed-schema.xml index c74ebff8f0e..b513a8f9c51 100644 --- a/conf/solr/conf/managed-schema.xml +++ b/conf/solr/conf/managed-schema.xml @@ -514,8 +514,8 @@ - + = 8_820_000 and d.uploaded and '.zip' in d.filename: - raise web.found( + return web.found( archive.Cover.get_cover_url( d.id, size=size, protocol=web.ctx.protocol ) ) return read_image(d, size) except OSError: - raise web.notfound() + return web.notfound() def get_ia_cover_url(self, identifier, size="M"): url = "https://archive.org/metadata/%s/metadata" % identifier diff --git a/openlibrary/i18n/pt/messages.po b/openlibrary/i18n/pt/messages.po index fafca1c8542..bfd77928985 100644 --- a/openlibrary/i18n/pt/messages.po +++ b/openlibrary/i18n/pt/messages.po @@ -638,7 +638,7 @@ msgstr "Pedir emprestado \"%(title)s\"" #: ReadButton.html:15 type/list/embed.html:79 widget.html:39 msgid "Borrow" -msgstr "Emprestar" +msgstr "Empréstimo" #: widget.html:45 #, python-format @@ -3172,7 +3172,7 @@ msgstr "Explorar o Centro de Desenvolvimento da Open Library" #: lib/nav_foot.html:37 lib/nav_head.html:25 msgid "Developer Center" -msgstr "Centro de Desenvolvedor" +msgstr "Central de desenvolvedor" #: lib/nav_foot.html:38 msgid "Explore Open Library APIs" @@ -3204,7 +3204,7 @@ msgstr "Adicionar um novo livro à Open Library" #: lib/nav_foot.html:47 msgid "Help Center" -msgstr "Centro de ajuda" +msgstr "Central de ajuda" #: lib/nav_foot.html:48 msgid "Problems" diff --git a/openlibrary/macros/ShareModal.html b/openlibrary/macros/ShareModal.html index ede2b16d511..8ec6e488ec6 100644 --- a/openlibrary/macros/ShareModal.html +++ b/openlibrary/macros/ShareModal.html @@ 
-6,7 +6,7 @@ $def embed_iframe(page_url): - +
$:link_markup diff --git a/openlibrary/plugins/openlibrary/sentry.py b/openlibrary/plugins/openlibrary/sentry.py index e21ef55c4bd..ef729885c6e 100644 --- a/openlibrary/plugins/openlibrary/sentry.py +++ b/openlibrary/plugins/openlibrary/sentry.py @@ -1,6 +1,6 @@ import infogami from infogami.utils import delegate -from openlibrary.utils.sentry import Sentry, SentryProcessor +from openlibrary.utils.sentry import Sentry, InfogamiSentryProcessor sentry: Sentry | None = None @@ -12,4 +12,4 @@ def setup(): if sentry.enabled: sentry.init() delegate.add_exception_hook(lambda: sentry.capture_exception_webpy()) - delegate.app.add_processor(SentryProcessor()) + delegate.app.add_processor(InfogamiSentryProcessor(delegate.app)) diff --git a/openlibrary/utils/sentry.py b/openlibrary/utils/sentry.py index 8de2c91a923..cd17f266abb 100644 --- a/openlibrary/utils/sentry.py +++ b/openlibrary/utils/sentry.py @@ -76,6 +76,7 @@ def capture_exception(): return _internalerror() app.internalerror = capture_exception + app.add_processor(WebPySentryProcessor(app)) def capture_exception_webpy(self): with sentry_sdk.push_scope() as scope: @@ -97,12 +98,48 @@ def to_sentry_name(self) -> str: ) -class SentryProcessor: +class WebPySentryProcessor: + def __init__(self, app: web.application): + self.app = app + + def find_route_name(self) -> str: + handler, groups = self.app._match(self.app.mapping, web.ctx.path) + web.debug('ROUTE HANDLER', handler, groups) + return handler or '' + + def __call__(self, handler): + route_name = self.find_route_name() + hub = sentry_sdk.Hub.current + with sentry_sdk.Hub(hub) as hub: + with hub.configure_scope() as scope: + scope.clear_breadcrumbs() + scope.add_event_processor(add_web_ctx_to_event) + + environ = dict(web.ctx.env) + # Don't forward cookies to Sentry + if 'HTTP_COOKIE' in environ: + del environ['HTTP_COOKIE'] + + transaction = Transaction.continue_from_environ( + environ, + op="http.server", + name=route_name, + source=TRANSACTION_SOURCE_ROUTE, + ) + 
+ with hub.start_transaction(transaction): + try: + return handler() + finally: + transaction.set_http_status(int(web.ctx.status.split()[0])) + + +class InfogamiSentryProcessor(WebPySentryProcessor): """ Processor to profile the webpage and send a transaction to Sentry. """ - def __call__(self, handler): + def find_route_name(self) -> str: def find_type() -> tuple[str, str] | None: return next( ( @@ -134,27 +171,4 @@ def find_route() -> InfogamiRoute: return result - route = find_route() - hub = sentry_sdk.Hub.current - with sentry_sdk.Hub(hub) as hub: - with hub.configure_scope() as scope: - scope.clear_breadcrumbs() - scope.add_event_processor(add_web_ctx_to_event) - - environ = dict(web.ctx.env) - # Don't forward cookies to Sentry - if 'HTTP_COOKIE' in environ: - del environ['HTTP_COOKIE'] - - transaction = Transaction.continue_from_environ( - environ, - op="http.server", - name=route.to_sentry_name(), - source=TRANSACTION_SOURCE_ROUTE, - ) - - with hub.start_transaction(transaction): - try: - return handler() - finally: - transaction.set_http_status(int(web.ctx.status.split()[0])) + return find_route().to_sentry_name() diff --git a/scripts/gh_scripts/README.md b/scripts/gh_scripts/README.md index 020ed75b1be..42e8a91041e 100644 --- a/scripts/gh_scripts/README.md +++ b/scripts/gh_scripts/README.md @@ -24,3 +24,27 @@ __Incorrect:__ `node auto_unassigner.mjs --daysSince=21` The GitHub action that runs this script automatically sets `--repoOwner` to the owner of the repository that triggered the action. + +To quickly see a script's purpose and arguments, run the script with the `-h` or `--help` flag. + +## `issue_comment_bot.py` + +This script fetches issues that have new comments from contributors within the past number of hours, then posts a message to the team in our Slack channel. 
+ +### Usage: +This script has three positional arguments: +``` + hours Fetch issues that have been updated since this many hours ago + channel Issues will be published to this Slack channel + slack-token Slack authentication token +``` + +__Running the script locally:__ +``` +docker compose exec -e PYTHONPATH=. web bash + +# Publish digest of new comments from the past day to #openlibrary-g: +./scripts/gh_scripts/issue_comment_bot.py 24 "#openlibrary-g" "replace-with-slack-token" +``` + +__Note:__ When adding arguments, be sure to place any hyphenated values within double quotes. diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py new file mode 100755 index 00000000000..9f12ec0f9c6 --- /dev/null +++ b/scripts/gh_scripts/issue_comment_bot.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +""" +Fetches Open Library GitHub issues that have been commented on +within some amount of time, in hours. + +Writes links to each issue to given Slack channel. +""" +import argparse +import errno +import sys +import time + +from datetime import datetime, timedelta +from typing import Any + +import requests + +# Maps lead label to GitHub username +lead_label_to_username = { + 'Lead: @mekarpeles': 'mekarpeles', + 'Lead: @cdrini': 'cdrini', + 'Lead: @scottbarnes': 'scottbarnes', + 'Lead: @seabelis': 'seabelis', + 'Lead: @jimchamp': 'jimchamp', +} + +# Maps GitHub username to Slack ID +username_to_slack_id = { + 'mekarpeles': '<@mek>', + 'cdrini': '<@cdrini>', + 'scottbarnes': '<@U03MNR6T7FH>', + 'seabelis': '<@UAHQ39ACT>', + 'jimchamp': '<@U01ARTHG9EV>', + 'hornc': '<@U0EUS8DV0>', +} + + +def fetch_issues(updated_since: str): + """ + Fetches all GitHub issues that have been updated since the given date string and have at least one comment. + + GitHub results are paginated. This function appends each result to a list, and does so for all pages.
+ To keep API calls to a minimum, we request the maximum number of results per request (100 per page, as of writing). + + Important: Updated issues need not have a recent comment. Update events include many other things, such as adding a + label to an issue, or moving an issue to a milestone. Issues returned by this function will require additional + processing in order to determine if they have recent comments. + """ + # Make initial query for updated issues: + query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}' + p: dict[str, str | int] = { + 'q': query, + 'per_page': 100, + } + response = requests.get( + 'https://api.github.com/search/issues', + params=p, + ) + d = response.json() + results = d['items'] + + # Fetch additional updated issues, if any exist + def get_next_page(url: str): + """Returns list of issues and optional url for next page""" + resp = requests.get(url) + # Get issues + d = resp.json() + issues = d['items'] + # Prepare url for next page + next = resp.links.get('next', {}) + next_url = next.get('url', '') + + return issues, next_url + + links = response.links + next = links.get('next', {}) + next_url = next.get('url', '') + while next_url: + # Make call with next link + issues, next_url = get_next_page(next_url) + results = results + issues + + return results + + +def filter_issues(issues: list, since: datetime): + """ + Returns list of issues that were not last responded to by staff. + Requires fetching the most recent comments for the given issues. 
+ """ + results = [] + + for i in issues: + # Fetch comments using URL from previous GitHub search results + comments_url = i.get('comments_url') + resp = requests.get(comments_url, params={'per_page': 100}) + + # Ensure that we have the last page of comments + links = resp.links + last = links.get('last', {}) + last_url = last.get('url', '') + + if last_url: + resp = requests.get(last_url) + + # Get last comment + comments = resp.json() + last_comment = comments[-1] + + # Determine if last comment meets our criteria for Slack notifications + # First step: Ensure that the last comment was left after the given `since` datetime + created = datetime.fromisoformat(last_comment['created_at']) + # Removing timezone info to avoid TypeErrors, which occur when + # comparing a timezone-aware datetime with a timezone-naive datetime + created = created.replace(tzinfo=None) + if created > since: + # Next step: Determine if the last commenter is a staff member + last_commenter = last_comment['user']['login'] + if last_commenter not in username_to_slack_id: + lead_label = find_lead_label(i.get('labels', [])) + results.append( + { + 'comment_url': last_comment['html_url'], + 'commenter': last_commenter, + 'issue_title': i['title'], + 'lead_label': lead_label, + } + ) + + return results + + +def find_lead_label(labels: list[dict[str, Any]]) -> str: + """ + Finds and returns the name of the first lead label found in the given list of GitHub labels. + + Returns an empty string if no lead label is found + """ + result = '' + for label in labels: + if label['name'].startswith('Lead:'): + result = label['name'] + break + + return result + + +def publish_digest( + issues: list[dict[str, str]], + slack_channel: str, + slack_token: str, + hours_passed: int, +): + """ + Creates a threaded Slack messaged containing a digest of recently commented GitHub issues. + + Parent Slack message will say how many comments were left, and the timeframe. 
Each reply + will include a link to the comment, as well as additional information. + """ + # Create the parent message + parent_thread_msg = ( + f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago' + ) + + response = requests.post( + 'https://slack.com/api/chat.postMessage', + headers={ + 'Authorization': f"Bearer {slack_token}", + 'Content-Type': 'application/json; charset=utf-8', + }, + json={ + 'channel': slack_channel, + 'text': parent_thread_msg, + }, + ) + + if response.status_code != 200: + # XXX : Log this + print(f'Failed to send message to Slack. Status code: {response.status_code}') + # XXX : Add retry logic? + sys.exit(errno.ECOMM) + + d = response.json() + # Store timestamp, which, along with the channel, uniquely identifies the parent thread + ts = d.get('ts') + + def comment_on_thread(message: str): + """ + Posts the given message as a reply to the parent message. + """ + response = requests.post( + 'https://slack.com/api/chat.postMessage', + headers={ + 'Authorization': f"Bearer {slack_token}", + 'Content-Type': 'application/json; charset=utf-8', + }, + json={ + 'channel': slack_channel, + 'text': message, + 'thread_ts': ts, + }, + ) + if response.status_code != 200: + # XXX : Check "ok" field for errors + # XXX : Log this + print( + f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}' + ) + # XXX : Retry logic? 
+ + for i in issues: + # Slack rate limit is roughly 1 request per second + time.sleep(1) + + comment_url = i['comment_url'] + issue_title = i['issue_title'] + commenter = i['commenter'] + message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n' + + username = lead_label_to_username.get(i['lead_label'], '') + slack_id = username_to_slack_id.get(username, '') + if slack_id: + message += f'Lead: {slack_id}\n' + elif i['lead_label']: + message += f'{i["lead_label"]}\n' + else: + message += 'Lead: N/A\n' + + message += f'Commenter: *{commenter}*' + comment_on_thread(message) + + +def time_since(hours): + """Returns datetime and string representations of the current time, minus the given number of hours""" + now = datetime.now() + # XXX : Add a minute or two to the delta (to avoid dropping issues)? + since = now - timedelta(hours=hours) + return since, since.strftime('%Y-%m-%dT%H:%M:%S') + + +def start_job(args: argparse.Namespace): + """ + Starts the new comment digest job. + """ + since, date_string = time_since(args.hours) + issues = fetch_issues(date_string) + + # XXX : If we are only running this script daily, we can remove this condition to + # always post a message to Slack. If the digest is ever not published, we'll know + # that something is wrong with our script runner. + if filtered_issues := filter_issues(issues, since): + publish_digest(filtered_issues, args.channel, args.slack_token, args.hours) + # XXX : Log this + print('Digest posted to Slack.') + else: + # XXX : Log this + print('No issues needing attention found.') + + +def _get_parser() -> argparse.ArgumentParser: + """ + Creates and returns an ArgumentParser containing default values which were + read from the config file.
+ """ + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + 'hours', + help='Fetch issues that have been updated since this many hours ago', + type=int, + ) + parser.add_argument( + 'channel', + help="Issues will be published to this Slack channel", + type=str, + ) + parser.add_argument( + 'slack_token', + metavar='slack-token', + help='Slack auth token', + type=str, + ) + + return parser + + +if __name__ == '__main__': + # Process command-line arguments and starts the notification job + parser = _get_parser() + args = parser.parse_args() + start_job(args) diff --git a/scripts/gh_scripts/requirements.txt b/scripts/gh_scripts/requirements.txt new file mode 100644 index 00000000000..2c24336eb31 --- /dev/null +++ b/scripts/gh_scripts/requirements.txt @@ -0,0 +1 @@ +requests==2.31.0 diff --git a/scripts/promise_batch_imports.py b/scripts/promise_batch_imports.py index 8a373eea502..3edbc987c08 100644 --- a/scripts/promise_batch_imports.py +++ b/scripts/promise_batch_imports.py @@ -44,7 +44,7 @@ def map_book_to_olbook(book, promise_id): isbn = book.get('ISBN') or ' ' sku = book['BookSKUB'] or book['BookSKU'] or book['BookBarcode'] olbook = { - 'local_id': [f"urn:bwbsku:{sku}"], + 'local_id': [f"urn:bwbsku:{sku.upper()}"], 'identifiers': { **({'amazon': [book.get('ASIN')]} if not asin_is_isbn_10 else {}), **({'better_world_books': [isbn]} if not is_isbn_13(isbn) else {}),