From 2c77634b3753b04b23a648d22b972294b6e84e3a Mon Sep 17 00:00:00 2001 From: jbaudet-pass <187622442+jbaudet-pass@users.noreply.github.com> Date: Mon, 13 Jan 2025 18:45:24 +0100 Subject: [PATCH] (PC-33771)[API] script: clean offer with ean inside title (part1) First step: clean books, cds and vinyles. If the offer has its EAN inside its name, find the product and update its information from it. If the EAN is unknown or if the product is not allowed by the GCU, reject it. --- .../clean_offer_titles_with_eans/main.py | 242 +++++++++++++++ .../clean_offer_titles_with_ean/test_main.py | 278 ++++++++++++++++++ 2 files changed, 520 insertions(+) create mode 100644 api/src/pcapi/scripts/clean_offer_titles_with_eans/main.py create mode 100644 api/tests/scripts/clean_offer_titles_with_ean/test_main.py diff --git a/api/src/pcapi/scripts/clean_offer_titles_with_eans/main.py b/api/src/pcapi/scripts/clean_offer_titles_with_eans/main.py new file mode 100644 index 00000000000..bd32b0b9b19 --- /dev/null +++ b/api/src/pcapi/scripts/clean_offer_titles_with_eans/main.py @@ -0,0 +1,242 @@ +from dataclasses import dataclass +from datetime import datetime +from datetime import timezone as tz +import functools +import json +import logging +from typing import Callable +from typing import Collection +from typing import cast + +import sqlalchemy as sa + +from pcapi.core.bookings import api as bookings_api +from pcapi.core.mails import transactional as transactional_mails +from pcapi.core.offers.models import GcuCompatibilityType +from pcapi.core.offers.models import Offer +from pcapi.core.offers.models import OfferValidationStatus +from pcapi.flask_app import app +from pcapi.models import db +from pcapi.models.offer_mixin import OfferValidationType +from pcapi.repository import atomic +from pcapi.repository import on_commit +from pcapi.utils.chunks import get_chunks + + +logger = logging.getLogger(__name__) + +# Mandatory since this module uses atomic() which needs an application context. +app.app_context().push() + + +BOOKS_CDS_VINYLES_QUERY = """ + SELECT + sub.id, + sub.ean, + sub.name, + sub."subcategoryId", + sub."isActive", + p.id is not null as exists, + p.id as product_id, + p.name as product_name, + p."jsonData" as product_json_data, + p."gcuCompatibilityType" + FROM ( + SELECT + id, + substring("name" similar '%#"[[:digit:]]{13}#"%' escape '#') as ean, + name, + "subcategoryId", + "isActive" + FROM + offer + WHERE + "name" similar to '%\\d{13}%' + and "validation" != 'REJECTED' + and "subcategoryId" in ( + 'LIVRE_PAPIER', + 'SUPPORT_PHYSIQUE_MUSIQUE_CD', + 'SUPPORT_PHYSIQUE_MUSIQUE_VINYLE' + ) + ) sub + LEFT JOIN + product p on p."jsonData"->>'ean' = sub.ean + LIMIT + 10000 +""" + + +@dataclass(frozen=True) +class OfferEanQueryRow: + id: int + ean: str + name: str + subcategory: str + is_active: bool + exists: bool + product_id: int | None + product_name: str | None + product_json_data: str | None + gcu_compatibility: str | None + + +def get_offers_with_ean_inside_title() -> Collection[OfferEanQueryRow]: + query = sa.text(BOOKS_CDS_VINYLES_QUERY) + rows = [] + for row in db.session.execute(query): + rows.append( + OfferEanQueryRow( + id=row[0], + ean=row[1], + name=row[2], + subcategory=row[3], + is_active=row[4], + exists=row[5], + product_id=row[6], + product_name=row[7], + product_json_data=json.dumps(row[8]) if row[8] is not None else None, + gcu_compatibility=row[9], + ) + ) + + return rows + + +def run() -> None: + while True: + rows = get_offers_with_ean_inside_title() + if not rows: + break + + parse_offers(rows) + + +def parse_offers(rows: Collection[OfferEanQueryRow]) -> None: + for chunk in get_chunks(rows, chunk_size=2_000): + + unknown_offer_rows = set() + gcu_incompatible_offer_rows = set() + legit_offer_rows = set() + + for offer_row in chunk: + if not offer_row.exists: + unknown_offer_rows.add(offer_row) + elif offer_row.gcu_compatibility != GcuCompatibilityType.COMPATIBLE.value: + gcu_incompatible_offer_rows.add(offer_row) + else: + legit_offer_rows.add(offer_row) + + update_unknown_offers(unknown_offer_rows) + update_gcu_incompatible_offers(gcu_incompatible_offer_rows) + update_legit_offers(legit_offer_rows) + + +@atomic() +def retry_and_log(func: Callable) -> Callable: + def retry_one_chunk_at_a_time(offer_rows: Collection[OfferEanQueryRow]) -> None: + chunk_size = len(offer_rows) // 5 + chunk_size = max(chunk_size, 1) + + for chunk in get_chunks(offer_rows, chunk_size=chunk_size): + try: + with atomic(): + func(chunk) + except Exception as exc: # pylint: disable=broad-exception-caught + if chunk_size == 1: + row = chunk[0] + msg = "[%s][%s] could not handle offer #%s with product #%s (ean: %s)" + logger.info(msg, str(exc), func.__name__, row.id, row.product_id, row.ean) + else: + retry_one_chunk_at_a_time(chunk) + continue + + def inner(offer_rows: Collection[OfferEanQueryRow]) -> bool: + try: + func(offer_rows) + except Exception: # pylint: disable=broad-exception-caught + retry_one_chunk_at_a_time(offer_rows) + return False + return True + + return inner + + +@atomic() +@retry_and_log +def update_unknown_offers(offer_rows: Collection[OfferEanQueryRow]) -> None: + reject_offers(offer_rows) + + +@atomic() +@retry_and_log +def update_gcu_incompatible_offers(offer_rows: Collection[OfferEanQueryRow]) -> None: + reject_offers(offer_rows) + + +@atomic() +@retry_and_log +def update_legit_offers(offer_rows: Collection[OfferEanQueryRow]) -> None: + ids = {row.id for row in offer_rows} + legit_offers = Offer.query.filter(Offer.id.in_(ids)) + + offer_to_product = {row.id: row for row in offer_rows} + + with atomic(): + for offer in legit_offers: + offer.name = offer_to_product[offer.id].product_name + + if offer_to_product[offer.id].product_json_data: + extra_data = cast(str, offer_to_product[offer.id].product_json_data) + offer.extraData = json.loads(extra_data) + db.session.add(offer) + + +def reject_offers(offer_rows: Collection[OfferEanQueryRow]) -> None: + def cancel_booking(offer: Offer) -> None: + cancelled_bookings = bookings_api.cancel_bookings_from_rejected_offer(offer) + for booking in cancelled_bookings: + on_commit( + functools.partial( + transactional_mails.send_booking_cancellation_by_pro_to_beneficiary_email, + booking, + rejected_by_fraud_action=True, + ) + ) + + def notify_offerer(offer: Offer) -> None: + if offer.venue.bookingEmail: + recipients = [offer.venue.bookingEmail] + else: + recipients = [recipient.user.email for recipient in offer.venue.managingOfferer.UserOfferers] + + offer_data = transactional_mails.get_email_data_from_offer( + offer, offer.validation, OfferValidationStatus.REJECTED + ) + on_commit( + functools.partial( + transactional_mails.send_offer_validation_status_update_email, + offer_data, + recipients, + ) + ) + + ids = {row.id for row in offer_rows} + base_query = Offer.query.filter( + Offer.id.in_(ids), + Offer.status != OfferValidationStatus.REJECTED.value, + ) + + for offer in base_query: + cancel_booking(offer) + notify_offerer(offer) + + base_query.update( + { + "validation": OfferValidationStatus.REJECTED.value, + "lastValidationDate": datetime.now(tz.utc), # pylint: disable=datetime-now + "lastValidationType": OfferValidationType.AUTO.value, + "lastValidationAuthorUserId": None, + "isActive": False, + }, + synchronize_session=False, + ) diff --git a/api/tests/scripts/clean_offer_titles_with_ean/test_main.py b/api/tests/scripts/clean_offer_titles_with_ean/test_main.py new file mode 100644 index 00000000000..d1926746c90 --- /dev/null +++ b/api/tests/scripts/clean_offer_titles_with_ean/test_main.py @@ -0,0 +1,278 @@ +import contextlib +import json + +import pytest + +from pcapi.core.bookings import factories as bookings_factories +from pcapi.core.bookings.models import BookingStatus +from pcapi.core.categories import subcategories_v2 as subcategories +import pcapi.core.mails.testing as mails_testing +from pcapi.core.mails.transactional.sendinblue_template_ids import TransactionalEmail +from pcapi.core.offers import factories as offers_factories +from pcapi.core.offers import models as offers_models +from pcapi.models import db +from pcapi.models.offer_mixin import OfferStatus +from pcapi.scripts.clean_offer_titles_with_eans.main import OfferEanQueryRow +from pcapi.scripts.clean_offer_titles_with_eans.main import retry_and_log +from pcapi.scripts.clean_offer_titles_with_eans.main import run + + +pytestmark = pytest.mark.usefixtures("db_session") + + +EAN = "1234567890987" +EXTRA_DATA = {"ean": EAN, "author": "someone"} + +TARGET_SUBCATEGORIES = [ + subcategories.LIVRE_PAPIER.id, + subcategories.SUPPORT_PHYSIQUE_MUSIQUE_CD.id, + subcategories.SUPPORT_PHYSIQUE_MUSIQUE_VINYLE.id, +] + + +def build_offer(subcategory_id, name=None, ean=None): + if ean is None: + ean = EAN + + if name is None: + name = f"My {subcategory_id} offer :: {ean}", + + return bookings_factories.BookingFactory( + stock__offer__name=name, + stock__offer__extraData={}, + stock__offer__subcategoryId=subcategory_id, + ).stock.offer + + +def build_offers(eans, subcategories): + return [ + build_offer(subcategory_id, ean=eans[idx]) + for idx, subcategory_id in enumerate(subcategories) + ] + + +def build_random_offer(ean=None, name=None): + if ean is None: + ean = EAN + + if name is None: + name = f"My movie {ean}" + + return bookings_factories.BookingFactory( + stock__offer__name=name, + stock__offer__extraData={}, + stock__offer__subcategoryId=subcategories.CARTE_CINE_MULTISEANCES.id + ).stock.offer + + +def build_product(incompatible=False, ean=None): + gcu = offers_models.GcuCompatibilityType.COMPATIBLE + if incompatible: + gcu = offers_models.GcuCompatibilityType.FRAUD_INCOMPATIBLE + + extra_data = EXTRA_DATA + if ean is not None: + extra_data["ean"] = ean + + return offers_factories.ProductFactory( + name="real product name", + extraData=EXTRA_DATA, + gcuCompatibilityType=gcu, + ) + + +class RunTest: + @pytest.mark.parametrize("subcategory_id", TARGET_SUBCATEGORIES) + def test_compatible_offer_is_updated_from_product(self, subcategory_id): + offer = build_offer(subcategory_id) + product = build_product() + + with assert_offers_updated_from_products((offer, product)): + run() + + @pytest.mark.parametrize("subcategory_id", TARGET_SUBCATEGORIES) + def test_targeted_offer_without_ean_inside_title_is_ignored(self, subcategory_id): + offer = build_offer(subcategory_id, name="simple name") + + with assert_no_changes(offer): + run() + + def test_not_targeted_offer_subcategory_is_ignored(self): + offer = build_random_offer() + + with assert_no_changes(offer): + run() + + @pytest.mark.parametrize("subcategory_id", TARGET_SUBCATEGORIES) + def test_incompatible_offer_with_ean_inside_title_is_rejected(self, subcategory_id): + offer = build_offer(subcategory_id) + build_product(incompatible=True) + + with assert_rejected(offer): + run() + + @pytest.mark.parametrize("subcategory_id", TARGET_SUBCATEGORIES) + def test_offer_with_unknown_ean_inside_title_is_rejected(self, subcategory_id): + offer = build_offer(subcategory_id) + build_product(ean="0000000000000") + + with assert_rejected(offer): + run() + + def test_offers_to_update_and_ignore_and_reject(self): + known_eans = ["0000000000001", "0000000000002", "0000000000003"] + unknown_eans = ["1111111111111", "1111111111112", "1111111111113"] + gcu_incompatible_eans = ["2222222222221", "2222222222222", "2222222222223"] + + offers_to_update = build_offers(known_eans, TARGET_SUBCATEGORIES) + valid_products = [build_product(ean=ean) for ean in known_eans] + + offers_to_reject = build_offers(unknown_eans, TARGET_SUBCATEGORIES) + offers_to_reject += build_offers(gcu_incompatible_eans, TARGET_SUBCATEGORIES) + [build_product(incompatible=True, ean=ean) for ean in gcu_incompatible_eans] + + offers_to_ignore = [ + build_random_offer(name="some offer to ignore"), + build_random_offer(name="another offer to ignore"), + ] + + with assert_offers_updated_from_products(*zip(offers_to_update, valid_products)): + with assert_no_changes(*offers_to_ignore): + with assert_rejected(*offers_to_reject): + run() + + +class RetryAndLogTest: + def test_no_error_no_retry(self): + func = retry_and_log(self.nop) + assert func([1, 2]) + + def test_error_means_retry(self): + func = retry_and_log(self.error_call) + rows = [self.build_random_offer_ean_query_row(1)] + assert not func(rows) + + def test_one_error_other_items_are_updated_during_retry(self): + self.offers = offers_factories.OfferFactory.create_batch(12) + self.failing_offer_ids = {self.offers[2].id, self.offers[3].id, self.offers[9].id} + + ids = [offer.id for offer in self.offers] + rows = [self.build_random_offer_ean_query_row(row_id) for row_id in ids] + + func = retry_and_log(self.update_offer) + assert not func(rows) + + for offer in self.offers: + if offer.id not in self.failing_offer_ids: + assert offer.name.endswith('+ updated') + else: + assert not offer.name.endswith('+ updated') + + def build_random_offer_ean_query_row(self, row_id): + return OfferEanQueryRow( + id=row_id, + ean="0000000000001", + name="name", + subcategory=subcategories.LIVRE_PAPIER.id, + is_active=True, + exists=True, + product_id=100, + product_name="same name", + product_json_data=json.dumps({"ean": "0000000000002"}), + gcu_compatibility='COMPATIBLE', + ) + + def nop(self, *args, **kwargs): + pass + + def error_call(self, rows): + raise RuntimeError('test') + + def update_offer(self, rows): + for row in rows: + if row.id in self.failing_offer_ids: + raise RuntimeError('test') + else: + offer = offers_models.Offer.query.get(row.id) + offer.name = f'{offer.name} + updated' + + +@contextlib.contextmanager +def assert_offers_updated_from_products(*offers_and_products): + for row in offers_and_products: + offer, product = row + assert offer.name != product.name + assert offer.extraData != product.extraData + + yield + + for row in offers_and_products: + offer, product = row + + db.session.refresh(offer) + db.session.refresh(product) + + assert offer.name == product.name + assert offer.extraData == product.extraData + + +@contextlib.contextmanager +def assert_no_changes(*offers): + old_names = {offer.id: offer.name for offer in offers} + old_extra_data = {offer.id: offer.extraData for offer in offers} + + old_booking_status = {offer.id: None for offer in offers} + for offer in offers: + for stock in offer.stocks: + old_booking_status[offer.id] = [(b.id, b.status) for b in stock.bookings] + + yield + + for offer in offers: + db.session.refresh(offer) + + new_names = {offer.id: offer.name for offer in offers} + new_extra_data = {offer.id: offer.extraData for offer in offers} + + new_booking_status = {offer.id: None for offer in offers} + for offer in offers: + for stock in offer.stocks: + new_booking_status[offer.id] = [(b.id, b.status) for b in stock.bookings] + + assert old_names == new_names + assert old_extra_data == new_extra_data + assert old_booking_status == new_booking_status + + +@contextlib.contextmanager +def assert_rejected(*offers): + old_names = {offer.id: offer.name for offer in offers} + old_extra_data = {offer.id: offer.extraData for offer in offers} + + yield + + bookings_count = 0 + + for offer in offers: + db.session.refresh(offer) + + assert offer.status == OfferStatus.REJECTED + assert offer.name == old_names[offer.id] + assert offer.extraData == old_extra_data[offer.id] + + bookings = [booking for stock in offer.stocks for booking in stock.bookings] + bookings_count += len(bookings) + for booking in bookings: + assert booking.status == BookingStatus.CANCELLED + + assert len(mails_testing.outbox) == len(offers) + bookings_count + + found_templates = {row["template"]["id_prod"] for row in mails_testing.outbox} + expected_templates = { + TransactionalEmail.OFFER_VALIDATED_TO_REJECTED_TO_PRO.value.id_prod, + TransactionalEmail.OFFER_REJECTION_TO_PRO.value.id_prod, + TransactionalEmail.OFFER_PENDING_TO_REJECTED_TO_PRO.value.id_prod, + TransactionalEmail.BOOKING_CANCELLATION_BY_PRO_TO_BENEFICIARY.value.id_prod + } + + assert found_templates <= expected_templates