Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for soft/hard block in upload_to_mllf_to_remote_settings cron #22885

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 53 additions & 59 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from datetime import datetime
from typing import List

import waffle
from django_statsd.clients import statsd

import olympia.core.logger
from olympia.constants.blocklist import (
BASE_REPLACE_THRESHOLD,
MLBF_BASE_ID_CONFIG_KEY,
MLBF_TIME_CONFIG_KEY,
)
from olympia.zadmin.models import get_config

from .mlbf import MLBF
from .models import Block, BlocklistSubmission, BlockType
from .tasks import cleanup_old_files, process_blocklistsubmission, upload_filter
from .tasks import process_blocklistsubmission, upload_filter
from .utils import datetime_to_ts


Expand All @@ -28,9 +28,9 @@ def get_last_generation_time():
return get_config(MLBF_TIME_CONFIG_KEY, None, json_value=True)


def get_base_generation_time():
def get_base_generation_time(block_type: BlockType):
return get_config(
MLBF_BASE_ID_CONFIG_KEY(BlockType.BLOCKED, compat=True), None, json_value=True
MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True), None, json_value=True
)


Expand Down Expand Up @@ -66,48 +66,45 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
# An add-on version/file from after this time can't be reliably asserted -
# there may be false positives or false negatives.
# https://github.com/mozilla/addons-server/issues/13695
generation_time = get_generation_time()
# This timestamp represents the last time the MLBF was generated and uploaded.
# It could have been a base filter or a stash.
last_generation_time = get_last_generation_time()
# This timestamp represents the point in time when
# the base filter was generated and uploaded.
base_generation_time = get_base_generation_time()

mlbf = MLBF.generate_from_db(generation_time)

base_filter = (
MLBF.load_from_storage(base_generation_time)
if base_generation_time is not None
else None
mlbf = MLBF.generate_from_db(get_generation_time())
previous_filter = MLBF.load_from_storage(
# This timestamp represents the last time the MLBF was generated and uploaded.
# It could have been a base filter or a stash.
get_last_generation_time()
)

previous_filter = (
# Only load previoous filter if there is a timestamp to use
# and that timestamp is not the same as the base_filter
MLBF.load_from_storage(last_generation_time)
if last_generation_time is not None
and (base_filter is None or base_filter.created_at != last_generation_time)
else base_filter
)

changes_count = mlbf.blocks_changed_since_previous(
BlockType.BLOCKED, previous_filter
)
statsd.incr(
'blocklist.cron.upload_mlbf_to_remote_settings.blocked_changed', changes_count
)
need_update = (
force_base
or base_filter is None
or (
previous_filter is not None
and previous_filter.created_at < get_blocklist_last_modified_time()
)
or changes_count > 0
)
if not need_update:
base_filters_to_update: List[BlockType] = []
create_stash = False

# Determine which base filters need to be re uploaded
# and whether a new stash needs to be created
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
for block_type in BlockType:
# This prevents us from updating a stash or filter based on new soft blocks
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
if block_type == BlockType.SOFT_BLOCKED:
log.info(
'Skipping soft-blocks because enable-soft-blocking switch is inactive'
)
continue

base_filter = MLBF.load_from_storage(get_base_generation_time(block_type))

# add this block type to the list of filters to be re-uploaded
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
if (
force_base
or base_filter is None
or mlbf.should_upload_filter(block_type, base_filter)
):
base_filters_to_update.append(block_type)
# only update the stash if we should AND if
# we aren't already reuploading the filter for this block type
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
elif mlbf.should_upload_stash(block_type, previous_filter or base_filter):
create_stash = True

skip_update = len(base_filters_to_update) == 0 and not create_stash
if skip_update:
log.info('No new/modified/deleted Blocks in database; skipping MLBF generation')
# Delete the locally generated MLBF directory and files as they are not needed
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
mlbf.delete()
return

statsd.incr(
Expand All @@ -119,28 +116,25 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
len(mlbf.data.not_blocked_items),
)

make_base_filter = (
force_base
or base_filter is None
or previous_filter is None
or mlbf.blocks_changed_since_previous(BlockType.BLOCKED, base_filter)
> BASE_REPLACE_THRESHOLD
)
# Until we are ready to enable soft blocking, it should not be possible
# to create a stash and a filter at the same iteration
if create_stash and len(base_filters_to_update) > 0:
raise Exception(
'Cannot upload stash and filter without implementing soft blocking'
)
KevinMind marked this conversation as resolved.
Show resolved Hide resolved

if make_base_filter:
mlbf.generate_and_write_filter()
else:
if create_stash:
mlbf.generate_and_write_stash(previous_filter)

for block_type in base_filters_to_update:
mlbf.generate_and_write_filter(block_type)

upload_filter.delay(
generation_time,
filter_list=[BlockType.BLOCKED.name] if make_base_filter else [],
create_stash=not make_base_filter,
mlbf.created_at,
willdurand marked this conversation as resolved.
Show resolved Hide resolved
filter_list=[key.name for key in base_filters_to_update],
create_stash=create_stash,
)

if base_filter:
cleanup_old_files.delay(base_filter_id=base_filter.created_at)


def process_blocklistsubmissions():
qs = BlocklistSubmission.objects.filter(
Expand Down
36 changes: 33 additions & 3 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from olympia.amo.utils import SafeStorage
from olympia.blocklist.models import BlockType, BlockVersion
from olympia.blocklist.utils import datetime_to_ts
from olympia.constants.blocklist import BASE_REPLACE_THRESHOLD
from olympia.versions.models import Version


Expand Down Expand Up @@ -79,7 +80,8 @@ class BaseMLBFLoader:
def __init__(self, storage: SafeStorage):
self.storage = storage

def data_type_key(self, key: MLBFDataType) -> str:
@classmethod
def data_type_key(cls, key: MLBFDataType) -> str:
return key.name.lower()

@cached_property
Expand Down Expand Up @@ -207,13 +209,21 @@ def hash_filter_inputs(cls, input_list):
for (guid, version) in input_list
]

def filter_path(self, _block_type: BlockType = BlockType.BLOCKED):
return self.storage.path('filter')
def filter_path(self, block_type: BlockType):
# TODO: explain / test
willdurand marked this conversation as resolved.
Show resolved Hide resolved
if block_type == BlockType.BLOCKED:
return self.storage.path('filter')
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
return self.storage.path(f'filter-{BaseMLBFLoader.data_type_key(block_type)}')

@property
def stash_path(self):
return self.storage.path('stash.json')

def delete(self):
if self.storage.exists(self.storage.base_location):
self.storage.rm_stored_dir(self.storage.base_location)
log.info(f'Deleted {self.storage.base_location}')

def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED):
stats = {}

Expand Down Expand Up @@ -297,6 +307,26 @@ def blocks_changed_since_previous(
_, _, changed_count = self.generate_diffs(previous_mlbf)[block_type]
return changed_count

def should_upload_filter(
self, block_type: BlockType = BlockType.BLOCKED, previous_mlbf: 'MLBF' = None
):
return (
self.blocks_changed_since_previous(
block_type=block_type, previous_mlbf=previous_mlbf
)
> BASE_REPLACE_THRESHOLD
)

def should_upload_stash(
self, block_type: BlockType = BlockType.BLOCKED, previous_mlbf: 'MLBF' = None
):
return (
self.blocks_changed_since_previous(
block_type=block_type, previous_mlbf=previous_mlbf
)
> 0
)

@classmethod
def load_from_storage(
cls, created_at: str = datetime_to_ts(), error_on_missing: bool = False
Expand Down
3 changes: 2 additions & 1 deletion src/olympia/blocklist/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
version_factory,
)
from olympia.blocklist.mlbf import MLBF
from olympia.blocklist.models import BlockType


class TestExportBlocklist(TestCase):
Expand Down Expand Up @@ -38,4 +39,4 @@ def test_command(self):

call_command('export_blocklist', '1')
mlbf = MLBF.load_from_storage(1)
assert mlbf.storage.exists(mlbf.filter_path())
assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED))
Loading
Loading