Skip to content

Commit

Permalink
Add helper to reproducibly shuffle the containers and schedules (#196)
Browse files Browse the repository at this point in the history
* Add helper to reproducibly shuffle the containers and schedules

* Allow using hash sorting in tests

* Adjust title of notebook
  • Loading branch information
1kastner authored Sep 9, 2023
1 parent e1783c3 commit e064efd
Show file tree
Hide file tree
Showing 12 changed files with 46 additions and 9 deletions.
8 changes: 8 additions & 0 deletions conflowgen/database_connection/sqlite_database_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from peewee import SqliteDatabase

from conflowgen.application.models.container_flow_generation_properties import ContainerFlowGenerationProperties
from conflowgen.application.repositories.random_seed_store_repository import get_initialised_random_object
from conflowgen.database_connection.create_tables import create_tables
from conflowgen.domain_models.base_model import database_proxy
from conflowgen.domain_models.container import Container
from conflowgen.domain_models.distribution_seeders import seed_all_distributions
from conflowgen.domain_models.vehicle import Truck, DeepSeaVessel, Feeder, Barge, Train
from conflowgen.tools import get_convert_to_random_value


class SqliteDatabaseIsMissingException(Exception):
Expand Down Expand Up @@ -52,6 +54,7 @@ class SqliteDatabaseConnection:
)

def __init__(self, sqlite_databases_directory: Optional[str] = None):
self.seeded_random = None

if sqlite_databases_directory is None:
sqlite_databases_directory = self.SQLITE_DEFAULT_DIR
Expand Down Expand Up @@ -123,6 +126,11 @@ def choose_database(
for vehicle in (DeepSeaVessel, Feeder, Barge, Train, Truck, Container):
self.logger.debug(f"Number entries in table '{vehicle.__name__}': {vehicle.select().count()}")

self.seeded_random = get_initialised_random_object(self.__class__.__name__)
random_bits = self.seeded_random.getrandbits(100)
convert_to_random_value = get_convert_to_random_value(random_bits)
self.sqlite_db_connection.func('assign_random_value')(convert_to_random_value)

return self.sqlite_db_connection

def delete_database(self, database_name: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ def allocate(self) -> None:
successful_assignment = 0

teu_total = 0
for i in range(number_containers_to_allocate):
i += 1
for i in range(1, number_containers_to_allocate + 1):
if i % 1000 == 0 or i == 1 or i == number_containers_to_allocate:
self.logger.info(
f"Progress: {i} / {number_containers_to_allocate} ({i / number_containers_to_allocate:.2%}) "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import typing

from peewee import fn

from conflowgen.application.repositories.random_seed_store_repository import get_initialised_random_object
from conflowgen.domain_models.container import Container
from conflowgen.domain_models.distribution_repositories.container_destination_distribution_repository import \
Expand Down Expand Up @@ -55,6 +57,8 @@ def assign(self) -> None:
LargeScheduledVehicle, on=Container.picked_up_by_large_scheduled_vehicle
).where(
Container.picked_up_by_large_scheduled_vehicle.schedule == schedule
).order_by(
fn.assign_random_value(Container.id)
)
distribution_for_schedule = self.distribution[schedule]
destinations = list(distribution_for_schedule.keys())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from typing import List, Type
import logging

from peewee import fn

from conflowgen.domain_models.data_types.mode_of_transport import ModeOfTransport
from conflowgen.domain_models.factories.container_factory import ContainerFactory
from conflowgen.domain_models.factories.fleet_factory import FleetFactory
Expand Down Expand Up @@ -37,7 +39,9 @@ def reload_properties(
def create(self) -> None:
assert self.container_flow_start_date is not None
assert self.container_flow_end_date is not None
schedules = Schedule.select()
schedules = Schedule.select().order_by(
fn.assign_random_value(Schedule.id)
)
number_schedules = schedules.count()
for i, schedule in enumerate(schedules):
i += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def choose_departing_vehicle_for_containers(self) -> None:
# This way no vehicle has an advantage over another by its earlier arrival (getting better slots etc.)
selected_containers: ModelSelect = Container.select(
).order_by(
fn.Random()
fn.assign_random_value(Container.id)
).where(
(Container.picked_up_by << ModeOfTransport.get_scheduled_vehicles())
& (Container.delivered_by << ModeOfTransport.get_scheduled_vehicles())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import datetime
from typing import Dict, Optional

from peewee import fn

from .abstract_truck_for_containers_manager import AbstractTruckForContainersManager, \
UnknownDistributionPropertyException
from ..domain_models.data_types.container_length import ContainerLength
Expand Down Expand Up @@ -113,6 +115,8 @@ def generate_trucks_for_delivering(self) -> None:
"""
containers = Container.select().where(
Container.delivered_by == ModeOfTransport.truck
).order_by(
fn.assign_random_value(Container.id)
)
number_containers = containers.count()
self.logger.info(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime
from typing import Dict, Optional

from peewee import fn

from .abstract_truck_for_containers_manager import AbstractTruckForContainersManager, \
UnknownDistributionPropertyException
from ..domain_models.data_types.container_length import ContainerLength
Expand Down Expand Up @@ -88,6 +90,8 @@ def _get_container_pickup_time(
def generate_trucks_for_picking_up(self):
containers = Container.select().where(
Container.picked_up_by == ModeOfTransport.truck
).order_by(
fn.assign_random_value(Container.id)
)
number_containers = containers.count()
self.logger.info(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"id": "8ae71614-f852-4f1c-8c93-357bf47ad9e7",
"metadata": {},
"source": [
"# Detailed Analyses for CTA example"
"# Fast Analyses for POC example"
]
},
{
Expand Down
5 changes: 5 additions & 0 deletions conflowgen/tests/substitute_peewee_database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import random

from peewee import SqliteDatabase

from conflowgen.data_summaries.data_summaries_cache import DataSummariesCache
from conflowgen.domain_models.base_model import database_proxy
from conflowgen.tools import get_convert_to_random_value


def setup_sqlite_in_memory_db() -> SqliteDatabase:
Expand All @@ -13,5 +16,7 @@ def setup_sqlite_in_memory_db() -> SqliteDatabase:
})
database_proxy.initialize(sqlite_db)
sqlite_db.connect()
random_bits = random.getrandbits(100)
sqlite_db.func('assign_random_value')(get_convert_to_random_value(random_bits))
DataSummariesCache.reset_cache()
return sqlite_db
9 changes: 9 additions & 0 deletions conflowgen/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A collection of tools for which no nicer name has been found yet.
"""
import hashlib
from typing import Callable, Any, TypeVar

DecoratedType = TypeVar('DecoratedType') # pylint: disable=invalid-name
Expand All @@ -21,3 +22,11 @@ def hashable(obj: Any) -> bool:
except TypeError:
return False
return True


def get_convert_to_random_value(random_bits):
    """
    Create a function that maps a row id to a reproducible hash string.

    The returned function adds ``random_bits`` to the given ``row_id``, encodes the sum as a 16-byte big-endian
    integer, and returns the SHA-256 hex digest of these bytes. For the same ``random_bits`` the same ``row_id``
    always yields the same digest, while a different ``random_bits`` value leads to different digests.

    Args:
        random_bits: Integer offset that is added to every row id before hashing.

    Returns:
        A function that accepts an integer ``row_id`` and returns a hex string of 64 characters.
    """
    def convert_to_random_value(row_id):
        # int.to_bytes raises an OverflowError if the sum is negative or does not fit into 16 bytes
        seeded_row_id = random_bits + row_id
        payload = seeded_row_id.to_bytes(16, 'big')
        return hashlib.sha256(payload).hexdigest()

    return convert_to_random_value
4 changes: 2 additions & 2 deletions docs/notebooks/data/prepared_dbs/demo_deham_cta.sqlite
Git LFS file not shown
4 changes: 2 additions & 2 deletions docs/notebooks/data/prepared_dbs/demo_poc.sqlite
Git LFS file not shown

0 comments on commit e064efd

Please sign in to comment.