From fed611d30b65d85178866c631c73bb3d77925f6f Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Mon, 14 Oct 2024 15:18:03 +0200 Subject: [PATCH] test: get memory usage (#1238 #1276 #1277) --- openfisca_core/populations/__init__.py | 3 + .../populations/_core_population.py | 92 ++++++++++++------- openfisca_core/populations/types.py | 27 +++++- openfisca_core/types.py | 8 +- 4 files changed, 87 insertions(+), 43 deletions(-) diff --git a/openfisca_core/populations/__init__.py b/openfisca_core/populations/__init__.py index 172706048..3fc686a69 100644 --- a/openfisca_core/populations/__init__.py +++ b/openfisca_core/populations/__init__.py @@ -36,6 +36,8 @@ from .group_population import GroupPopulation from .population import Population +SinglePopulation = Population + __all__ = [ "ADD", "DIVIDE", @@ -46,6 +48,7 @@ "InvalidArraySizeError", "Population", "Projector", + "SinglePopulation", "UniqueRoleToEntityProjector", "get_projector_from_shortcut", "projectable", diff --git a/openfisca_core/populations/_core_population.py b/openfisca_core/populations/_core_population.py index ab97be383..34149814a 100644 --- a/openfisca_core/populations/_core_population.py +++ b/openfisca_core/populations/_core_population.py @@ -1,14 +1,11 @@ from __future__ import annotations from collections.abc import Sequence -from typing import NamedTuple, TypeVar -from typing_extensions import TypedDict +from typing import TypeVar -import enum import traceback import numpy -import strenum from openfisca_core import holders, periods @@ -19,22 +16,6 @@ _DT_co = TypeVar("_DT_co", covariant=True, bound=t.VarDType) -class Option(strenum.StrEnum): - ADD = enum.auto() - DIVIDE = enum.auto() - - -class Calculate(NamedTuple): - variable: str - period: t.Period - option: None | Sequence[str] - - -class MemoryUsageByVariable(TypedDict, total=False): - by_variable: dict[str, t.MemoryUsage] - total_nb_bytes: int - - class CorePopulation: """Base class to build populations from. @@ -59,19 +40,19 @@ class CorePopulation: def __init__(self, entity: t.CoreEntity, *__args: object, **__kwds: object) -> None: self.entity = entity - self._holders: t.Holders = {} + self._holders: t.HolderByVariable = {} def __call__( self, variable_name: t.VariableName, period: None | t.PeriodLike = None, - options: None | Sequence[str] = None, + options: None | Sequence[t.Option] = None, ) -> None | t.FloatArray: """Calculate ``variable_name`` for ``period``, using the formula if it exists. - Example: - >>> person("salary", "2017-04") - >>> array([300.0]) + # Example: + # >>> person("salary", "2017-04") + # >>> array([300.0]) Returns: None: If there is no :class:`.Simulation`. @@ -81,7 +62,7 @@ def __call__( if self.simulation is None: return None - calculate: Calculate = Calculate( + calculate = t.Calculate( variable=variable_name, period=periods.period(period), option=options, @@ -96,13 +77,13 @@ def __call__( calculate.period, ) - if Option.ADD in map(str.upper, calculate.option): + if t.Option.ADD in map(str.upper, calculate.option): return self.simulation.calculate_add( calculate.variable, calculate.period, ) - if Option.DIVIDE in map(str.upper, calculate.option): + if t.Option.DIVIDE in map(str.upper, calculate.option): return self.simulation.calculate_divide( calculate.variable, calculate.period, @@ -217,8 +198,51 @@ def get_holder(self, variable_name: t.VariableName) -> t.Holder: def get_memory_usage( self, - variables: Sequence[str] | None = None, - ) -> MemoryUsageByVariable: + variables: None | Sequence[t.VariableName] = None, + ) -> t.MemoryUsageByVariable: + """Return the memory usage of the population per variable. + + Args: + variables: The variables to get the memory usage for. + + Returns: + MemoryUsageByVariable: The memory usage of the population per variable. + + Examples: + >>> from openfisca_core import ( + ... entities, + ... holders, + ... periods, + ... populations, + ... simulations, + ... taxbenefitsystems, + ... simulations, + ... variables, + ... ) + + >>> class Person(entities.SingleEntity): ... + + >>> person = Person("person", "people", "", "") + + >>> class Salary(variables.Variable): + ... definition_period = periods.WEEK + ... entity = person + ... value_type = int + + >>> tbs = taxbenefitsystems.TaxBenefitSystem([person]) + >>> population = populations.SinglePopulation(person) + >>> simulation = simulations.Simulation(tbs, {person.key: population}) + >>> salary = Salary() + >>> holder = holders.Holder(salary, population) + >>> population._holders[salary.name] = holder + + >>> population.get_memory_usage() + {'total_nb_bytes': 0, 'by_variable': {'Salary': {'nb_cells_by...}}} + + >>> population.get_memory_usage([salary.name]) + {'total_nb_bytes': 0, 'by_variable': {'Salary': {'nb_cells_by...}}} + + """ holders_memory_usage = { variable_name: holder.get_memory_usage() for variable_name, holder in self._holders.items() @@ -230,11 +254,9 @@ def get_memory_usage( for holder_memory_usage in holders_memory_usage.values() ) - return MemoryUsageByVariable( - { - "total_nb_bytes": total_memory_usage, - "by_variable": holders_memory_usage, - }, + return t.MemoryUsageByVariable( + total_nb_bytes=total_memory_usage, + by_variable=holders_memory_usage, ) diff --git a/openfisca_core/populations/types.py b/openfisca_core/populations/types.py index 684c5456c..0cfccef36 100644 --- a/openfisca_core/populations/types.py +++ b/openfisca_core/populations/types.py @@ -1,8 +1,8 @@ from __future__ import annotations -from collections.abc import Iterable, MutableMapping -from typing import Union -from typing_extensions import NewType, TypeAlias +from collections.abc import Iterable, MutableMapping, Sequence +from typing import NamedTuple, Union +from typing_extensions import NewType, TypeAlias, TypedDict from openfisca_core.types import ( Array, @@ -22,6 +22,9 @@ VariableName, ) +import enum + +import strenum from numpy import ( bool_ as BoolDType, float32 as FloatDType, @@ -55,13 +58,29 @@ # Populations #: Type alias for a population's holders. -Holders: TypeAlias = MutableMapping[VariableName, Holder] +HolderByVariable: TypeAlias = MutableMapping[VariableName, Holder] # TODO(Mauko Quiroga-Alvarado): I'm not sure if this type alias is correct. # https://openfisca.org/doc/coding-the-legislation/50_entities.html Members: TypeAlias = Iterable[SinglePopulation] +class Option(strenum.StrEnum): + ADD = enum.auto() + DIVIDE = enum.auto() + + +class Calculate(NamedTuple): + variable: VariableName + period: Period + option: None | Sequence[Option] + + +class MemoryUsageByVariable(TypedDict, total=False): + by_variable: dict[VariableName, MemoryUsage] + total_nb_bytes: int + + __all__ = [ "CoreEntity", "CorePopulation", diff --git a/openfisca_core/types.py b/openfisca_core/types.py index cdf0606d9..85adf7b51 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,9 +1,9 @@ from __future__ import annotations -from collections.abc import Iterable, Mapping, Sequence, Sized +from collections.abc import Iterable, Sequence, Sized from numpy.typing import DTypeLike, NDArray from typing import NewType, TypeVar, Union -from typing_extensions import Protocol, Self, TypeAlias, TypedDict +from typing_extensions import Protocol, Required, Self, TypeAlias, TypedDict import abc import enum @@ -133,7 +133,7 @@ def __new__( class Holder(Protocol): def clone(self, population: CorePopulation, /) -> Holder: ... - def get_memory_usage(self, /) -> Mapping[str, object]: ... + def get_memory_usage(self, /) -> MemoryUsage: ... class MemoryUsage(TypedDict, total=False): @@ -143,7 +143,7 @@ class MemoryUsage(TypedDict, total=False): nb_cells_by_array: int nb_requests: int nb_requests_by_array: int - total_nb_bytes: int + total_nb_bytes: Required[int] # Parameters