Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to latest version of metasyn #32

Merged
merged 4 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
metasyn-version: [ "git+https://github.com/sodascience/metasyn.git@main"]
include:
- python-version: "3.11"
metasyn-version: "metasyn==0.8.0"
metasyn-version: "metasyn==1.0.0"

steps:
- uses: actions/checkout@v4
Expand Down
25 changes: 25 additions & 0 deletions metasyncontrib/disclosure/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Base class for all disclosure control distributions."""

import polars as pl
from metasyn.distribution.base import BaseDistribution


def metadist_disclosure():
"""Decorate class to create a distribution with disclosure control.
Expand All @@ -17,3 +20,25 @@ def _wrap(cls):
return cls

return _wrap


class DisclosureConstantMixin(BaseDistribution):
    """Mixin that replaces ``fit`` for constant distributions.

    A constant is only fitted when the most frequent value is backed by
    at least ``n_avg`` observations; otherwise the class's default
    distribution is returned instead.
    """

    @classmethod
    def fit(cls, series, *args, n_avg: int = 11, **kwargs) -> BaseDistribution:
        """Fit a constant distribution subject to disclosure control.

        Parameters
        ----------
        series:
            Input values; converted with ``cls._to_series`` before use.
        *args:
            Extra positional arguments forwarded to ``cls._fit``.
        n_avg:
            Minimum number of occurrences the most common value must
            have before it may be fitted.
        **kwargs:
            Extra keyword arguments forwarded to ``cls._fit``.

        Returns
        -------
        BaseDistribution:
            The result of ``cls._fit`` when the threshold is met,
            otherwise ``cls.default_distribution()``.
        """
        observed: pl.Series = cls._to_series(series)
        distinct = observed.n_unique()

        if distinct == 1:
            # A single distinct value: every observation supports it.
            support = observed.len()
        elif distinct > 1:
            # Several distinct values: take the count of the most frequent
            # one (value_counts is sorted, so it is in the first row).
            support = observed.value_counts(sort=True).row(0)[1]
        else:
            # Nothing to fit on.
            return cls.default_distribution()

        if support < n_avg:
            # Too few occurrences to release the value.
            return cls.default_distribution()
        return cls._fit(observed, *args, **kwargs)

66 changes: 0 additions & 66 deletions metasyncontrib/disclosure/constant.py

This file was deleted.

7 changes: 6 additions & 1 deletion metasyncontrib/disclosure/continuous.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Disclosure control implementations for continuous distributions."""

from metasyn.distribution.continuous import (
ConstantDistribution,
ExponentialDistribution,
LogNormalDistribution,
NormalDistribution,
TruncatedNormalDistribution,
UniformDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure
from metasyncontrib.disclosure.numerical import DisclosureNumericalMixin


Expand All @@ -35,3 +36,7 @@ class DisclosureTruncatedNormal(DisclosureNumericalMixin, TruncatedNormalDistrib
@metadist_disclosure()
class DisclosureExponential(DisclosureNumericalMixin, ExponentialDistribution):
"""Disclosure exponential distribution."""

@metadist_disclosure()
class DisclosureConstant(DisclosureConstantMixin, ConstantDistribution):
    """ConstantDistribution whose fitting goes through DisclosureConstantMixin."""
19 changes: 18 additions & 1 deletion metasyncontrib/disclosure/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@

import polars as pl
from metasyn.distribution.datetime import (
DateConstantDistribution,
DateTimeConstantDistribution,
DateTimeUniformDistribution,
DateUniformDistribution,
TimeConstantDistribution,
TimeUniformDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure

# from metasyncontrib.disclosure.base import BaseDisclosureDistribution
from metasyncontrib.disclosure.utils import micro_aggregate
Expand Down Expand Up @@ -56,3 +59,17 @@ def _fit(cls, values: pl.Series, n_avg: int = 11) -> DisclosureDate:
# Convert back into dates
sub_series = pl.Series([dt_val.date() for dt_val in dt_sub_series])
return cls(sub_series.min(), sub_series.max())

@metadist_disclosure()
class DisclosureDateTimeConstant(DisclosureConstantMixin, DateTimeConstantDistribution):
    """DateTimeConstantDistribution whose fitting goes through DisclosureConstantMixin."""


@metadist_disclosure()
class DisclosureTimeConstant(DisclosureConstantMixin, TimeConstantDistribution):
    """TimeConstantDistribution whose fitting goes through DisclosureConstantMixin."""


@metadist_disclosure()
class DisclosureDateConstant(DisclosureConstantMixin, DateConstantDistribution):
    """DateConstantDistribution whose fitting goes through DisclosureConstantMixin."""
7 changes: 6 additions & 1 deletion metasyncontrib/disclosure/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

import polars as pl
from metasyn.distribution.discrete import (
DiscreteConstantDistribution,
DiscreteNormalDistribution,
DiscreteTruncatedNormalDistribution,
DiscreteUniformDistribution,
PoissonDistribution,
UniqueKeyDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure
from metasyncontrib.disclosure.numerical import DisclosureNumericalMixin
from metasyncontrib.disclosure.utils import micro_aggregate

Expand Down Expand Up @@ -49,3 +50,7 @@ def _fit(cls, values: pl.Series, n_avg: int = 11):
return cls(0, True)
sub_values = micro_aggregate(values, n_avg)
return super()._fit(sub_values)

@metadist_disclosure()
class DisclosureDiscreteConstant(DisclosureConstantMixin, DiscreteConstantDistribution):
    """DiscreteConstantDistribution whose fitting goes through DisclosureConstantMixin."""
24 changes: 13 additions & 11 deletions metasyncontrib/disclosure/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,37 @@
from metasyn.provider import BaseDistributionProvider

from metasyncontrib.disclosure.categorical import DisclosureMultinoulli
from metasyncontrib.disclosure.constant import (
DisclosureConstant,
DisclosureDateConstant,
DisclosureDateTimeConstant,
DisclosureDiscreteConstant,
DisclosureStringConstant,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.continuous import (
DisclosureConstant,
DisclosureExponential,
DisclosureLogNormal,
DisclosureNormal,
DisclosureTruncatedNormal,
DisclosureUniform,
)
from metasyncontrib.disclosure.datetime import DisclosureDate, DisclosureDateTime, DisclosureTime
from metasyncontrib.disclosure.datetime import (
DisclosureDate,
DisclosureDateConstant,
DisclosureDateTime,
DisclosureDateTimeConstant,
DisclosureTime,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.discrete import (
DisclosureDiscreteConstant,
DisclosureDiscreteNormal,
DisclosureDiscreteTruncatedNormal,
DisclosureDiscreteUniform,
DisclosurePoisson,
DisclosureUniqueKey,
)
from metasyncontrib.disclosure.faker import (
from metasyncontrib.disclosure.na import DisclosureNA
from metasyncontrib.disclosure.string import (
DisclosureFaker,
DisclosureFreetext,
DisclosureStringConstant,
DisclosureUniqueFaker,
)
from metasyncontrib.disclosure.na import DisclosureNA


class DisclosureProvider(BaseDistributionProvider):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Module for disclosure control for string distributions."""

from metasyn.distribution.faker import (
from metasyn.distribution.string import (
FakerDistribution,
FreeTextDistribution,
StringConstantDistribution,
UniqueFakerDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure


@metadist_disclosure()
Expand Down Expand Up @@ -34,3 +35,7 @@ class DisclosureFreetext(FreeTextDistribution):
@classmethod
def _fit(cls, values, max_values: int = 50, n_avg: int = 11): # pylint: disable=unused-argument
# n_avg is accepted but not used here (hence the pylint waiver above);
# only values and max_values are forwarded to the parent class's _fit.
return super()._fit(values, max_values=max_values)

@metadist_disclosure()
class DisclosureStringConstant(DisclosureConstantMixin, StringConstantDistribution):
    """StringConstantDistribution whose fitting goes through DisclosureConstantMixin."""
16 changes: 8 additions & 8 deletions tests/test_constant.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from metasyn.distribution.constant import (
ConstantDistribution,
from metasyn.distribution.continuous import ConstantDistribution
from metasyn.distribution.datetime import (
DateConstantDistribution,
DateTimeConstantDistribution,
DiscreteConstantDistribution,
StringConstantDistribution,
TimeConstantDistribution,
)
from metasyn.distribution.discrete import DiscreteConstantDistribution
from metasyn.distribution.string import StringConstantDistribution
from pytest import mark

from metasyncontrib.disclosure.constant import (
DisclosureConstant,
from metasyncontrib.disclosure.continuous import DisclosureConstant
from metasyncontrib.disclosure.datetime import (
DisclosureDateConstant,
DisclosureDateTimeConstant,
DisclosureDiscreteConstant,
DisclosureStringConstant,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.discrete import DisclosureDiscreteConstant
from metasyncontrib.disclosure.string import DisclosureStringConstant


@mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_other_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from metasyncontrib.disclosure.categorical import DisclosureMultinoulli
from metasyncontrib.disclosure.datetime import DisclosureDate, DisclosureDateTime, DisclosureTime
from metasyncontrib.disclosure.faker import DisclosureFaker
from metasyncontrib.disclosure.string import DisclosureFaker


@mark.parametrize(
Expand Down