Skip to content

Commit

Permalink
Update to latest version of metasyn (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
qubixes authored May 27, 2024
1 parent 6205789 commit 64f8ddb
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 92 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
metasyn-version: [ "git+https://github.com/sodascience/metasyn.git@main"]
include:
- python-version: "3.11"
metasyn-version: "metasyn==0.8.0"
metasyn-version: "metasyn==1.0.0"

steps:
- uses: actions/checkout@v4
Expand Down
25 changes: 25 additions & 0 deletions metasyncontrib/disclosure/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Base class for all disclosure control distributions."""

import polars as pl
from metasyn.distribution.base import BaseDistribution


def metadist_disclosure():
"""Decorate class to create a distribution with disclosure control.
Expand All @@ -17,3 +20,25 @@ def _wrap(cls):
return cls

return _wrap


class DisclosureConstantMixin(BaseDistribution):
    """Mixin that overrides ``fit`` for constant distributions under disclosure control.

    A constant is only fitted when the single value (or, if there are several
    distinct values, the most common one) occurs at least ``n_avg`` times.
    In every other case the class' default distribution is returned.
    """

    @classmethod
    def fit(cls, series, *args, n_avg: int = 11, **kwargs) -> BaseDistribution:
        """Fit a constant distribution while enforcing the disclosure threshold.

        Parameters
        ----------
        series:
            Input values; converted with ``cls._to_series`` before inspection.
        *args:
            Positional arguments forwarded unchanged to ``cls._fit``.
        n_avg:
            Minimum number of occurrences required of the single / most
            common value before a fit is performed.
        **kwargs:
            Keyword arguments forwarded unchanged to ``cls._fit``.

        Returns
        -------
        BaseDistribution
            The result of ``cls._fit`` when the threshold is met, otherwise
            ``cls.default_distribution()``.
        """
        values: pl.Series = cls._to_series(series)
        distinct = values.n_unique()

        if distinct == 1:
            # One distinct value: every element of the series is that value,
            # so the series length is its occurrence count.
            top_count = values.len()
        elif distinct > 1:
            # Several distinct values: the first row of the sorted value
            # counts holds the most common value and how often it occurs.
            top_count = values.value_counts(sort=True).row(0)[1]
        else:
            # No distinct values at all, nothing to base a constant on.
            return cls.default_distribution()

        if top_count < n_avg:
            return cls.default_distribution()
        return cls._fit(values, *args, **kwargs)

66 changes: 0 additions & 66 deletions metasyncontrib/disclosure/constant.py

This file was deleted.

7 changes: 6 additions & 1 deletion metasyncontrib/disclosure/continuous.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Disclosure control implementations for continuous distributions."""

from metasyn.distribution.continuous import (
ConstantDistribution,
ExponentialDistribution,
LogNormalDistribution,
NormalDistribution,
TruncatedNormalDistribution,
UniformDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure
from metasyncontrib.disclosure.numerical import DisclosureNumericalMixin


Expand All @@ -35,3 +36,7 @@ class DisclosureTruncatedNormal(DisclosureNumericalMixin, TruncatedNormalDistrib
@metadist_disclosure()
class DisclosureExponential(DisclosureNumericalMixin, ExponentialDistribution):
"""Disclosure exponential distribution."""

@metadist_disclosure()
class DisclosureConstant(DisclosureConstantMixin, ConstantDistribution):
    """Disclosure controlled ConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``ConstantDistribution``. The
    mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """
19 changes: 18 additions & 1 deletion metasyncontrib/disclosure/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@

import polars as pl
from metasyn.distribution.datetime import (
DateConstantDistribution,
DateTimeConstantDistribution,
DateTimeUniformDistribution,
DateUniformDistribution,
TimeConstantDistribution,
TimeUniformDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure

# from metasyncontrib.disclosure.base import BaseDisclosureDistribution
from metasyncontrib.disclosure.utils import micro_aggregate
Expand Down Expand Up @@ -56,3 +59,17 @@ def _fit(cls, values: pl.Series, n_avg: int = 11) -> DisclosureDate:
# Convert back into dates
sub_series = pl.Series([dt_val.date() for dt_val in dt_sub_series])
return cls(sub_series.min(), sub_series.max())

@metadist_disclosure()
class DisclosureDateTimeConstant(DisclosureConstantMixin, DateTimeConstantDistribution):
    """Disclosure controlled DateTimeConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``DateTimeConstantDistribution``.
    The mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """


@metadist_disclosure()
class DisclosureTimeConstant(DisclosureConstantMixin, TimeConstantDistribution):
    """Disclosure controlled TimeConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``TimeConstantDistribution``.
    The mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """


@metadist_disclosure()
class DisclosureDateConstant(DisclosureConstantMixin, DateConstantDistribution):
    """Disclosure controlled DateConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``DateConstantDistribution``.
    The mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """
7 changes: 6 additions & 1 deletion metasyncontrib/disclosure/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

import polars as pl
from metasyn.distribution.discrete import (
DiscreteConstantDistribution,
DiscreteNormalDistribution,
DiscreteTruncatedNormalDistribution,
DiscreteUniformDistribution,
PoissonDistribution,
UniqueKeyDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure
from metasyncontrib.disclosure.numerical import DisclosureNumericalMixin
from metasyncontrib.disclosure.utils import micro_aggregate

Expand Down Expand Up @@ -49,3 +50,7 @@ def _fit(cls, values: pl.Series, n_avg: int = 11):
return cls(0, True)
sub_values = micro_aggregate(values, n_avg)
return super()._fit(sub_values)

@metadist_disclosure()
class DisclosureDiscreteConstant(DisclosureConstantMixin, DiscreteConstantDistribution):
    """Disclosure controlled DiscreteConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``DiscreteConstantDistribution``.
    The mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """
24 changes: 13 additions & 11 deletions metasyncontrib/disclosure/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,37 @@
from metasyn.provider import BaseDistributionProvider

from metasyncontrib.disclosure.categorical import DisclosureMultinoulli
from metasyncontrib.disclosure.constant import (
DisclosureConstant,
DisclosureDateConstant,
DisclosureDateTimeConstant,
DisclosureDiscreteConstant,
DisclosureStringConstant,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.continuous import (
DisclosureConstant,
DisclosureExponential,
DisclosureLogNormal,
DisclosureNormal,
DisclosureTruncatedNormal,
DisclosureUniform,
)
from metasyncontrib.disclosure.datetime import DisclosureDate, DisclosureDateTime, DisclosureTime
from metasyncontrib.disclosure.datetime import (
DisclosureDate,
DisclosureDateConstant,
DisclosureDateTime,
DisclosureDateTimeConstant,
DisclosureTime,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.discrete import (
DisclosureDiscreteConstant,
DisclosureDiscreteNormal,
DisclosureDiscreteTruncatedNormal,
DisclosureDiscreteUniform,
DisclosurePoisson,
DisclosureUniqueKey,
)
from metasyncontrib.disclosure.faker import (
from metasyncontrib.disclosure.na import DisclosureNA
from metasyncontrib.disclosure.string import (
DisclosureFaker,
DisclosureFreetext,
DisclosureStringConstant,
DisclosureUniqueFaker,
)
from metasyncontrib.disclosure.na import DisclosureNA


class DisclosureProvider(BaseDistributionProvider):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Module for disclosure control for string distributions."""

from metasyn.distribution.faker import (
from metasyn.distribution.string import (
FakerDistribution,
FreeTextDistribution,
StringConstantDistribution,
UniqueFakerDistribution,
)

from metasyncontrib.disclosure.base import metadist_disclosure
from metasyncontrib.disclosure.base import DisclosureConstantMixin, metadist_disclosure


@metadist_disclosure()
Expand Down Expand Up @@ -34,3 +35,7 @@ class DisclosureFreetext(FreeTextDistribution):
@classmethod
def _fit(cls, values, max_values: int = 50, n_avg: int = 11): # pylint: disable=unused-argument
    """Delegate fitting to the parent class' ``_fit``.

    Only ``values`` and ``max_values`` are forwarded; ``n_avg`` is accepted
    in the signature but not used by this method.
    """
    return super()._fit(values, max_values=max_values)

@metadist_disclosure()
class DisclosureStringConstant(DisclosureConstantMixin, StringConstantDistribution):
    """Disclosure controlled StringConstantDistribution.

    Combines ``DisclosureConstantMixin`` with ``StringConstantDistribution``.
    The mixin is listed first, so its members take precedence in the method
    resolution order. The class defines no members of its own.
    """
16 changes: 8 additions & 8 deletions tests/test_constant.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from metasyn.distribution.constant import (
ConstantDistribution,
from metasyn.distribution.continuous import ConstantDistribution
from metasyn.distribution.datetime import (
DateConstantDistribution,
DateTimeConstantDistribution,
DiscreteConstantDistribution,
StringConstantDistribution,
TimeConstantDistribution,
)
from metasyn.distribution.discrete import DiscreteConstantDistribution
from metasyn.distribution.string import StringConstantDistribution
from pytest import mark

from metasyncontrib.disclosure.constant import (
DisclosureConstant,
from metasyncontrib.disclosure.continuous import DisclosureConstant
from metasyncontrib.disclosure.datetime import (
DisclosureDateConstant,
DisclosureDateTimeConstant,
DisclosureDiscreteConstant,
DisclosureStringConstant,
DisclosureTimeConstant,
)
from metasyncontrib.disclosure.discrete import DisclosureDiscreteConstant
from metasyncontrib.disclosure.string import DisclosureStringConstant


@mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_other_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from metasyncontrib.disclosure.categorical import DisclosureMultinoulli
from metasyncontrib.disclosure.datetime import DisclosureDate, DisclosureDateTime, DisclosureTime
from metasyncontrib.disclosure.faker import DisclosureFaker
from metasyncontrib.disclosure.string import DisclosureFaker


@mark.parametrize(
Expand Down

0 comments on commit 64f8ddb

Please sign in to comment.