diff --git a/docs/extending.md b/docs/extending.md index 48798da5..5f1dd94b 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -413,7 +413,8 @@ use `context.evaluate_raw()` instead of `context.evaluate()`. Plugins that require "memory" or "state" are possible using `PluginResult` objects or subclasses. Consider a plugin that generates child objects -that include values that sum up values on child objects to a value specified on a parent: +that include values that sum up values on child objects to a value specified on a parent (similar to a simple version +of `Math.random_partition`): ```yaml # examples/sum_child_values.yml diff --git a/docs/index.md b/docs/index.md index c2186b71..af778e06 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1861,6 +1861,108 @@ Or: twelve: ${Math.sqrt} ``` +#### Rolling up numbers: `Math.random_partition` + +Sometimes you want a parent object to have a field value which +is the sum of many child values. Snowfakery allows you to +specify or randomly generate the parent sum value and then +it will generate an appropriate number of children with +values that sum up to match it, using `Math.random_partition`: + +```yaml +# examples/math_partition/math_partition_simple.recipe.yml +- plugin: snowfakery.standard_plugins.Math +- object: ParentObject__c + count: 2 + fields: + TotalAmount__c: + random_number: + min: 30 + max: 90 + friends: + - object: ChildObject__c + for_each: + var: child_value + value: + Math.random_partition: + total: ${{ParentObject__c.TotalAmount__c}} + fields: + Amount__c: ${{child_value}} +``` + +The `Math.random_partition` function splits up a number. 
+So this recipe might spit out the following +set of parents and children: + +```json +ParentObject__c(id=1, TotalAmount__c=40) +ChildObject__c(id=1, Amount__c=3) +ChildObject__c(id=2, Amount__c=1) +ChildObject__c(id=3, Amount__c=24) +ChildObject__c(id=4, Amount__c=12) +ParentObject__c(id=2, TotalAmount__c=83) +ChildObject__c(id=5, Amount__c=2) +ChildObject__c(id=6, Amount__c=81) +``` + +There are 2 Parent objects created and a random number of +children per parent. + +The `Math.random_partition` function takes arguments +`min`, which is the smallest +value each part can have, `max`, which is the largest +possible value, `total` which is what all of the values +sum up to and `step` which is a number that each value +must have as a factor. E.g. if `step` is `4` then +values of `4`, `8`, `12` are valid. + +For example: + +```yaml +# examples/math_partition/sum_simple_example.recipe.yml +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 5 + fields: + Amount: ${{current_value}} +``` + +Which might generate `15,15,25,20,15,10` or `50,50` or `25,50,25`. + +If `step` is a number smaller than `1`, then you can generate +pennies for numeric calculations. Valid values are `0.01` (penny +granularity), `0.05` (nickel), `0.10` (dime), `0.25` (quarter) and +`0.50` (half dollar). Other values are not supported. + +```yaml +# examples/math_partition/sum_pennies.recipe.yml +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 0.01 + fields: + Amount: ${{current_value}} +``` + +It is possible to specify values which are inconsistent. +When that happens one of the constraints will be +violated. 
+ ### Advanced Unique IDs with the UniqueId plugin There is a plugin which gives you more control over the generation of diff --git a/examples/math_partition/math_partition_simple.recipe.yml b/examples/math_partition/math_partition_simple.recipe.yml new file mode 100644 index 00000000..29f4b59c --- /dev/null +++ b/examples/math_partition/math_partition_simple.recipe.yml @@ -0,0 +1,17 @@ +- plugin: snowfakery.standard_plugins.Math +- object: ParentObject__c + count: 2 + fields: + TotalAmount__c: + random_number: + min: 30 + max: 90 + friends: + - object: ChildObject__c + for_each: + var: child_value + value: + Math.random_partition: + total: ${{ParentObject__c.TotalAmount__c}} + fields: + Amount__c: ${{child_value}} diff --git a/examples/math_partition/sum_pennies.recipe.yml b/examples/math_partition/sum_pennies.recipe.yml new file mode 100644 index 00000000..6b61e792 --- /dev/null +++ b/examples/math_partition/sum_pennies.recipe.yml @@ -0,0 +1,13 @@ +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 0.01 + fields: + Amount: ${{current_value}} diff --git a/examples/math_partition/sum_pennies_param.recipe.yml b/examples/math_partition/sum_pennies_param.recipe.yml new file mode 100644 index 00000000..e9498037 --- /dev/null +++ b/examples/math_partition/sum_pennies_param.recipe.yml @@ -0,0 +1,15 @@ +- plugin: snowfakery.standard_plugins.Math +- option: step + default: 0.01 + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: ${{step}} + fields: + Amount: ${{current_value}} diff --git a/examples/math_partition/sum_simple_example.recipe.yml b/examples/math_partition/sum_simple_example.recipe.yml new file mode 100644 index 00000000..2301b66f --- /dev/null +++ b/examples/math_partition/sum_simple_example.recipe.yml @@ -0,0 +1,13 @@ +- plugin: snowfakery.standard_plugins.Math + +- 
object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 5 + fields: + Amount: ${{current_value}} diff --git a/examples/math_partition/test_bad_step.recipe.yml b/examples/math_partition/test_bad_step.recipe.yml new file mode 100644 index 00000000..42465eb8 --- /dev/null +++ b/examples/math_partition/test_bad_step.recipe.yml @@ -0,0 +1,10 @@ +- plugin: snowfakery.standard_plugins.Math +- object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 0.3 + fields: + Amount: ${{child_value}} diff --git a/examples/sum_plugin_example.yml b/examples/sum_plugin_example.yml new file mode 100644 index 00000000..303d6a59 --- /dev/null +++ b/examples/sum_plugin_example.yml @@ -0,0 +1,25 @@ +# This shows how you could create a plugin or feature where +# a parent object generates child objects which sum up +# to any particular value. + +- plugin: examples.sum_totals.SummationPlugin +- var: summation_helper + value: + SummationPlugin.summer: + total: 100 + step: 10 + +- object: ParentObject__c + count: 10 + fields: + MinimumChildObjectAmount__c: 10 + MinimumStep: 5 + TotalAmount__c: ${{summation_helper.total}} + friends: + - object: ChildObject__c + count: ${{summation_helper.count}} + fields: + Parent__c: + reference: ParentObject__c + Amount__c: ${{summation_helper.next_amount}} + RunningTotal__c: ${{summation_helper.running_total}} diff --git a/schema/snowfakery_recipe.jsonschema.json b/schema/snowfakery_recipe.jsonschema.json index acc5902d..e81c6fee 100644 --- a/schema/snowfakery_recipe.jsonschema.json +++ b/schema/snowfakery_recipe.jsonschema.json @@ -61,6 +61,14 @@ } ] }, + "for_each": { + "type": "object", + "anyOf": [ + { + "$ref": "#/$defs/var" + } + ] + }, "fields": { "type": "object", "additionalProperties": true diff --git a/snowfakery/api.py b/snowfakery/api.py index 7b76ac82..442d7294 100644 --- a/snowfakery/api.py +++ b/snowfakery/api.py @@ -151,6 +151,7 @@ def 
generate_data( update_passthrough_fields: T.Sequence[ str ] = (), # pass through these fields from input to output + seed: T.Optional[int] = None, ) -> None: stopping_criteria = stopping_criteria_from_target_number(target_number) dburls = dburls or ([dburl] if dburl else []) @@ -193,6 +194,7 @@ def open_with_cleanup(file, mode, **kwargs): plugin_options=plugin_options, update_input_file=open_update_input_file, update_passthrough_fields=update_passthrough_fields, + seed=seed, ) if open_cci_mapping_file: diff --git a/snowfakery/data_generator.py b/snowfakery/data_generator.py index 53c1b6a6..864572b5 100644 --- a/snowfakery/data_generator.py +++ b/snowfakery/data_generator.py @@ -131,6 +131,7 @@ def generate( plugin_options: dict = None, update_input_file: OpenFileLike = None, update_passthrough_fields: T.Sequence[str] = (), + seed: T.Optional[int] = None, ) -> ExecutionSummary: """The main entry point to the package for Python applications.""" from .api import SnowfakeryApplication @@ -188,6 +189,7 @@ def generate( parse_result=parse_result, globals=globls, continuing=bool(continuation_data), + seed=seed, ) as interpreter: runtime_context = interpreter.execute() diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index aa8ea9f0..618f4daa 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -3,6 +3,7 @@ from collections import defaultdict, ChainMap from datetime import date, datetime, timezone from contextlib import contextmanager +from random import Random from typing import Optional, Dict, Sequence, Mapping, NamedTuple, Set import typing as T @@ -300,6 +301,7 @@ def __init__( snowfakery_plugins: Optional[Mapping[str, callable]] = None, faker_providers: Sequence[object] = (), continuing=False, + seed: Optional[int] = None, ): self.output_stream = output_stream self.options = options or {} @@ -354,6 +356,7 @@ def __init__( self.globals.nicknames_and_tables, ) 
self.resave_objects_from_continuation(globals, self.tables_to_keep_history_for) + self.random_number_generator = Random(seed) def resave_objects_from_continuation( self, globals: Globals, tables_to_keep_history_for: T.Iterable[str] diff --git a/snowfakery/plugins.py b/snowfakery/plugins.py index 29766d94..89e33543 100644 --- a/snowfakery/plugins.py +++ b/snowfakery/plugins.py @@ -1,3 +1,4 @@ +from random import Random import sys from typing import Any, Callable, Mapping, Union, NamedTuple, List, Tuple @@ -141,8 +142,8 @@ def current_filename(self): return self.interpreter.current_context.current_template.filename @property - def current_filename(self): - return self.interpreter.current_context.current_template.filename + def random_number_generator(self) -> Random: + return self.interpreter.random_number_generator def lazy(func: Any) -> Callable: diff --git a/snowfakery/standard_plugins/_math.py b/snowfakery/standard_plugins/_math.py index 9af57125..0359144c 100644 --- a/snowfakery/standard_plugins/_math.py +++ b/snowfakery/standard_plugins/_math.py @@ -1,20 +1,149 @@ import math -from snowfakery.plugins import SnowfakeryPlugin +from random import Random +from types import SimpleNamespace +from typing import List, Optional, Union +from snowfakery.plugins import SnowfakeryPlugin, memorable, PluginResultIterator class Math(SnowfakeryPlugin): def custom_functions(self, *args, **kwargs): "Expose math functions to Snowfakery" - class MathNamespace: - pass + class MathNamespace(SimpleNamespace): + @memorable + def random_partition( + self, + total: int, + *, + min: int = 1, + max: Optional[int] = None, + step: float = 1, + ): + random = self.context.random_number_generator + return GenericPluginResultIterator( + False, parts(total, min, max, step, random) + ) mathns = MathNamespace() - mathns.__dict__ = math.__dict__.copy() + mathns.__dict__.update(math.__dict__.copy()) mathns.pi = math.pi mathns.round = round mathns.min = min mathns.max = max - + mathns.context = 
def parts(
    user_total: int,
    user_min: int = 1,
    user_max: Optional[int] = None,
    user_step: float = 1,
    rand: Optional[Random] = None,
) -> List[Union[int, float]]:
    """Split a number into a randomized list of 'pieces' that add up to it.

    E.g.

        parts(12) -> [3, 6, 3]
        parts(16) -> [8, 4, 2, 2]

    Args:
        user_total: the value all pieces must sum to.
        user_min: the pieces will never be less than this.
        user_max: the pieces will never be more than this; a falsy value
            (``None`` or ``0``) means "no limit other than the total".
        user_step: every piece will be a multiple of this.  Must be a whole
            number, or one of the supported fractional granularities
            0.01, 0.05, 0.1, 0.2, 0.25, 0.5.
        rand: source of randomness; a fresh unseeded ``Random`` if omitted.

    Returns:
        The list of pieces (ints for whole-number steps, floats rounded to
        two decimals for fractional steps).

    Raises:
        AssertionError: if ``user_step`` is unsupported, or if the
            constraints cannot all be met.

    If you provide inconsistent constraints (e.g. the total is not a
    multiple of the step) then some pieces will be inconsistent with them.
    """
    max_ = user_max or user_total
    if rand is None:
        rand = Random()

    # Input validation raises AssertionError explicitly (rather than using
    # `assert`) so it keeps the same exception type but survives `python -O`.
    if user_step < 1:
        allowed_steps = [0.01, 0.05, 0.1, 0.20, 0.25, 0.50]
        if user_step not in allowed_steps:
            raise AssertionError(
                f"`step` must be one of {', '.join(str(f) for f in allowed_steps)}, not {user_step}"
            )
        # Multiply up into the integer range so we don't need to do float math.
        # `round` rather than `int`: quotients such as 0.29 / 0.01 come out as
        # 28.999999999999996 and truncation would silently lose a unit.
        total = round(user_total / user_step)
        step = 1
        min_ = round(user_min / user_step)
        max_ = round(max_ / user_step)
    else:
        step = int(user_step)
        min_ = user_min
        total = user_total
        if step != user_step:
            raise AssertionError(f"`step` should be an integer, not {user_step}")

    pieces: List[int] = []
    smallest = max(min_, step)  # loop-invariant lower bound for a fresh piece

    remaining = total - sum(pieces)
    while remaining > 0:
        if remaining < smallest:
            # Too small to stand as a regular piece: place the leftover.
            # (mutates pieces)
            success = handle_last_bit(pieces, rand, remaining, min_, max_)
            # our constraints must have been impossible to fulfill
            if not success:
                raise AssertionError(
                    f"No way to match all constraints: total: {user_total}, min: {user_min}, max: {user_max}, step: {user_step}"
                )
        else:
            pieces.append(generate_piece(rand, smallest, remaining, max_, step))
        remaining = total - sum(pieces)

    # Internal invariants, not input validation.
    assert sum(pieces) == total, pieces
    assert 0 not in pieces, pieces

    if user_step != step:
        # Fractional step: scale the integer pieces back down.
        pieces = [round(p * user_step, 2) for p in pieces]
    return pieces


def handle_last_bit(
    pieces: List[int], rand: Random, remaining: int, min_: int, max_: int
) -> bool:
    """Place a leftover amount that is too small to be a regular piece.

    If the leftover is at least ``min_`` it is inserted as its own piece at a
    random position.  Otherwise it is added to the first piece that has room
    for all of it, and failing that it is spread over several pieces, filling
    each up to ``max_``.

    ``pieces`` is mutated in place.

    Returns:
        True if the whole leftover was placed, False if there was not enough
        room (in which case ``pieces`` may have been partially topped up).
    """
    if remaining >= min_:
        pos = rand.randint(0, len(pieces))
        pieces.insert(pos, remaining)
        return True

    # try to add it to some other piece
    for i, val in enumerate(pieces):
        if val + remaining <= max_:
            pieces[i] += remaining
            return True

    # No other piece has enough room...so
    # split it up among several other pieces
    for i, val in enumerate(pieces):
        chunk = min(max_ - val, remaining)
        # Add only what was actually taken from `remaining`; assigning `max_`
        # here would over-fill the final piece and break the total.
        pieces[i] += chunk
        remaining -= chunk
        assert remaining >= 0
        if remaining == 0:
            return True

    return False


def generate_piece(rand: Random, smallest: int, remaining: int, max_: int, step: int):
    """Pick one random piece that is a multiple of ``step``.

    A value is drawn from ``smallest`` to ``min(remaining, max_)`` inclusive
    and then snapped to a multiple of ``step``: upwards (on a coin flip) when
    the next multiple still fits under the upper bound, otherwise downwards.

    Note: snapping downwards can return less than ``smallest`` when
    ``smallest`` is not itself a multiple of ``step``.
    """
    upper = min(remaining, max_)
    part = rand.randint(smallest, upper)
    round_up = part + step - (part % step)
    # The coin flip is only drawn when rounding up is possible, which keeps
    # the sequence of random draws stable for seeded generators.
    if round_up <= upper and rand.randint(0, 1):
        return round_up
    return part - part % step
in parents) == sum( + c["Amount__c"] for c in children + ), (parents, children) + + regression_seeds = [824956277] + + @pytest.mark.parametrize("seed2", regression_seeds + SEEDS) + def test_example_pennies(self, generated_rows, seed, seed2): + generate_data("examples/math_partition/sum_pennies.recipe.yml", seed=seed2) + objs = generated_rows.table_values("Values") + assert round(sum(p["Amount"] for p in objs)) == 100, sum( + p["Amount"] for p in objs + ) + + @pytest.mark.parametrize("step", [0.01, 0.5, 0.1, 0.20, 0.25, 0.50]) + def test_example_pennies_param(self, generated_rows, seed, step: int): + generate_data( + "examples/math_partition/sum_pennies_param.recipe.yml", + user_options={"step": step}, + seed=seed, + ) + objs = generated_rows.table_values("Values") + assert round(sum(p["Amount"] for p in objs)) == 100, sum( + p["Amount"] for p in objs + ) + + def test_step(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 60 + step: 10 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml), seed=seed) + values = generated_rows.table_values("Obj") + assert 1 <= len(values) <= 6 + amounts = [r["Amount"] for r in values] + assert sum(amounts) == 60, amounts + assert sum([r % 10 for r in amounts]) == 0, amounts + + def test_min(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 60 + min: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml), seed=seed) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 60, results + assert not [r for r in results if r < 5], results + + def test_min_not_factor_of_total(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: 
child_value + value: + Math.random_partition: + total: 63 + min: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml), seed=seed) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 63 + assert not [r for r in results if r < 5], results + + def test_step_not_factor_of_total(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 63 + step: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml), seed=seed) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 63, results + assert len([r for r in results if r < 5]) <= 1, results + + def test_max(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 2 + max: 6 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml), seed=seed) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 28, results + assert not [r for r in results if r % 2], results + assert not [r for r in results if r > 6], results + + def test_bad_step(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 0.3 + fields: + Amount: ${{child_value}} + """ + with pytest.raises(DataGenError, match="step.*0.3"): + generate_data(StringIO(yaml), seed=seed) + + def test_inconsistent_constraints(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 10 + min: 8 + max: 8 + step: 5 + fields: + Amount: ${{child_value}} + """ + with 
pytest.raises(DataGenError, match="constraints"): + generate_data(StringIO(yaml), seed=seed)