Skip to content

Commit

Permalink
chore: ensure pre-commit run --all-files runs (#227)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Dec 16, 2024
1 parent 9719f5e commit c12f161
Show file tree
Hide file tree
Showing 11 changed files with 142 additions and 85 deletions.
2 changes: 1 addition & 1 deletion docsite/docs/assets/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

.jp-Collapser {
--jp-cell-collapser-width: 0px;
}
}
36 changes: 22 additions & 14 deletions docsite/docs/guides/contrasts.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,17 @@
}
],
"source": [
"from pandas import Categorical, DataFrame\n",
"\n",
"from formulaic import model_matrix\n",
"from pandas import DataFrame, Categorical\n",
"\n",
"df = DataFrame({\n",
" \"letters\": [\"a\", \"b\", \"c\"],\n",
" \"numbers\": Categorical([1,2,3]),\n",
" \"values\": [20, 200, 30],\n",
"})\n",
"df = DataFrame(\n",
" {\n",
" \"letters\": [\"a\", \"b\", \"c\"],\n",
" \"numbers\": Categorical([1, 2, 3]),\n",
" \"values\": [20, 200, 30],\n",
" }\n",
")\n",
"\n",
"model_matrix(\"letters + numbers + values\", df)"
]
Expand Down Expand Up @@ -1220,13 +1223,16 @@
],
"source": [
"import numpy\n",
"Z = numpy.array([\n",
" [1, 0, 0, 0], # A\n",
" [-1, 1, 0, 0], # B - A\n",
" [0, -1, 1, 0], # C - B\n",
" [-1, 0, 0, 1], # D - A\n",
"])\n",
"coding = numpy.linalg.inv(Z)[:,1:]\n",
"\n",
"Z = numpy.array(\n",
" [\n",
" [1, 0, 0, 0], # A\n",
" [-1, 1, 0, 0], # B - A\n",
" [0, -1, 1, 0], # C - B\n",
" [-1, 0, 0, 1], # D - A\n",
" ]\n",
")\n",
"coding = numpy.linalg.inv(Z)[:, 1:]\n",
"coding"
]
},
Expand Down Expand Up @@ -1315,7 +1321,9 @@
}
],
"source": [
"model_matrix(\"C(letters, contr.custom(coding))\", DataFrame({\"letters\": [\"A\", \"B\", \"C\", \"D\"]}))"
"model_matrix(\n",
" \"C(letters, contr.custom(coding))\", DataFrame({\"letters\": [\"A\", \"B\", \"C\", \"D\"]})\n",
")"
]
},
{
Expand Down
37 changes: 24 additions & 13 deletions docsite/docs/guides/formulae.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,13 @@
"source": [
"from formulaic.parser.types import Factor\n",
"\n",
"Factor(\"1\", eval_method=\"literal\") # a factor that represents the numerical constant of 1\n",
"Factor(\n",
" \"1\", eval_method=\"literal\"\n",
") # a factor that represents the numerical constant of 1\n",
"Factor(\"a\") # a factor that will be looked up from the data context\n",
"Factor(\"a + b\", eval_method=\"python\") # a factor that will return the sum of `a` and `b`"
"Factor(\n",
" \"a + b\", eval_method=\"python\"\n",
") # a factor that will return the sum of `a` and `b`"
]
},
{
Expand Down Expand Up @@ -179,10 +183,13 @@
"from formulaic import Formula\n",
"\n",
"# Unstructured formula (a simple list of terms)\n",
"Formula([\n",
" Term(factors=[Factor(\"c\"), Factor(\"d\"), Factor(\"e\")]),\n",
" Term(factors=[Factor(\"a\"), Factor(\"b\")]),\n",
"])"
"f = Formula(\n",
" [\n",
" Term(factors=[Factor(\"c\"), Factor(\"d\"), Factor(\"e\")]),\n",
" Term(factors=[Factor(\"a\"), Factor(\"b\")]),\n",
" ]\n",
")\n",
"f"
]
},
{
Expand Down Expand Up @@ -210,7 +217,7 @@
}
],
"source": [
"type(_), list(_)"
"type(f), list(f)"
]
},
{
Expand Down Expand Up @@ -266,7 +273,7 @@
" really_nested=[\n",
" Term(factors=[Factor(\"really_nested_col\")]),\n",
" ],\n",
" )\n",
" ),\n",
")\n",
"f"
]
Expand Down Expand Up @@ -295,7 +302,7 @@
}
],
"source": [
"type(_)"
"type(f)"
]
},
{
Expand Down Expand Up @@ -484,8 +491,9 @@
"[\n",
" f\"{token.token} : {token.kind.value}\"\n",
" for token in (\n",
" DefaultFormulaParser(include_intercept=False)\n",
" .get_tokens(\"y ~ 1 + b:log(c) | `d$in^df` + {e + f}\")\n",
" DefaultFormulaParser(include_intercept=False).get_tokens(\n",
" \"y ~ 1 + b:log(c) | `d$in^df` + {e + f}\"\n",
" )\n",
" )\n",
"]"
]
Expand Down Expand Up @@ -636,7 +644,6 @@
}
],
"source": [
"\n",
"Formula(\"y ~ a + b:c\", _parser=DefaultFormulaParser(include_intercept=False))"
]
},
Expand Down Expand Up @@ -727,7 +734,9 @@
"source": [
"import pandas\n",
"\n",
"data = pandas.DataFrame({\"a\": [1,2,3], \"b\": [4,5,6], \"c\": [7, 8, 9], \"A\": [\"a\", \"b\", \"c\"]})\n",
"data = pandas.DataFrame(\n",
" {\"a\": [1, 2, 3], \"b\": [4, 5, 6], \"c\": [7, 8, 9], \"A\": [\"a\", \"b\", \"c\"]}\n",
")\n",
"Formula(\"a + b:c\").get_model_matrix(data)"
]
},
Expand Down Expand Up @@ -852,6 +861,7 @@
],
"source": [
"from formulaic.materializers import PandasMaterializer\n",
"\n",
"PandasMaterializer(data).get_model_matrix(Formula(\"a + b:c\"), output=\"pandas\")"
]
},
Expand Down Expand Up @@ -885,6 +895,7 @@
],
"source": [
"import numpy\n",
"\n",
"from formulaic import ModelMatrix\n",
"\n",
"mm = Formula(\"a + b:c\").get_model_matrix(data, output=\"numpy\")\n",
Expand Down
47 changes: 29 additions & 18 deletions docsite/docs/guides/integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,15 @@
],
"source": [
"import pandas\n",
"from formulaic import model_matrix\n",
"from statsmodels.api import OLS\n",
"data = pandas.DataFrame({\"y\": [0.1, 0.4, 3], \"a\": [1,2,3], \"b\": [\"A\", \"B\", \"C\"]})\n",
"\n",
"from formulaic import model_matrix\n",
"\n",
"data = pandas.DataFrame({\"y\": [0.1, 0.4, 3], \"a\": [1, 2, 3], \"b\": [\"A\", \"B\", \"C\"]})\n",
"y, X = model_matrix(\"y ~ a + b\", data)\n",
"model = OLS(y, X)\n",
"results = model.fit()\n",
"print(results.summary())\n",
"\n"
"print(results.summary())"
]
},
{
Expand Down Expand Up @@ -143,54 +144,64 @@
"from typing import Iterable, List, Optional\n",
"\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"from formulaic import Formula, FormulaSpec, ModelSpec\n",
"\n",
"\n",
"class FormulaicTransformer(TransformerMixin, BaseEstimator):\n",
"\n",
" def __init__(self, formula: FormulaSpec):\n",
" self.formula: Formula = Formula.from_spec(formula)\n",
" self.model_spec: Optional[ModelSpec] = None\n",
" if self.formula._has_structure:\n",
" raise ValueError(f\"Formula specification {repr(formula)} results in a structured formula, which is not supported.\")\n",
" raise ValueError(\n",
" f\"Formula specification {repr(formula)} results in a structured formula, which is not supported.\"\n",
" )\n",
"\n",
" def fit(self, X, y = None):\n",
" def fit(self, X, y=None):\n",
" \"\"\"\n",
" Generate the initial model spec by which subsequent X's will be\n",
" transformed.\n",
" \"\"\"\n",
" self.model_spec = self.formula.get_model_matrix(X).model_spec\n",
" return self\n",
"\n",
" def transform(self, X, y = None):\n",
" def transform(self, X, y=None):\n",
" \"\"\"\n",
" Transform `X` by generating a model matrix from it based on the fit\n",
" model spec.\n",
" \"\"\"\n",
" if self.model_spec is None:\n",
" raise RuntimeError(\"`FormulaicTransformer.fit()` must be called before `.transform()`.\")\n",
" raise RuntimeError(\n",
" \"`FormulaicTransformer.fit()` must be called before `.transform()`.\"\n",
" )\n",
" X_ = self.model_spec.get_model_matrix(X)\n",
" return X_\n",
"\n",
" def get_feature_names_out(self, input_features: Optional[Iterable[str]] = None) -> List[str]:\n",
" def get_feature_names_out(\n",
" self, input_features: Optional[Iterable[str]] = None\n",
" ) -> List[str]:\n",
" \"\"\"\n",
" Expose model spec column names to scikit-learn to allow column transforms later in the pipeline.\n",
" \"\"\"\n",
" if self.model_spec is None:\n",
" raise RuntimeError(\"`FormulaicTransformer.fit()` must be called before columns can be assigned names.\")\n",
" raise RuntimeError(\n",
" \"`FormulaicTransformer.fit()` must be called before columns can be assigned names.\"\n",
" )\n",
" return self.model_spec.column_names\n",
"\n",
"\n",
"pipe = Pipeline([\n",
" (\"formula\", FormulaicTransformer(\"x1 + x2 + x3\")),\n",
" (\"model\", LinearRegression())\n",
"])\n",
"pipe_fit = pipe.fit(pandas.DataFrame({\"x1\": [1,2,3], \"x2\": [2, 3.4, 6], \"x3\": [7, 3, 1]}), y=pandas.Series([1,3,5]))\n",
"pipe = Pipeline(\n",
" [(\"formula\", FormulaicTransformer(\"x1 + x2 + x3\")), (\"model\", LinearRegression())]\n",
")\n",
"pipe_fit = pipe.fit(\n",
" pandas.DataFrame({\"x1\": [1, 2, 3], \"x2\": [2, 3.4, 6], \"x3\": [7, 3, 1]}),\n",
" y=pandas.Series([1, 3, 5]),\n",
")\n",
"pipe_fit\n",
"# Note: You could optionally serialize `pipe_fit` here.\n",
"# Then: Use the pipe to predict outcomes for new data.\n"
"# Then: Use the pipe to predict outcomes for new data."
]
}
],
Expand Down
15 changes: 10 additions & 5 deletions docsite/docs/guides/missing_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,19 @@
}
],
"source": [
"from pandas import Categorical, DataFrame\n",
"\n",
"from formulaic import model_matrix\n",
"from formulaic.materializers import NAAction\n",
"from pandas import DataFrame, Categorical\n",
"\n",
"df = DataFrame({\n",
" \"c\": [1, 2, None, 4, 5],\n",
" \"C\": Categorical([\"a\", \"b\", \"c\", None, \"e\"], categories=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n",
"})\n",
"df = DataFrame(\n",
" {\n",
" \"c\": [1, 2, None, 4, 5],\n",
" \"C\": Categorical(\n",
" [\"a\", \"b\", \"c\", None, \"e\"], categories=[\"a\", \"b\", \"c\", \"d\", \"e\"]\n",
" ),\n",
" }\n",
")\n",
"\n",
"model_matrix(\"c + C\", df, na_action=NAAction.DROP)\n",
"# Equivalent to:\n",
Expand Down
26 changes: 15 additions & 11 deletions docsite/docs/guides/model_specs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,11 @@
],
"source": [
"# Let's get ourselves a simple `ModelMatrix` instance to play with.\n",
"from formulaic import model_matrix\n",
"from pandas import DataFrame\n",
"\n",
"mm = model_matrix(\"center(a) + b\", DataFrame({\"a\": [1,2,3], \"b\": [\"A\", \"B\", \"C\"]}))\n",
"from formulaic import model_matrix\n",
"\n",
"mm = model_matrix(\"center(a) + b\", DataFrame({\"a\": [1, 2, 3], \"b\": [\"A\", \"B\", \"C\"]}))\n",
"mm"
]
},
Expand Down Expand Up @@ -399,9 +400,7 @@
],
"source": [
"mm_numpy = model_matrix(\n",
" \"center(a) + b\",\n",
" DataFrame({\"a\": [1,2,3], \"b\": [\"A\", \"B\", \"C\"]}),\n",
" output='numpy'\n",
" \"center(a) + b\", DataFrame({\"a\": [1, 2, 3], \"b\": [\"A\", \"B\", \"C\"]}), output=\"numpy\"\n",
")\n",
"mm_numpy"
]
Expand All @@ -426,7 +425,7 @@
],
"source": [
"ms_numpy = mm_numpy.model_spec\n",
"mm_numpy[:, ms_numpy.term_indices['b']]"
"mm_numpy[:, ms_numpy.term_indices[\"b\"]]"
]
},
{
Expand Down Expand Up @@ -539,7 +538,7 @@
}
],
"source": [
"ms.get_model_matrix(DataFrame({\"a\": [4,5,6], \"b\": [\"A\", \"B\", \"D\"]}))"
"ms.get_model_matrix(DataFrame({\"a\": [4, 5, 6], \"b\": [\"A\", \"B\", \"D\"]}))"
]
},
{
Expand Down Expand Up @@ -624,7 +623,7 @@
}
],
"source": [
"model_matrix(ms, data=DataFrame({\"a\": [4,5,6], \"b\": [\"A\", \"A\", \"A\"]}))"
"model_matrix(ms, data=DataFrame({\"a\": [4, 5, 6], \"b\": [\"A\", \"A\", \"A\"]}))"
]
},
{
Expand Down Expand Up @@ -660,7 +659,7 @@
"source": [
"from formulaic import ModelSpec\n",
"\n",
"ms = ModelSpec(\"a+b+c\", output='numpy', ensure_full_rank=False)\n",
"ms = ModelSpec(\"a+b+c\", output=\"numpy\", ensure_full_rank=False)\n",
"ms"
]
},
Expand All @@ -684,7 +683,10 @@
],
"source": [
"import pandas\n",
"mm = ms.get_model_matrix(pandas.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7,8,9]}))\n",
"\n",
"mm = ms.get_model_matrix(\n",
" pandas.DataFrame({\"a\": [1, 2, 3], \"b\": [4, 5, 6], \"c\": [7, 8, 9]})\n",
")\n",
"mm"
]
},
Expand Down Expand Up @@ -756,7 +758,9 @@
"source": [
"from formulaic import Formula, ModelSpecs\n",
"\n",
"ModelSpecs(ModelSpec(\"a\"), substructure=ModelSpec(\"b\"), another_substructure=ModelSpec(\"c\"))"
"ModelSpecs(\n",
" ModelSpec(\"a\"), substructure=ModelSpec(\"b\"), another_substructure=ModelSpec(\"c\")\n",
")"
]
},
{
Expand Down
Loading

0 comments on commit c12f161

Please sign in to comment.