From d546e8d659d6f2f486b5b4000e2003218c711f54 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Tue, 21 Nov 2023 23:20:41 +0100 Subject: [PATCH 1/3] Add random laplace number and div_or_else function On branch feature/random-fenerators Changes to be committed: new file: quinn/math.py --- quinn/math.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 quinn/math.py diff --git a/quinn/math.py b/quinn/math.py new file mode 100644 index 00000000..203d27f9 --- /dev/null +++ b/quinn/math.py @@ -0,0 +1,50 @@ +"""Math routines for PySpark.""" +from typing import Optional, Union + +from pyspark.sql import Column +from pyspark.sql import functions as F # noqa: N812 + + +def rand_laplace( + mu: Union[float, Column], + beta: Union[float, Column], + seed: Optional[int] = None, +) -> Column: + """Generate random numbers from Laplace(mu, beta). + + :param mu: mu parameter of Laplace distribution + :param beta: beta parameter of Laplace distribution + :param seed: random seed value (optional, default None) + :returns: column with random numbers + """ + if not isinstance(mu, Column): + mu = F.lit(mu) + + if not isinstance(beta, Column): + beta = F.lit(beta) + + u = F.rand(seed) + + return ( + F.when(u < F.lit(0.5), mu + beta * F.log(2 * u)) + .otherwise(mu - beta * F.log(2 * (1 - u))) + .alias("laplace_random") + ) + + +def div_or_else( + cola: Column, + colb: Column, + default: Union[float, Column] = 0.0, +) -> Column: + """Return result of division of cola by colb or default if colb is zero. 
+ + :param cola: dividend + :param colb: divisor + :param default: default value + :returns: result of division, or default if colb is zero + """ + if not isinstance(default, Column): + default = F.lit(default) + + return F.when(colb == F.lit(0.0), default).otherwise(cola / colb) From 3c777373f0444a64f2101a6c22469f9a3538bd3e Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Tue, 21 Nov 2023 23:29:04 +0100 Subject: [PATCH 2/3] Add import from __future__ On branch feature/random-fenerators Changes to be committed: modified: quinn/math.py --- quinn/math.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quinn/math.py b/quinn/math.py index 203d27f9..0302f9ad 100644 --- a/quinn/math.py +++ b/quinn/math.py @@ -1,4 +1,6 @@ """Math routines for PySpark.""" +from __future__ import annotations + from typing import Optional, Union from pyspark.sql import Column From 4758730aca3ef2fe95740be7da941b7b7ec30e63 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 23 Nov 2023 21:49:29 +0100 Subject: [PATCH 3/3] Fix imports order & update gitignore On branch fix-linters Changes to be committed: modified: .gitignore modified: quinn/schema_helpers.py --- .gitignore | 6 +++++- quinn/schema_helpers.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 39094991..0934ac58 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,8 @@ site .vscode # Emacs -.dir_locals.el \ No newline at end of file +.dir_locals.el + +# Jupyter notebooks +*.ipynb +.ipynb_checkpoints \ No newline at end of file diff --git a/quinn/schema_helpers.py b/quinn/schema_helpers.py index 922c874d..0c4985e0 100644 --- a/quinn/schema_helpers.py +++ b/quinn/schema_helpers.py @@ -2,6 +2,7 @@ import json from typing import Optional + from pyspark.sql import SparkSession from pyspark.sql import types as T # noqa: N812