Skip to content

Commit

Permalink
[SPARK-43943][SQL][PYTHON][CONNECT] Add SQL math functions to Scala a…
Browse files Browse the repository at this point in the history
…nd Python

### What changes were proposed in this pull request?
Add following functions:

* ceiling
* e
* pi
* ln
* negative
* positive
* power
* sign
* std
* width_bucket

to:

* Scala API
* Python API
* Spark Connect Scala Client
* Spark Connect Python Client

This PR also adds `negate` (which already exists in Scala API and SCSC) to Python API and SCPC.

### Why are the changes needed?
for parity

### Does this PR introduce _any_ user-facing change?
yes, new functions

### How was this patch tested?
added ut / doctest

Closes apache#41435 from zhengruifeng/sql_func_math.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
  • Loading branch information
zhengruifeng committed Jun 8, 2023
1 parent 4ddf83f commit f1cca85
Show file tree
Hide file tree
Showing 41 changed files with 839 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,14 @@ object functions {
*/
def skewness(columnName: String): Column = skewness(Column(columnName))

/**
* Aggregate function: alias for `stddev_samp`.
*
* @group agg_funcs
* @since 3.5.0
*/
def std(e: Column): Column = stddev(e)

/**
* Aggregate function: alias for `stddev_samp`.
*
Expand Down Expand Up @@ -1978,6 +1986,22 @@ object functions {
*/
def ceil(columnName: String): Column = ceil(Column(columnName))

/**
* Computes the ceiling of the given value of `e` to `scale` decimal places.
*
* @group math_funcs
* @since 3.5.0
*/
def ceiling(e: Column, scale: Column): Column = ceil(e, scale)

/**
* Computes the ceiling of the given value of `e` to 0 decimal places.
*
* @group math_funcs
* @since 3.5.0
*/
def ceiling(e: Column): Column = ceil(e)

/**
* Convert a number in a string column from one base to another.
*
Expand Down Expand Up @@ -2053,6 +2077,14 @@ object functions {
*/
def csc(e: Column): Column = Column.fn("csc", e)

/**
* Returns Euler's number.
*
* @group math_funcs
* @since 3.5.0
*/
def e(): Column = Column.fn("e")

/**
* Computes the exponential of the given value.
*
Expand Down Expand Up @@ -2241,6 +2273,14 @@ object functions {
def least(columnName: String, columnNames: String*): Column =
least((columnName +: columnNames).map(Column.apply): _*)

/**
* Computes the natural logarithm of the given value.
*
* @group math_funcs
* @since 3.5.0
*/
def ln(e: Column): Column = log(e)

/**
* Computes the natural logarithm of the given value.
*
Expand Down Expand Up @@ -2321,6 +2361,30 @@ object functions {
*/
def log2(columnName: String): Column = log2(Column(columnName))

/**
* Returns the negated value.
*
* @group math_funcs
* @since 3.5.0
*/
def negative(e: Column): Column = Column.fn("negative", e)

/**
* Returns Pi.
*
* @group math_funcs
* @since 3.5.0
*/
def pi(): Column = Column.fn("pi")

/**
* Returns the value.
*
* @group math_funcs
* @since 3.5.0
*/
def positive(e: Column): Column = Column.fn("positive", e)

/**
* Returns the value of the first argument raised to the power of the second argument.
*
Expand Down Expand Up @@ -2385,6 +2449,14 @@ object functions {
*/
def pow(l: Double, rightName: String): Column = pow(l, Column(rightName))

/**
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
* @since 3.5.0
*/
def power(l: Column, r: Column): Column = pow(l, r)

/**
* Returns the positive value of dividend mod divisor.
*
Expand Down Expand Up @@ -2514,6 +2586,14 @@ object functions {
def shiftrightunsigned(e: Column, numBits: Int): Column =
Column.fn("shiftrightunsigned", e, lit(numBits))

/**
* Computes the signum of the given value.
*
* @group math_funcs
* @since 3.5.0
*/
def sign(e: Column): Column = signum(e)

/**
* Computes the signum of the given value.
*
Expand Down Expand Up @@ -2702,6 +2782,27 @@ object functions {
*/
def radians(columnName: String): Column = radians(Column(columnName))

/**
* Returns the bucket number into which the value of this expression would fall after being
* evaluated. Note that input arguments must follow conditions listed below; otherwise, the
* method will return null.
*
* @param v
* value to compute a bucket number in the histogram
* @param min
* minimum value of the histogram
* @param max
* maximum value of the histogram
* @param numBucket
* the number of buckets
* @return
* the bucket number into which the value would fall after being evaluated
* @group math_funcs
* @since 3.5.0
*/
def width_bucket(v: Column, min: Column, max: Column, numBucket: Column): Column =
Column.fn("width_bucket", v, min, max, numBucket)

//////////////////////////////////////////////////////////////////////////////////////////////
// Misc functions
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,10 @@ class PlanGenerationTestSuite
fn.stddev("a")
}

functionTest("std") {
fn.std(fn.col("a"))
}

functionTest("stddev_samp") {
fn.stddev_samp("a")
}
Expand Down Expand Up @@ -1182,6 +1186,14 @@ class PlanGenerationTestSuite
fn.ceil(fn.col("b"), lit(2))
}

functionTest("ceiling") {
fn.ceiling(fn.col("b"))
}

functionTest("ceiling scale") {
fn.ceiling(fn.col("b"), lit(2))
}

functionTest("conv") {
fn.conv(fn.col("b"), 10, 16)
}
Expand All @@ -1202,6 +1214,10 @@ class PlanGenerationTestSuite
fn.csc(fn.col("b"))
}

functionTest("e") {
fn.e()
}

functionTest("exp") {
fn.exp("b")
}
Expand Down Expand Up @@ -1246,6 +1262,10 @@ class PlanGenerationTestSuite
fn.log("b")
}

functionTest("ln") {
fn.ln(fn.col("b"))
}

functionTest("log with base") {
fn.log(2, "b")
}
Expand All @@ -1262,10 +1282,26 @@ class PlanGenerationTestSuite
fn.log2("a")
}

functionTest("negative") {
fn.negative(fn.col("a"))
}

functionTest("pi") {
fn.pi()
}

functionTest("positive") {
fn.positive(fn.col("a"))
}

functionTest("pow") {
fn.pow("a", "b")
}

functionTest("power") {
fn.power(fn.col("a"), fn.col("b"))
}

functionTest("pmod") {
fn.pmod(fn.col("a"), fn.lit(10))
}
Expand Down Expand Up @@ -1302,6 +1338,10 @@ class PlanGenerationTestSuite
fn.signum("b")
}

functionTest("sign") {
fn.sign(fn.col("b"))
}

functionTest("sin") {
fn.sin("b")
}
Expand Down Expand Up @@ -2132,6 +2172,10 @@ class PlanGenerationTestSuite
simple.groupBy(Column("id")).pivot("a").agg(functions.count(Column("b")))
}

test("width_bucket") {
simple.select(fn.width_bucket(fn.col("b"), fn.col("b"), fn.col("b"), fn.col("a")))
}

test("test broadcast") {
left.join(fn.broadcast(right), "id")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [CEIL(b#0) AS CEIL(b)#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [ceil(cast(b#0 as decimal(30,15)), 2) AS ceil(b, 2)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [E() AS E()#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [LOG(E(), b#0) AS LOG(E(), b)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [-a#0 AS negative(a)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [PI() AS PI()#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [positive(a#0) AS (+ a)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [POWER(cast(a#0 as double), b#0) AS POWER(a, b)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [SIGNUM(b#0) AS SIGNUM(b)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Aggregate [stddev(cast(a#0 as double)) AS stddev(a)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [width_bucket(b#0, b#0, b#0, cast(a#0 as bigint)) AS width_bucket(b, b, b, a)#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "ceil",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "ceil",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
}
}, {
"literal": {
"integer": 2
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "e"
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "log",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
}
}]
}
}]
}
}
Binary file not shown.
Loading

0 comments on commit f1cca85

Please sign in to comment.