diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index 9db0ba3f28e96..fc9eb074ca9cd 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -873,6 +873,14 @@ object functions { */ def skewness(columnName: String): Column = skewness(Column(columnName)) + /** + * Aggregate function: alias for `stddev_samp`. + * + * @group agg_funcs + * @since 3.5.0 + */ + def std(e: Column): Column = stddev(e) + /** * Aggregate function: alias for `stddev_samp`. * @@ -1978,6 +1986,22 @@ object functions { */ def ceil(columnName: String): Column = ceil(Column(columnName)) + /** + * Computes the ceiling of the given value of `e` to `scale` decimal places. + * + * @group math_funcs + * @since 3.5.0 + */ + def ceiling(e: Column, scale: Column): Column = ceil(e, scale) + + /** + * Computes the ceiling of the given value of `e` to 0 decimal places. + * + * @group math_funcs + * @since 3.5.0 + */ + def ceiling(e: Column): Column = ceil(e) + /** * Convert a number in a string column from one base to another. * @@ -2053,6 +2077,14 @@ object functions { */ def csc(e: Column): Column = Column.fn("csc", e) + /** + * Returns Euler's number. + * + * @group math_funcs + * @since 3.5.0 + */ + def e(): Column = Column.fn("e") + /** * Computes the exponential of the given value. * @@ -2241,6 +2273,14 @@ object functions { def least(columnName: String, columnNames: String*): Column = least((columnName +: columnNames).map(Column.apply): _*) + /** + * Computes the natural logarithm of the given value. + * + * @group math_funcs + * @since 3.5.0 + */ + def ln(e: Column): Column = log(e) + /** * Computes the natural logarithm of the given value. 
* @@ -2321,6 +2361,30 @@ object functions { */ def log2(columnName: String): Column = log2(Column(columnName)) + /** + * Returns the negated value. + * + * @group math_funcs + * @since 3.5.0 + */ + def negative(e: Column): Column = Column.fn("negative", e) + + /** + * Returns Pi. + * + * @group math_funcs + * @since 3.5.0 + */ + def pi(): Column = Column.fn("pi") + + /** + * Returns the value. + * + * @group math_funcs + * @since 3.5.0 + */ + def positive(e: Column): Column = Column.fn("positive", e) + /** * Returns the value of the first argument raised to the power of the second argument. * @@ -2385,6 +2449,14 @@ object functions { */ def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 3.5.0 + */ + def power(l: Column, r: Column): Column = pow(l, r) + /** * Returns the positive value of dividend mod divisor. * @@ -2514,6 +2586,14 @@ object functions { def shiftrightunsigned(e: Column, numBits: Int): Column = Column.fn("shiftrightunsigned", e, lit(numBits)) + /** + * Computes the signum of the given value. + * + * @group math_funcs + * @since 3.5.0 + */ + def sign(e: Column): Column = signum(e) + /** * Computes the signum of the given value. * @@ -2702,6 +2782,27 @@ object functions { */ def radians(columnName: String): Column = radians(Column(columnName)) + /** + * Returns the bucket number into which the value of this expression would fall after being + * evaluated. Note that input arguments must follow conditions listed below; otherwise, the + * method will return null. 
+ * + * @param v + * value to compute a bucket number in the histogram + * @param min + * minimum value of the histogram + * @param max + * maximum value of the histogram + * @param numBucket + * the number of buckets + * @return + * the bucket number into which the value would fall after being evaluated + * @group math_funcs + * @since 3.5.0 + */ + def width_bucket(v: Column, min: Column, max: Column, numBucket: Column): Column = + Column.fn("width_bucket", v, min, max, numBucket) + ////////////////////////////////////////////////////////////////////////////////////////////// // Misc functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index d23b4ef2e0270..109219603b9da 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -1006,6 +1006,10 @@ class PlanGenerationTestSuite fn.stddev("a") } + functionTest("std") { + fn.std(fn.col("a")) + } + functionTest("stddev_samp") { fn.stddev_samp("a") } @@ -1182,6 +1186,14 @@ class PlanGenerationTestSuite fn.ceil(fn.col("b"), lit(2)) } + functionTest("ceiling") { + fn.ceiling(fn.col("b")) + } + + functionTest("ceiling scale") { + fn.ceiling(fn.col("b"), lit(2)) + } + functionTest("conv") { fn.conv(fn.col("b"), 10, 16) } @@ -1202,6 +1214,10 @@ class PlanGenerationTestSuite fn.csc(fn.col("b")) } + functionTest("e") { + fn.e() + } + functionTest("exp") { fn.exp("b") } @@ -1246,6 +1262,10 @@ class PlanGenerationTestSuite fn.log("b") } + functionTest("ln") { + fn.ln(fn.col("b")) + } + functionTest("log with base") { fn.log(2, "b") } @@ -1262,10 +1282,26 @@ class PlanGenerationTestSuite fn.log2("a") } + 
functionTest("negative") { + fn.negative(fn.col("a")) + } + + functionTest("pi") { + fn.pi() + } + + functionTest("positive") { + fn.positive(fn.col("a")) + } + functionTest("pow") { fn.pow("a", "b") } + functionTest("power") { + fn.power(fn.col("a"), fn.col("b")) + } + functionTest("pmod") { fn.pmod(fn.col("a"), fn.lit(10)) } @@ -1302,6 +1338,10 @@ class PlanGenerationTestSuite fn.signum("b") } + functionTest("sign") { + fn.sign(fn.col("b")) + } + functionTest("sin") { fn.sin("b") } @@ -2132,6 +2172,10 @@ class PlanGenerationTestSuite simple.groupBy(Column("id")).pivot("a").agg(functions.count(Column("b"))) } + test("width_bucket") { + simple.select(fn.width_bucket(fn.col("b"), fn.col("b"), fn.col("b"), fn.col("a"))) + } + test("test broadcast") { left.join(fn.broadcast(right), "id") } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain new file mode 100644 index 0000000000000..9cf776a8dbaa7 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain @@ -0,0 +1,2 @@ +Project [CEIL(b#0) AS CEIL(b)#0L] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain new file mode 100644 index 0000000000000..cdf8d356e47dd --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain @@ -0,0 +1,2 @@ +Project [ceil(cast(b#0 as decimal(30,15)), 2) AS ceil(b, 2)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain new file mode 100644 index 0000000000000..30a94c78310c7 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain @@ -0,0 +1,2 @@ +Project [E() AS E()#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain new file mode 100644 index 0000000000000..d3c3743b1ef40 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain @@ -0,0 +1,2 @@ +Project [LOG(E(), b#0) AS LOG(E(), b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain new file mode 100644 index 0000000000000..4f047e75f06ad --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain @@ -0,0 +1,2 @@ +Project [-a#0 AS negative(a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain new file mode 100644 index 0000000000000..a3f4e78ed64b8 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain @@ -0,0 +1,2 @@ +Project [PI() AS PI()#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain new file mode 100644 index 0000000000000..8e1df4a043575 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain @@ -0,0 +1,2 @@ +Project [positive(a#0) AS (+ a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain new file mode 100644 index 0000000000000..c6c6c0603e3e0 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain @@ -0,0 +1,2 @@ +Project [POWER(cast(a#0 as double), b#0) AS POWER(a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain new file mode 100644 index 0000000000000..807fa3300836c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain @@ -0,0 +1,2 @@ +Project [SIGNUM(b#0) AS SIGNUM(b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain new file mode 100644 index 0000000000000..106191e5a32ec --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain @@ -0,0 +1,2 @@ +Aggregate [stddev(cast(a#0 as double)) AS stddev(a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain 
b/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain new file mode 100644 index 0000000000000..22b799481c96f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain @@ -0,0 +1,2 @@ +Project [width_bucket(b#0, b#0, b#0, cast(a#0 as bigint)) AS width_bucket(b, b, b, a)#0L] ++- LocalRelation , [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json new file mode 100644 index 0000000000000..5a9961ab47f55 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "ceil", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin new file mode 100644 index 0000000000000..3761deb1663a2 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json new file mode 100644 index 0000000000000..bda5e85924c30 --- /dev/null +++ 
b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "ceil", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + "literal": { + "integer": 2 + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin new file mode 100644 index 0000000000000..8db402ac167e0 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_e.json b/connector/connect/common/src/test/resources/query-tests/queries/function_e.json new file mode 100644 index 0000000000000..c99c04a6befdb --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_e.json @@ -0,0 +1,20 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "e" + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin 
b/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin new file mode 100644 index 0000000000000..49f6c12fbcc72 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json new file mode 100644 index 0000000000000..1b2d0ed0b1447 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "log", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin new file mode 100644 index 0000000000000..548fb480dd27e Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json new file mode 100644 index 0000000000000..e269fabe44be1 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { 
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "negative", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin new file mode 100644 index 0000000000000..9c56c111ceee6 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json new file mode 100644 index 0000000000000..46474dfd8e369 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json @@ -0,0 +1,20 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "pi" + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin new file mode 100644 index 0000000000000..14f018904bfb7 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin differ diff --git 
a/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json new file mode 100644 index 0000000000000..a8b3a2d6244bb --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "positive", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin new file mode 100644 index 0000000000000..5507abce8caac Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_power.json b/connector/connect/common/src/test/resources/query-tests/queries/function_power.json new file mode 100644 index 0000000000000..187636fb360c6 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_power.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + 
"unresolvedFunction": { + "functionName": "power", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin new file mode 100644 index 0000000000000..6e1d3b06fe87a Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json new file mode 100644 index 0000000000000..bcf6ad7eb174d --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "signum", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin new file mode 100644 index 0000000000000..af52abfb7f25b Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_std.json 
b/connector/connect/common/src/test/resources/query-tests/queries/function_std.json new file mode 100644 index 0000000000000..1403817886ca0 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_std.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "stddev", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin new file mode 100644 index 0000000000000..8d214eea8e74e Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json new file mode 100644 index 0000000000000..93d3b5297d9e1 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json @@ -0,0 +1,37 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "width_bucket", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + 
"unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin new file mode 100644 index 0000000000000..f212e97bc1c5a Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin differ diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index aac032dd4c3c0..3aa77971aa115 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -64,11 +64,13 @@ Math Functions bin cbrt ceil + ceiling conv cos cosh cot csc + e exp expm1 factorial @@ -76,12 +78,18 @@ Math Functions hex unhex hypot + ln log log10 log1p log2 + negate + negative + pi pmod + positive pow + power rint round bround @@ -89,6 +97,7 @@ Math Functions shiftleft shiftright shiftrightunsigned + sign signum sin sinh @@ -98,6 +107,7 @@ Math Functions degrees toRadians radians + width_bucket Datetime Functions @@ -269,6 +279,7 @@ Aggregate Functions regr_sxy regr_syy skewness + std stddev stddev_pop stddev_samp diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py index 71860d3eb570b..85863f2e11551 100644 --- a/python/pyspark/sql/connect/functions.py +++ b/python/pyspark/sql/connect/functions.py @@ -532,6 +532,9 @@ def ceil(col: "ColumnOrName") -> Column: ceil.__doc__ = pysparkfuncs.ceil.__doc__ +ceiling = ceil + + def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: return _invoke_function("conv", _to_col(col), lit(fromBase), lit(toBase)) @@ -574,6 +577,13 @@ def degrees(col: "ColumnOrName") -> Column: degrees.__doc__ = pysparkfuncs.degrees.__doc__ +def e() -> Column: + return _invoke_function("e") + + +e.__doc__ = 
pysparkfuncs.e.__doc__ + + +def exp(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("exp", col) @@ -642,6 +652,13 @@ def log1p(col: "ColumnOrName") -> Column: log1p.__doc__ = pysparkfuncs.log1p.__doc__ +def ln(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("ln", col) + + +ln.__doc__ = pysparkfuncs.ln.__doc__ + + def log2(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log2", col) @@ -649,6 +666,30 @@ def log2(col: "ColumnOrName") -> Column: log2.__doc__ = pysparkfuncs.log2.__doc__ +def negative(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("negative", col) + + +negative.__doc__ = pysparkfuncs.negative.__doc__ + + +negate = negative + + +def pi() -> Column: + return _invoke_function("pi") + + +pi.__doc__ = pysparkfuncs.pi.__doc__ + + +def positive(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("positive", col) + + +positive.__doc__ = pysparkfuncs.positive.__doc__ + + def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column: + return _invoke_binary_math_function("pmod", dividend, divisor) @@ -656,6 +697,19 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", pmod.__doc__ = pysparkfuncs.pmod.__doc__ +def width_bucket( + v: "ColumnOrName", + min: "ColumnOrName", + max: "ColumnOrName", + numBucket: Union["ColumnOrName", int], +) -> Column: + numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket + return _invoke_function_over_columns("width_bucket", v, min, max, numBucket) + + +width_bucket.__doc__ = pysparkfuncs.width_bucket.__doc__ + + def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column: return _invoke_binary_math_function("power", col1, col2) @@ -743,6 +797,9 @@ def signum(col: "ColumnOrName") -> Column: signum.__doc__ = pysparkfuncs.signum.__doc__ +sign = signum + + def sin(col: "ColumnOrName") -> Column: return 
_invoke_function_over_columns("sin", col) @@ -1041,6 +1098,9 @@ def stddev(col: "ColumnOrName") -> Column: stddev.__doc__ = pysparkfuncs.stddev.__doc__ +std = stddev + + def stddev_samp(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("stddev_samp", col) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 25939670cc6bf..b443fb281b902 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1089,6 +1089,9 @@ def ceil(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("ceil", col) +ceiling = ceil + + @try_remote_functions def cos(col: "ColumnOrName") -> Column: """ @@ -1208,6 +1211,22 @@ def csc(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("csc", col) +@try_remote_functions +def e() -> Column: + """Returns Euler's number. + + Examples + -------- + >>> spark.range(1).select(e()).show() + +-----------------+ + | E()| + +-----------------+ + |2.718281828459045| + +-----------------+ + """ + return _invoke_function("e") + + @try_remote_functions def exp(col: "ColumnOrName") -> Column: """ @@ -1401,6 +1420,88 @@ def log1p(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log1p", col) +@try_remote_functions +def negative(col: "ColumnOrName") -> Column: + """ + Returns the negative value. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + column to calculate negative value for. + + Returns + ------- + :class:`~pyspark.sql.Column` + negative value. + + Examples + -------- + >>> spark.range(3).select(negative("id").alias("n")).show() + +---+ + | n| + +---+ + | 0| + | -1| + | -2| + +---+ + """ + return _invoke_function_over_columns("negative", col) + + +negate = negative + + +@try_remote_functions +def pi() -> Column: + """Returns Pi. 
+ + Examples + -------- + >>> spark.range(1).select(pi()).show() + +-----------------+ + | PI()| + +-----------------+ + |3.141592653589793| + +-----------------+ + """ + return _invoke_function("pi") + + +@try_remote_functions +def positive(col: "ColumnOrName") -> Column: + """ + Returns the value. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + input value column. + + Returns + ------- + :class:`~pyspark.sql.Column` + value. + + Examples + -------- + >>> df = spark.createDataFrame([(-1,), (0,), (1,)], ['v']) + >>> df.select(positive("v").alias("p")).show() + +---+ + | p| + +---+ + | -1| + | 0| + | 1| + +---+ + """ + return _invoke_function_over_columns("positive", col) + + @try_remote_functions def rint(col: "ColumnOrName") -> Column: """ @@ -1511,6 +1612,9 @@ def signum(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("signum", col) +sign = signum + + @try_remote_functions def sin(col: "ColumnOrName") -> Column: """ @@ -1917,6 +2021,9 @@ def stddev(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("stddev", col) +std = stddev + + @try_remote_functions def stddev_samp(col: "ColumnOrName") -> Column: """ @@ -2639,6 +2746,9 @@ def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) return _invoke_binary_math_function("pow", col1, col2) +power = pow + + @try_remote_functions def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column: """ @@ -2687,6 +2797,58 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", return _invoke_binary_math_function("pmod", dividend, divisor) +@try_remote_functions +def width_bucket( + v: "ColumnOrName", + min: "ColumnOrName", + max: "ColumnOrName", + numBucket: Union["ColumnOrName", int], +) -> Column: + """ + Returns the bucket number into which the value of this expression would fall + after being evaluated. 
Note that input arguments must follow conditions listed below; + otherwise, the method will return null. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + v : str or :class:`~pyspark.sql.Column` + value to compute a bucket number in the histogram + min : str or :class:`~pyspark.sql.Column` + minimum value of the histogram + max : str or :class:`~pyspark.sql.Column` + maximum value of the histogram + numBucket : str, :class:`~pyspark.sql.Column` or int + the number of buckets + + Returns + ------- + :class:`~pyspark.sql.Column` + the bucket number into which the value would fall after being evaluated + + Examples + -------- + >>> df = spark.createDataFrame([ + ... (5.3, 0.2, 10.6, 5), + ... (-2.1, 1.3, 3.4, 3), + ... (8.1, 0.0, 5.7, 4), + ... (-0.9, 5.2, 0.5, 2)], + ... ['v', 'min', 'max', 'n']) + >>> df.select(width_bucket('v', 'min', 'max', 'n')).show() + +----------------------------+ + |width_bucket(v, min, max, n)| + +----------------------------+ + | 3| + | 0| + | 5| + | 3| + +----------------------------+ + """ + numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket + return _invoke_function_over_columns("width_bucket", v, min, max, numBucket) + + @try_remote_functions def row_number() -> Column: """ @@ -4255,6 +4416,35 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non return _invoke_function("log", arg1, _to_java_column(arg2)) + + +@try_remote_functions +def ln(col: "ColumnOrName") -> Column: + """Returns the natural logarithm of the argument. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + a column to calculate logarithm for. + + Returns + ------- + :class:`~pyspark.sql.Column` + natural logarithm of given value. 
+ + Examples + -------- + >>> df = spark.createDataFrame([(4,)], ['a']) + >>> df.select(ln('a')).show() + +------------------+ + | ln(a)| + +------------------+ + |1.3862943611198906| + +------------------+ + """ + return _invoke_function_over_columns("ln", col) + + @try_remote_functions def log2(col: "ColumnOrName") -> Column: """Returns the base-2 logarithm of the argument. diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index c92f636830f55..150fab9bebf40 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -59,7 +59,6 @@ def test_function_parity(self): "typedlit", # Scala only "typedLit", # Scala only "monotonicallyIncreasingId", # depreciated, use monotonically_increasing_id - "negate", # equivalent to python -expression "not", # equivalent to python ~expression "udaf", # used for creating UDAF's which are not supported in PySpark ] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 85435e7891ad3..ab14d4eb955bc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -931,6 +931,14 @@ object functions { */ def skewness(columnName: String): Column = skewness(Column(columnName)) + /** + * Aggregate function: alias for `stddev_samp`. + * + * @group agg_funcs + * @since 3.5.0 + */ + def std(e: Column): Column = stddev(e) + /** * Aggregate function: alias for `stddev_samp`. * @@ -2024,6 +2032,22 @@ object functions { */ def ceil(columnName: String): Column = ceil(Column(columnName)) + /** + * Computes the ceiling of the given value of `e` to `scale` decimal places. + * + * @group math_funcs + * @since 3.5.0 + */ + def ceiling(e: Column, scale: Column): Column = ceil(e, scale) + + /** + * Computes the ceiling of the given value of `e` to 0 decimal places. 
+ * + * @group math_funcs + * @since 3.5.0 + */ + def ceiling(e: Column): Column = ceil(e) + /** * Convert a number in a string column from one base to another. * @@ -2088,6 +2112,14 @@ object functions { */ def csc(e: Column): Column = withExpr { Csc(e.expr) } + /** + * Returns Euler's number. + * + * @group math_funcs + * @since 3.5.0 + */ + def e(): Column = withExpr { EulerNumber() } + /** * Computes the exponential of the given value. * @@ -2282,6 +2314,14 @@ object functions { least((columnName +: columnNames).map(Column.apply): _*) } + /** + * Computes the natural logarithm of the given value. + * + * @group math_funcs + * @since 3.5.0 + */ + def ln(e: Column): Column = log(e) + /** * Computes the natural logarithm of the given value. * @@ -2362,6 +2402,30 @@ object functions { */ def log2(columnName: String): Column = log2(Column(columnName)) + /** + * Returns the negated value. + * + * @group math_funcs + * @since 3.5.0 + */ + def negative(e: Column): Column = withExpr { UnaryMinus(e.expr) } + + /** + * Returns Pi. + * + * @group math_funcs + * @since 3.5.0 + */ + def pi(): Column = withExpr { Pi() } + + /** + * Returns the value. + * + * @group math_funcs + * @since 3.5.0 + */ + def positive(e: Column): Column = withExpr { UnaryPositive(e.expr) } + /** * Returns the value of the first argument raised to the power of the second argument. * @@ -2426,6 +2490,14 @@ object functions { */ def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 3.5.0 + */ + def power(l: Column, r: Column): Column = pow(l, r) + /** * Returns the positive value of dividend mod divisor. * @@ -2558,6 +2630,14 @@ object functions { ShiftRightUnsigned(e.expr, lit(numBits).expr) } + /** + * Computes the signum of the given value. 
+ * + * @group math_funcs + * @since 3.5.0 + */ + def sign(e: Column): Column = signum(e) + /** * Computes the signum of the given value. * @@ -2718,6 +2798,23 @@ object functions { */ def radians(columnName: String): Column = radians(Column(columnName)) + /** + * Returns the bucket number into which the value of this expression would fall + * after being evaluated. Note that input arguments must follow conditions listed below; + * otherwise, the method will return null. + * + * @param v value to compute a bucket number in the histogram + * @param min minimum value of the histogram + * @param max maximum value of the histogram + * @param numBucket the number of buckets + * @return the bucket number into which the value would fall after being evaluated + * @group math_funcs + * @since 3.5.0 + */ + def width_bucket(v: Column, min: Column, max: Column, numBucket: Column): Column = withExpr { + WidthBucket(v.expr, min.expr, max.expr, numBucket.expr) + } + ////////////////////////////////////////////////////////////////////////////////////////////// // Misc functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 45b3c379a45c7..fde55e27bf380 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -428,6 +428,10 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer( sql("SELECT sign(10), signum(-11)"), Row(1, -1)) + + checkAnswer( + Seq((1, 2)).toDF().select(signum(lit(10)), signum(lit(-11))), + Row(1, -1)) } test("pow / power") { @@ -437,6 +441,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { sql("SELECT pow(1, 2), power(2, 1)"), Seq((1, 2)).toDF().select(pow(lit(1), lit(2)), pow(lit(2), lit(1))) ) + + 
checkAnswer( + sql("SELECT pow(1, 2), power(2, 1)"), + Seq((1, 2)).toDF().select(power(lit(1), lit(2)), power(lit(2), lit(1))) + ) } test("hex") { @@ -595,12 +604,19 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer( sql("SELECT negative(1), negative(0), negative(-1)"), Row(-1, 0, 1)) + + checkAnswer( + Seq((1, 2)).toDF().select(negative(lit(1)), negative(lit(0)), negative(lit(-1))), + Row(-1, 0, 1)) } test("positive") { val df = Seq((1, -1, "abc")).toDF("a", "b", "c") checkAnswer(df.selectExpr("positive(a)"), Row(1)) checkAnswer(df.selectExpr("positive(b)"), Row(-1)) + + checkAnswer(df.select(positive(col("a"))), Row(1)) + checkAnswer(df.select(positive(col("b"))), Row(-1)) } test("SPARK-35926: Support YearMonthIntervalType in width-bucket function") { @@ -616,6 +632,19 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach { case ((value, start, end, num), expected) => val df = Seq((value, start, end, num)).toDF("v", "s", "e", "n") checkAnswer(df.selectExpr("width_bucket(v, s, e, n)"), Row(expected)) + checkAnswer(df.select(width_bucket(col("v"), col("s"), col("e"), col("n"))), Row(expected)) } } + + test("width_bucket with numbers") { + val df1 = Seq( + (5.3, 0.2, 10.6, 5), (-2.1, 1.3, 3.4, 3), + (8.1, 0.0, 5.7, 4), (-0.9, 5.2, 0.5, 2) + ).toDF("v", "min", "max", "n") + + checkAnswer( + df1.selectExpr("width_bucket(v, min, max, n)"), + df1.select(width_bucket(col("v"), col("min"), col("max"), col("n"))) + ) + } }