Skip to content

Commit 34a9489

Browse files
authored
Merge pull request #217 from huanshankeji/tolist-to-aslist
Replace calls to `toList()` on `Array`s with `asList()` to improve performance of the affected functions by avoiding one extra copy of the array elements
2 parents d787f9d + 48370b5 commit 34a9489

File tree

4 files changed

+7
-7
lines changed

4 files changed

+7
-7
lines changed

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt

+4-4
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,19 @@ inline fun <reified T> SparkSession.toDF(list: List<T>, vararg colNames: String)
6161
* Utility method to create dataset from *array or vararg arguments
6262
*/
6363
inline fun <reified T> SparkSession.dsOf(vararg t: T): Dataset<T> =
64-
createDataset(t.toList(), encoder<T>())
64+
createDataset(t.asList(), encoder<T>())
6565

6666
/**
6767
* Utility method to create dataframe from *array or vararg arguments
6868
*/
6969
inline fun <reified T> SparkSession.dfOf(vararg t: T): Dataset<Row> =
70-
createDataset(t.toList(), encoder<T>()).toDF()
70+
createDataset(t.asList(), encoder<T>()).toDF()
7171

7272
/**
7373
* Utility method to create dataframe from *array or vararg arguments with given column names
7474
*/
7575
inline fun <reified T> SparkSession.dfOf(colNames: Array<String>, vararg t: T): Dataset<Row> =
76-
createDataset(t.toList(), encoder<T>())
76+
createDataset(t.asList(), encoder<T>())
7777
.run { if (colNames.isEmpty()) toDF() else toDF(*colNames) }
7878

7979
/**
@@ -92,7 +92,7 @@ inline fun <reified T> List<T>.toDF(spark: SparkSession, vararg colNames: String
9292
* Utility method to create dataset from list
9393
*/
9494
inline fun <reified T> Array<T>.toDS(spark: SparkSession): Dataset<T> =
95-
toList().toDS(spark)
95+
asList().toDS(spark)
9696

9797
/**
9898
* Utility method to create dataframe from list

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import java.io.Serializable
1111
fun <T> JavaSparkContext.rddOf(
1212
vararg elements: T,
1313
numSlices: Int = defaultParallelism(),
14-
): JavaRDD<T> = parallelize(elements.toList(), numSlices)
14+
): JavaRDD<T> = parallelize(elements.asList(), numSlices)
1515

1616
/**
1717
* Utility method to create an RDD from a list.

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class KSparkSession(val spark: SparkSession) {
114114
* NOTE: [T] must be [Serializable].
115115
*/
116116
fun <T> rddOf(vararg elements: T, numSlices: Int = sc.defaultParallelism()): JavaRDD<T> =
117-
sc.toRDD(elements.toList(), numSlices)
117+
sc.toRDD(elements.asList(), numSlices)
118118

119119
/**
120120
* A collection of methods for registering user-defined functions (UDF).

kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class TypeInferenceTest : ShouldSpec({
215215
should("generate valid serializer schema") {
216216
expect(encoder<Sample>().schema()) {
217217
this
218-
.feature("data type", { this.fields()?.toList() }) {
218+
.feature("data type", { this.fields()?.asList() }) {
219219
this.notToEqualNull().toContain.inOrder.only.entry {
220220
this
221221
.feature("element name", { name() }) { toEqual("optionList") }

0 commit comments

Comments
 (0)