This repository has been archived by the owner on May 27, 2020. It is now read-only.

Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times #182

Open
odigetti opened this issue May 28, 2018 · 0 comments

I'm working on a Spark ML sentiment analysis platform that uses a mobile feedback app as its data source. The data (feedback) is stored in MongoDB, and I want to read it with Spark. I tried to connect Spark to MongoDB, but with no luck; I keep getting this error:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2124)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1.apply(RDD.scala:1118)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.aggregate(RDD.scala:1111)
at com.stratio.datasource.mongodb.schema.MongodbSchema.schema(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema$lzycompute(MongodbRelation.scala:63)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema(MongodbRelation.scala:60)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at scala.Option.getOrElse(Option.scala:121)
at com.stratio.datasource.mongodb.MongodbRelation.&lt;init&gt;(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.DefaultSource.createRelation(DefaultSource.scala:36)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
at com.spml.MongoDbSparkCon.main(MongoDbSparkCon.java:36)
Caused by: java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

2018-05-28 18:54:24 INFO DAGScheduler:54 - Job 0 failed: aggregate at MongodbSchema.scala:46, took 0,855790 s

My Spark code (I'm using Java):

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class MongoDbSparkCon {

    public static void main(String[] args) {

        JavaSparkContext sc = new JavaSparkContext("local[*]", "test spark-mongodb java");
        SQLContext sqlContext = new SQLContext(sc);

        // Connection options for the Stratio spark-mongodb datasource
        Map<String, String> options = new HashMap<>();
        options.put("host", "localhost:27017");
        options.put("database", "feedbackuib");
        options.put("collection", "Feedback");

        // Schema inference happens inside load() and is where the job fails
        Dataset<Row> df = sqlContext.read().format("com.stratio.datasource.mongodb").options(options).load();
        df.registerTempTable("Feedback");
        sqlContext.sql("SELECT * FROM Feedback");
        df.show();
    }
}
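For reference: a NoSuchMethodError at runtime means the code was compiled against one version of an API but is running against another. TypeCoercion$.findTightestCommonTypeOfTwo no longer exists in Spark 2.2, so spark-mongodb_2.11 0.12.0 (built against an older Spark; this repository is now archived) fails during schema inference on it. One way around this is to read through the official mongo-spark-connector that is already declared in the pom below. A minimal sketch, assuming the 2.2.x connector with its documented com.mongodb.spark.sql.DefaultSource format and spark.mongodb.input.uri setting; the database and collection names are copied from the options above, and the class name is just for illustration:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class MongoSparkConnectorRead {

    public static void main(String[] args) {

        // The input URI carries the host, database (feedbackuib) and collection (Feedback)
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("mongo-spark-connector read")
                .config("spark.mongodb.input.uri", "mongodb://localhost:27017/feedbackuib.Feedback")
                .getOrCreate();

        // The official connector does its own schema inference, so the
        // archived Stratio datasource is not involved at all
        Dataset<Row> df = spark.read()
                .format("com.mongodb.spark.sql.DefaultSource")
                .load();

        df.createOrReplaceTempView("Feedback");
        spark.sql("SELECT * FROM Feedback").show();
    }
}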

pom.xml:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.3.0</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.mongodb.spark</groupId>
    <artifactId>mongo-spark-connector_2.10</artifactId>
    <version>2.2.2</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.stratio.datasource/spark-mongodb -->
<dependency>
    <groupId>com.stratio.datasource</groupId>
    <artifactId>spark-mongodb_2.11</artifactId>
    <version>0.12.0</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.mongodb/mongo-java-driver -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>mongo-java-driver</artifactId>
    <version>3.6.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-core -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-core_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-commons -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-commons_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-query -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-query_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.scala-lang/scala-library -->
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.11.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.typesafe.akka/akka-actor -->
<dependency>
    <groupId>com.typesafe.akka</groupId>
    <artifactId>akka-actor_2.11</artifactId>
    <version>2.5.12</version>
</dependency>
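
One more thing worth checking in this dependency list: the Spark modules mix 2.2.0 and 2.3.0, and mongo-spark-connector_2.10 is a Scala 2.10 artifact on an otherwise Scala 2.11 classpath; either mismatch can also surface as NoSuchMethodError. A sketch of an aligned set (the exact versions are an assumption; the point is that every Spark module shares one version and every artifact shares one Scala suffix):

<!-- Illustrative only: pin all Spark modules to one version (2.2.0 here)
     and use the _2.11 connector so no _2.10 jars reach the classpath -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.2.0</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.mongodb.spark</groupId>
    <artifactId>mongo-spark-connector_2.11</artifactId>
    <version>2.2.2</version>
</dependency>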

Any help would be appreciated!
