Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

java.lang.AssertionError: assertion failed: No plan for HiveTableRelation

I'm trying to run a Hive SQL query in a Spark Scala application and receive the following error, "No plan for HiveTableRelation", when the app executes a query against a table stored on S3. Here is the code:

package com.testapp.data

import org.apache.log4j.{Logger, Level}
import com.amazonaws.auth.{AWSCredentials, BasicSessionCredentials, DefaultAWSCredentialsProviderChain}
import play.api.libs.json.{JsObject, JsString, JsValue, Json}
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.sql.SparkSession
import org.jets3t.service.S3Service
import scala.sys.process._
import java.io.File;
import org.apache.spark.sql.hive.HiveContext;

/**
 * Spark driver that lists the Hive databases and tables and then prints five
 * rows of a single `dt`/`tm` partition of `production.raw_by_ts_events_nrt`.
 *
 * Usage: `TestEnrich <dt> <tm>`
 */
object TestEnrich {

  /**
   * Entry point.
   *
   * @param args `args(0)` is the `dt` partition value, `args(1)` is the `tm` partition value
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 2, "usage: TestEnrich <dt> <tm>")

    Logger.getRootLogger.setLevel(Level.INFO)

    val dt = args(0)
    val tm = args(1)
    println(s"enrich request $dt, $tm")

    val sparkConfig = new SparkConf()
      .setAppName("enricher")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(
        Array(
          classOf[AWSCredentials],
          classOf[BasicSessionCredentials],
          classOf[DefaultAWSCredentialsProviderChain]
        )
      )

    val sparkContext = SparkContext.getOrCreate(sparkConfig)
    try {
      val spark = SparkSession.builder
        .config(sparkContext.getConf)
        .enableHiveSupport()
        .getOrCreate()

      spark.conf.set("spark.sql.caseSensitive", "true")

      // NOTE(review): these metastore settings are applied to the session's
      // runtime conf after the session already exists - verify that the Hive
      // metastore client actually honours them at this point.
      spark.conf.set("javax.jdo.option.ConnectionURL", "xxxx")
      spark.conf.set("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver")
      spark.conf.set("javax.jdo.option.ConnectionUserName", "xxxx")
      spark.conf.set("javax.jdo.option.ConnectionPassword", "xxxx")

      // Route s3:// URIs through the S3A connector. The connector takes its
      // credentials from the fs.s3a.* properties, so the keys are set there.
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
      hadoopConf.set("fs.s3a.access.key", "xxxx")
      hadoopConf.set("fs.s3a.secret.key", "xxxx")

      SparkSession.setDefaultSession(spark)
      SparkSession.clearActiveSession()

      import spark.implicits._

      // The session is already Hive-enabled, so it is queried directly rather
      // than through the deprecated HiveContext wrapper.
      spark.sql("show databases").show()
      spark.sql("use production")
      spark.sql("show tables").show()

      // Filter with column expressions so the command-line values are never
      // spliced into SQL text.
      // NOTE: the reported "No plan for HiveTableRelation" failure surfaced
      // when this query was executed by show().
      val data = spark
        .table("raw_by_ts_events_nrt")
        .where($"dt" === dt && $"tm" === tm)
        .limit(5)
      data.show()
    } finally {
      // Release the context even when a query fails.
      sparkContext.stop()
    }
  }
}

Here is the table's CREATE statement:

-- Definition of the table queried by the application above.
-- NOTE(review): the WITH (...) table-property syntax looks like Presto's Hive
-- connector DDL rather than HiveQL - confirm which engine created the table.
CREATE TABLE hive.production.raw_by_ts_events_nrt (
   ts bigint,
   batchts bigint,
   eventid varchar,
   userid varchar,
   -- remaining columns are elided in the original post
   ...
   -- dt and tm are the partition columns (see partitioned_by below)
   dt varchar,
   tm varchar
)
WITH (
   -- the table's data is stored as ORC files under this S3 location
   external_location = 's3a://pb-prod-raw-by-ts-events-nrt/',
   format = 'ORC',
   partitioned_by = ARRAY['dt','tm']
)

and here is the log:

18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97_resources
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97/_tmp_space.db
18/08/28 06:50:05 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.2) is hdfs:///user/spark/warehouse
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 258.835732 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 15.359587 ms
+------------+
|databaseName|
+------------+
|     default|
|  production|
+------------+

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 11.998794 ms
++
||
++
++

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 22.778824 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 16.995158 ms
+----------+--------------------+-----------+
|  database|           tableName|isTemporary|
+----------+--------------------+-----------+
|production|raw_by_ts_events_nrt|      false|
+----------+--------------------+-----------+

18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 1
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 2
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 0
18/08/28 06:50:07 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
Exception in thread "main" java.lang.AssertionError: assertion failed: No plan for HiveTableRelation `production`.`raw_by_ts_events_nrt`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [ts#26L, batchts#27L, eventid#28, userid#29, eventname#30, pageloaduid#31, deltatime#32, adaction#33, adduration#34, aderrordescription#35, adispreload#36, admoduleisloaded#37, adnetwork#38, adplacement#39, adplayer#40, adprogress#41, adrejectreason#42, adtag#43, adtargeting#44, adtype#45, aduuid#46, articlecanonicalurl#47, articleformat#48, articleid#49, ... 113 more fields], [dt#163, tm#164]

        at scala.Predef$.assert(Predef.scala:170)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249)
        at org.apache.spark.sql.Dataset.head(Dataset.scala:2484)
        at org.apache.spark.sql.Dataset.take(Dataset.scala:2698)
        at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
        at com.playbuzz.data.TestEnrich$.main(TestEnrich.scala:90)
        at com.playbuzz.data.TestEnrich.main(TestEnrich.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

As you can see, it successfully executes all of the previous statements and fails on the select. Any help will be highly appreciated.

like image 284
Ievgen Guliaiev Avatar asked Aug 28 '18 07:08

Ievgen Guliaiev


2 Answers

If you are using spark-submit, you have to include the spark.sql.catalogImplementation=hive config, like this:

spark-submit --master yarn --deploy-mode cluster --conf spark.sql.catalogImplementation=hive yourApplication.jar
like image 128
Ali AzG Avatar answered Nov 15 '22 09:11

Ali AzG


I think this is a bug related to ORC tables (see the Hortonworks blog).

Please check these links:

  1. Stack overflow http://stackoverflow.com/questions/38740862/not-able-to-fetch-result-from-hive-transaction-enabled-table-through-spark-sql

  2. SPARK-18355: Spark SQL fails to read data from an ORC Hive table that has a new column added to it

  3. SPARK-18497: ForeachSink fails with "assertion failed: No plan for EventTimeWatermark"

If #2 or #3 is the cause, there's no obvious workaround right now; you need to upgrade to get the fix. There are some details in #1 on how you might avoid the problem.

like image 33
Moustafa Mahmoud Avatar answered Nov 15 '22 08:11

Moustafa Mahmoud