diff --git a/iis-wf/iis-wf-affmatching/pom.xml b/iis-wf/iis-wf-affmatching/pom.xml index 8ea3863e4..508410851 100644 --- a/iis-wf/iis-wf-affmatching/pom.xml +++ b/iis-wf/iis-wf-affmatching/pom.xml @@ -33,6 +33,15 @@ test + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-common + + org.apache.spark spark-core_2.12 diff --git a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/dedup/oozie_app/workflow.xml b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/dedup/oozie_app/workflow.xml index 340432d74..a1f4c9557 100644 --- a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/dedup/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/dedup/oozie_app/workflow.xml @@ -41,16 +41,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -94,8 +84,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/main/oozie_app/workflow.xml b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/main/oozie_app/workflow.xml index 063379c11..add69f544 100644 --- a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/main/oozie_app/workflow.xml @@ -73,16 +73,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -135,8 +125,9 @@ --conf spark.yarn.driver.memoryOverhead=${sparkDriverOverhead} --conf spark.network.timeout=${sparkNetworkTimeout} --conf spark.executor.heartbeatInterval=${sparkExecutorHeartbeatInterval} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/projectbased/oozie_app/workflow.xml b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/projectbased/oozie_app/workflow.xml index 713686743..0f3af1990 100644 --- a/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/projectbased/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-affmatching/src/main/resources/eu/dnetlib/iis/wf/affmatching/projectbased/oozie_app/workflow.xml @@ -61,16 +61,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -115,8 +105,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-citationmatching-direct/pom.xml b/iis-wf/iis-wf-citationmatching-direct/pom.xml index 85e856684..febc02494 100644 --- a/iis-wf/iis-wf-citationmatching-direct/pom.xml +++ b/iis-wf/iis-wf-citationmatching-direct/pom.xml @@ -34,6 +34,15 @@ ${project.version} + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-common + + org.apache.spark spark-core_2.12 @@ -44,6 +53,11 @@ spark-sql_2.12 + + org.apache.spark + spark-avro_2.12 + + pl.edu.icm.spark-utils spark-utils_2.12 diff --git a/iis-wf/iis-wf-citationmatching-direct/src/main/resources/eu/dnetlib/iis/wf/citationmatching/direct/oozie_app/workflow.xml b/iis-wf/iis-wf-citationmatching-direct/src/main/resources/eu/dnetlib/iis/wf/citationmatching/direct/oozie_app/workflow.xml index 2713ff4e4..e07c300c3 100644 --- a/iis-wf/iis-wf-citationmatching-direct/src/main/resources/eu/dnetlib/iis/wf/citationmatching/direct/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-citationmatching-direct/src/main/resources/eu/dnetlib/iis/wf/citationmatching/direct/oozie_app/workflow.xml @@ -43,16 +43,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -98,8 +88,9 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.yarn.executor.memoryOverhead=${sparkExecutorOverhead} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-documentsclassification/src/main/resources/eu/dnetlib/iis/wf/documentsclassification/oozie_app/workflow.xml b/iis-wf/iis-wf-documentsclassification/src/main/resources/eu/dnetlib/iis/wf/documentsclassification/oozie_app/workflow.xml index 47657e3d9..a68babae8 100644 --- a/iis-wf/iis-wf-documentsclassification/src/main/resources/eu/dnetlib/iis/wf/documentsclassification/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-documentsclassification/src/main/resources/eu/dnetlib/iis/wf/documentsclassification/oozie_app/workflow.xml @@ -35,16 +35,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -99,8 +89,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}