Commit 9957e45: fix conflict
stefankandic committed Jan 23, 2025
2 parents 20733cd + 1a49237
Showing 74 changed files with 1,738 additions and 754 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_branch40.yml
@@ -40,7 +40,7 @@ jobs:
"SCALA_PROFILE": "scala2.13",
"PYSPARK_IMAGE_TO_TEST": "",
"PYTHON_TO_TEST": "",
"ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-free:23.5-slim"
"ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-free:23.6-slim"
}
jobs: >-
{
2 changes: 2 additions & 0 deletions LICENSE-binary
@@ -268,6 +268,8 @@ io.fabric8:kubernetes-model-storageclass
io.fabric8:zjsonpatch
io.github.java-diff-utils:java-diff-utils
io.jsonwebtoken:jjwt-api
io.jsonwebtoken:jjwt-impl
io.jsonwebtoken:jjwt-jackson
io.netty:netty-all
io.netty:netty-buffer
io.netty:netty-codec
5 changes: 2 additions & 3 deletions assembly/pom.xml
@@ -345,11 +345,10 @@
</properties>
</profile>

<!-- Pull in jjwt-impl and jjwt-jackson jars -->
<profile>
<id>jjwt</id>
<id>jjwt-provided</id>
<properties>
<jjwt.deps.scope>compile</jjwt.deps.scope>
<jjwt.deps.scope>provided</jjwt.deps.scope>
</properties>
</profile>

12 changes: 12 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -82,6 +82,12 @@
],
"sqlState" : "22003"
},
"ARTIFACT_ALREADY_EXISTS" : {
"message" : [
"The artifact <normalizedRemoteRelativePath> already exists. Please choose a different name for the new artifact because it cannot be overwritten."
],
"sqlState" : "42713"
},
"ASSIGNMENT_ARITY_MISMATCH" : {
"message" : [
"The number of columns or variables assigned or aliased: <numTarget> does not match the number of source expressions: <numExpr>."
@@ -3962,6 +3968,12 @@
],
"sqlState" : "0A000"
},
"NOT_SUPPORTED_CHANGE_SAME_COLUMN" : {
"message" : [
"ALTER TABLE ALTER/CHANGE COLUMN is not supported for changing <table>'s column <fieldName> including its nested fields multiple times in the same command."
],
"sqlState" : "0A000"
},
"NOT_SUPPORTED_COMMAND_FOR_V2_TABLE" : {
"message" : [
"<cmd> is not supported for v2 tables."
@@ -103,7 +103,7 @@ final class DataFrameWriterImpl[T] private[sql] (ds: Dataset[T]) extends DataFra
// Cannot both be set
require(!(builder.hasPath && builder.hasTable))

builder.setMode(mode match {
builder.setMode(curmode match {
case SaveMode.Append => proto.WriteOperation.SaveMode.SAVE_MODE_APPEND
case SaveMode.Overwrite => proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
case SaveMode.Ignore => proto.WriteOperation.SaveMode.SAVE_MODE_IGNORE
2 changes: 1 addition & 1 deletion connector/docker-integration-tests/README.md
@@ -45,7 +45,7 @@ the container bootstrapping. To run an individual Docker integration test, use t

Besides the default Docker images, the integration tests can be run with custom Docker images. For example,

ORACLE_DOCKER_IMAGE_NAME=gvenzl/oracle-free:23.5-slim-faststart ./build/sbt -Pdocker-integration-tests "docker-integration-tests/testOnly *OracleIntegrationSuite"
ORACLE_DOCKER_IMAGE_NAME=gvenzl/oracle-free:23.6-slim ./build/sbt -Pdocker-integration-tests "docker-integration-tests/testOnly *OracleIntegrationSuite"

The following environment variables can be used to specify the custom Docker images for different databases:

@@ -21,7 +21,7 @@ import org.apache.spark.internal.Logging

class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.5-slim")
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.6-slim")
val oracle_password = "Th1s1sThe0racle#Pass"
override val env = Map(
"ORACLE_PWD" -> oracle_password, // oracle images uses this
@@ -22,12 +22,10 @@ import com.google.protobuf.{BoolValue, BytesValue, DoubleValue, FloatValue, Int3
import com.google.protobuf.Descriptors.{Descriptor, FieldDescriptor}
import com.google.protobuf.WireFormat

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types._

@DeveloperApi
object SchemaConverters extends Logging {

/**
@@ -42,13 +40,13 @@ object SchemaConverters extends Logging {
*
* @since 3.4.0
*/
def toSqlType(
private[protobuf] def toSqlType(
descriptor: Descriptor,
protobufOptions: ProtobufOptions = ProtobufOptions(Map.empty)): SchemaType = {
toSqlTypeHelper(descriptor, protobufOptions)
}

def toSqlTypeHelper(
private[protobuf] def toSqlTypeHelper(
descriptor: Descriptor,
protobufOptions: ProtobufOptions): SchemaType = {
val fields = descriptor.getFields.asScala.flatMap(
@@ -65,7 +63,7 @@
// exceed the maximum recursive depth specified by the recursiveFieldMaxDepth option.
// A return of None implies the field has reached the maximum allowed recursive depth and
// should be dropped.
def structFieldFor(
private def structFieldFor(
fd: FieldDescriptor,
existingRecordNames: Map[String, Int],
protobufOptions: ProtobufOptions): Option[StructField] = {
6 changes: 0 additions & 6 deletions core/pom.xml
@@ -622,12 +622,6 @@
<script.extension>.sh</script.extension>
</properties>
</profile>
<profile>
<id>jjwt</id>
<properties>
<jjwt.deps.scope>compile</jjwt.deps.scope>
</properties>
</profile>
<profile>
<id>sparkr</id>
<build>
@@ -33,7 +33,7 @@ import org.apache.spark._
import org.apache.spark.api.python.PythonFunction.PythonAccumulator
import org.apache.spark.internal.{Logging, LogKeys, MDC}
import org.apache.spark.internal.LogKeys.TASK_NAME
import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES, Python}
import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES}
import org.apache.spark.internal.config.Python._
import org.apache.spark.rdd.InputFileBlockHolder
import org.apache.spark.resource.ResourceProfile.{EXECUTOR_CORES_LOCAL_PROPERTY, PYSPARK_MEMORY_LOCAL_PROPERTY}
@@ -90,11 +90,11 @@ private[spark] object PythonEvalType {
}
}

private object BasePythonRunner {
private[spark] object BasePythonRunner {

private lazy val faultHandlerLogDir = Utils.createTempDir(namePrefix = "faulthandler")
private[spark] lazy val faultHandlerLogDir = Utils.createTempDir(namePrefix = "faulthandler")

private def faultHandlerLogPath(pid: Int): Path = {
private[spark] def faultHandlerLogPath(pid: Int): Path = {
new File(faultHandlerLogDir, pid.toString).toPath
}
}
@@ -574,15 +574,15 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
JavaFiles.deleteIfExists(path)
throw new SparkException(s"Python worker exited unexpectedly (crashed): $error", e)

case eof: EOFException if !faultHandlerEnabled =>
case e: IOException if !faultHandlerEnabled =>
throw new SparkException(
s"Python worker exited unexpectedly (crashed). " +
"Consider setting 'spark.sql.execution.pyspark.udf.faulthandler.enabled' or" +
s"'${Python.PYTHON_WORKER_FAULTHANLDER_ENABLED.key}' configuration to 'true' for" +
"the better Python traceback.", eof)
s"'${PYTHON_WORKER_FAULTHANLDER_ENABLED.key}' configuration to 'true' for " +
"the better Python traceback.", e)

case eof: EOFException =>
throw new SparkException("Python worker exited unexpectedly (crashed)", eof)
case e: IOException =>
throw new SparkException("Python worker exited unexpectedly (crashed)", e)
}
}

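As an aside on the hunk above: the new error message points users at a faulthandler setting for better Python tracebacks. The sketch below is only an illustration of turning it on from a session; it uses the SQL configuration key quoted in the message, and the session setup itself is a placeholder rather than part of this change.

    import org.apache.spark.sql.SparkSession

    object FaultHandlerSketch {
      def main(args: Array[String]): Unit = {
        // Illustrative local session; any existing SparkSession works the same way.
        val spark = SparkSession.builder()
          .master("local[2]")
          .appName("faulthandler-demo")
          .getOrCreate()

        // Runtime SQL conf named in the error message above: when a Python UDF worker
        // crashes, Spark collects a Python-level traceback instead of only the IOException.
        spark.conf.set("spark.sql.execution.pyspark.udf.faulthandler.enabled", "true")

        spark.stop()
      }
    }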
21 changes: 3 additions & 18 deletions core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -22,7 +22,7 @@ import java.lang.Thread.UncaughtExceptionHandler
import java.lang.management.ManagementFactory
import java.net.{URI, URL}
import java.nio.ByteBuffer
import java.util.{Locale, Properties, Timer, TimerTask}
import java.util.{Locale, Properties}
import java.util.concurrent._
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.locks.ReentrantLock
@@ -209,10 +209,9 @@ private[spark] class Executor(
// The default isolation group
val defaultSessionState: IsolatedSessionState = newSessionState(JobArtifactState("default", None))

private val cacheExpiryTime = 30 * 60 * 1000
val isolatedSessionCache: Cache[String, IsolatedSessionState] = CacheBuilder.newBuilder()
.maximumSize(100)
.expireAfterAccess(cacheExpiryTime, TimeUnit.MILLISECONDS)
.expireAfterAccess(30, TimeUnit.MINUTES)
.removalListener(new RemovalListener[String, IsolatedSessionState]() {
override def onRemoval(
notification: RemovalNotification[String, IsolatedSessionState]): Unit = {
@@ -296,8 +295,6 @@

private val pollOnHeartbeat = if (METRICS_POLLING_INTERVAL_MS > 0) false else true

private val timer = new Timer("executor-state-timer", true)

// Poller for the memory metrics. Visible for testing.
private[executor] val metricsPoller = new ExecutorMetricsPoller(
env.memoryManager,
@@ -448,9 +445,6 @@
case NonFatal(e) =>
logWarning("Unable to stop heartbeater", e)
}
if (timer != null) {
timer.cancel()
}
ShuffleBlockPusher.stop()
if (threadPool != null) {
threadPool.shutdown()
@@ -565,17 +559,9 @@
override def run(): Unit = {

// Classloader isolation
var maybeTimerTask: Option[TimerTask] = None
val isolatedSession = taskDescription.artifacts.state match {
case Some(jobArtifactState) =>
val state = isolatedSessionCache.get(
jobArtifactState.uuid, () => newSessionState(jobArtifactState))
maybeTimerTask = Some(new TimerTask {
// Resets the expire time till the task ends.
def run(): Unit = isolatedSessionCache.getIfPresent(jobArtifactState.uuid)
})
maybeTimerTask.foreach(timer.schedule(_, cacheExpiryTime / 10, cacheExpiryTime / 10))
state
isolatedSessionCache.get(jobArtifactState.uuid, () => newSessionState(jobArtifactState))
case _ => defaultSessionState
}

@@ -876,7 +862,6 @@
uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), t)
}
} finally {
maybeTimerTask.foreach(_.cancel)
cleanMDCForTask(taskName, mdcProperties)
runningTasks.remove(taskId)
if (taskStarted) {
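The Executor diff above drops the hand-rolled refresh timer because Guava's expireAfterAccess already restarts an entry's expiry clock on every read, including reads made through get(key, loader). A minimal standalone sketch of that behaviour, with illustrative names that are not part of this patch:

    import java.util.concurrent.TimeUnit
    import com.google.common.cache.{Cache, CacheBuilder}

    object SessionCacheSketch {
      final case class SessionState(uuid: String)

      def main(args: Array[String]): Unit = {
        val cache: Cache[String, SessionState] = CacheBuilder.newBuilder()
          .maximumSize(100)
          .expireAfterAccess(30, TimeUnit.MINUTES) // every access resets the 30-minute clock
          .build[String, SessionState]()

        // get(key, loader) counts as an access, so an entry touched at least once every
        // 30 minutes is never expired; no separate TimerTask is needed to keep it alive.
        val state = cache.get("job-uuid", () => SessionState("job-uuid"))
        println(state)
      }
    }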
2 changes: 2 additions & 0 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -142,6 +142,8 @@ jettison/1.5.4//jettison-1.5.4.jar
jetty-util-ajax/11.0.24//jetty-util-ajax-11.0.24.jar
jetty-util/11.0.24//jetty-util-11.0.24.jar
jjwt-api/0.12.6//jjwt-api-0.12.6.jar
jjwt-impl/0.12.6//jjwt-impl-0.12.6.jar
jjwt-jackson/0.12.6//jjwt-jackson-0.12.6.jar
jline/2.14.6//jline-2.14.6.jar
jline/3.26.3//jline-3.26.3.jar
jna/5.14.0//jna-5.14.0.jar
1 change: 1 addition & 0 deletions dev/sparktestsupport/modules.py
@@ -1126,6 +1126,7 @@ def __hash__(self):
"pyspark.ml.tests.connect.test_parity_clustering",
"pyspark.ml.tests.connect.test_parity_evaluation",
"pyspark.ml.tests.connect.test_parity_feature",
"pyspark.ml.tests.connect.test_parity_pipeline",
],
excluded_python_implementations=[
"PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
4 changes: 0 additions & 4 deletions docs/configuration.md
@@ -1754,10 +1754,6 @@ Apart from these, the following properties are also available, and may be useful
<br /><code>spark.ui.filters=com.test.filter1</code>
<br /><code>spark.com.test.filter1.param.name1=foo</code>
<br /><code>spark.com.test.filter1.param.name2=bar</code>
<br />
<br />Note that some filter requires additional dependencies. For example,
the built-in <code>org.apache.spark.ui.JWSFilter</code> requires
<code>jjwt-impl</code> and <code>jjwt-jackson</code> jar files.
</td>
<td>1.0.0</td>
</tr>
5 changes: 2 additions & 3 deletions docs/security.md
@@ -55,8 +55,7 @@ To enable authorization, Spark Master should have
`spark.master.rest.filters=org.apache.spark.ui.JWSFilter` and
`spark.org.apache.spark.ui.JWSFilter.param.secretKey=BASE64URL-ENCODED-KEY` configurations, and
client should provide HTTP `Authorization` header which contains JSON Web Token signed by
the shared secret key. Please note that this feature requires a Spark distribution built with
`jjwt` profile.
the shared secret key.

### YARN

@@ -816,7 +815,7 @@ be limited to origin hosts that need to access the services.

However, like the REST Submission port, Spark also supports HTTP `Authorization` header
with a cryptographically signed JSON Web Token (JWT) for all UI ports.
To use it, a user needs the Spark distribution built with `jjwt` profile and to configure
To use it, a user needs to configure
`spark.ui.filters=org.apache.spark.ui.JWSFilter` and
`spark.org.apache.spark.ui.JWSFilter.param.secretKey=BASE64URL-ENCODED-KEY`.
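Since jjwt-impl and jjwt-jackson now ship with the default distribution (see the LICENSE and dependency-manifest changes above), a client can mint the signed token without extra build profiles. The sketch below is only an illustration: the encoded key and subject are placeholders, and the exact Authorization header format JWSFilter expects should be checked against its documentation rather than taken from this example.

    import io.jsonwebtoken.Jwts
    import io.jsonwebtoken.io.Decoders
    import io.jsonwebtoken.security.Keys

    object JwsTokenSketch {
      def main(args: Array[String]): Unit = {
        // Placeholder BASE64URL-encoded secret; it must match the value configured as
        // spark.org.apache.spark.ui.JWSFilter.param.secretKey on the Spark side.
        val encodedKey = "VmlzaXQgaHR0cHM6Ly9zcGFyay5hcGFjaGUub3JnIHRvIGxlYXJuIG1vcmUu"
        val key = Keys.hmacShaKeyFor(Decoders.BASE64URL.decode(encodedKey))

        // Sign a token with the shared secret and print it; the client then sends it in
        // the HTTP Authorization header of requests to the protected UI or REST ports.
        val token = Jwts.builder().subject("spark-client").signWith(key).compact()
        println(token)
      }
    }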

@@ -52,3 +52,4 @@ org.apache.spark.ml.feature.MinMaxScaler
org.apache.spark.ml.feature.RobustScaler
org.apache.spark.ml.feature.StringIndexer
org.apache.spark.ml.feature.PCA
org.apache.spark.ml.feature.Word2Vec
@@ -50,3 +50,4 @@ org.apache.spark.ml.feature.MinMaxScalerModel
org.apache.spark.ml.feature.RobustScalerModel
org.apache.spark.ml.feature.StringIndexerModel
org.apache.spark.ml.feature.PCAModel
org.apache.spark.ml.feature.Word2VecModel
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.DoubleType
/**
* Abstraction for multiclass classification results for a given model.
*/
private[classification] trait ClassificationSummary extends Summary with Serializable {
private[spark] trait ClassificationSummary extends Summary with Serializable {

/**
* Dataframe output by the model's `transform` method.
@@ -148,7 +148,7 @@ private[classification] trait ClassificationSummary extends Summary with Seriali
/**
* Abstraction for training results.
*/
private[classification] trait TrainingSummary {
private[spark] trait TrainingSummary {

/**
* objective function (scaled loss + regularization) at each iteration.
@@ -168,7 +168,7 @@ private[classification] trait TrainingSummary {
/**
* Abstraction for binary classification results for a given model.
*/
private[classification] trait BinaryClassificationSummary extends ClassificationSummary {
private[spark] trait BinaryClassificationSummary extends ClassificationSummary {

private val sparkSession = predictions.sparkSession
import sparkSession.implicits._
@@ -1078,7 +1078,7 @@ class LogisticRegressionModel private[spark] (

// For ml connect only
@Since("4.0.0")
private[ml] def this() = this(Identifiable.randomUID("logreg"), Vectors.zeros(0), 0)
private[ml] def this() = this(Identifiable.randomUID("logreg"), Vectors.empty, 0)

/**
* A vector of model coefficients for "binomial" logistic regression. If this model was trained
@@ -211,6 +211,8 @@ class Word2VecModel private[ml] (

import Word2VecModel._

private[ml] def this() = this(Identifiable.randomUID("w2v"), null)

/**
* Returns a dataframe with two fields, "word" and "vector", with "word" being a String and
* and the vector the DenseVector that it is mapped to.
@@ -704,7 +704,7 @@ class LinearRegressionModel private[ml] (

// For ml connect only
@Since("4.0.0")
private[ml] def this() = this(Identifiable.randomUID("linReg"), Vectors.zeros(0), 0.0, 0.0)
private[ml] def this() = this(Identifiable.randomUID("linReg"), Vectors.empty, 0.0, 0.0)

override val numFeatures: Int = coefficients.size

@@ -122,7 +122,7 @@ private[spark] trait DecisionTreeModel {
* Abstraction for models which are ensembles of decision trees
* @tparam M Type of tree model in this ensemble
*/
private[ml] trait TreeEnsembleModel[M <: DecisionTreeModel] {
private[spark] trait TreeEnsembleModel[M <: DecisionTreeModel] {

// Note: We use getTrees since subclasses of TreeEnsembleModel will store subclasses of
// DecisionTreeModel.
@@ -27,7 +27,7 @@ import org.apache.spark.annotation.Since
* @tparam T Summary instance type
*/
@Since("3.0.0")
private[ml] trait HasTrainingSummary[T] {
private[spark] trait HasTrainingSummary[T] {

private[ml] final var trainingSummary: Option[T] = None

4 changes: 2 additions & 2 deletions pom.xml
@@ -280,7 +280,7 @@
<orc.deps.scope>compile</orc.deps.scope>
<parquet.deps.scope>compile</parquet.deps.scope>
<parquet.test.deps.scope>test</parquet.test.deps.scope>
<jjwt.deps.scope>test</jjwt.deps.scope>
<jjwt.deps.scope>compile</jjwt.deps.scope>

<spark.yarn.isHadoopProvided>false</spark.yarn.isHadoopProvided>

@@ -3531,7 +3531,7 @@
<id>sparkr</id>
</profile>
<profile>
<id>jjwt</id>
<id>jjwt-provided</id>
</profile>
<!-- use org.openlabtesting.leveldbjni on aarch64 platform except MacOS -->
<profile>