diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
index 2bac17db..dcdca3ee 100644
--- a/docker/Dockerfile.pip
+++ b/docker/Dockerfile.pip
@@ -18,7 +18,7 @@ ARG CUDA_VERSION=11.8.0
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
 ARG PYSPARK_VERSION=3.3.1
-ARG RAPIDS_VERSION=23.10.0
+ARG RAPIDS_VERSION=23.12.0
 ARG ARCH=amd64
 #ARG ARCH=arm64
 
 # Install packages to build spark-rapids-ml
diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
index bd45fc74..2e3fd3bf 100644
--- a/docker/Dockerfile.python
+++ b/docker/Dockerfile.python
@@ -17,7 +17,7 @@ ARG CUDA_VERSION=11.8.0
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
 
-ARG CUML_VERSION=23.10
+ARG CUML_VERSION=23.12
 
 # Install packages to build spark-rapids-ml
 RUN apt update -y \
diff --git a/jvm/README.md b/jvm/README.md
index c72cf88f..af9ab16d 100644
--- a/jvm/README.md
+++ b/jvm/README.md
@@ -94,8 +94,8 @@ repository, usually in your `~/.m2/repository`.
 Add the artifact jar to the Spark, for example:
 
 ```bash
-ML_JAR="target/rapids-4-spark-ml_2.12-23.10.0-SNAPSHOT.jar"
-PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.10.0/rapids-4-spark_2.12-23.10.0.jar"
+ML_JAR="target/rapids-4-spark-ml_2.12-23.12.0-SNAPSHOT.jar"
+PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.12.1/rapids-4-spark_2.12-23.12.1.jar"
 
 $SPARK_HOME/bin/spark-shell --master $SPARK_MASTER \
  --driver-memory 20G \
diff --git a/notebooks/aws-emr/init-bootstrap-action.sh b/notebooks/aws-emr/init-bootstrap-action.sh
index d2096831..f1bd4276 100755
--- a/notebooks/aws-emr/init-bootstrap-action.sh
+++ b/notebooks/aws-emr/init-bootstrap-action.sh
@@ -8,7 +8,7 @@ sudo chmod a+rwx -R /sys/fs/cgroup/devices
 sudo yum install -y gcc openssl-devel bzip2-devel libffi-devel tar gzip wget make mysql-devel
 sudo bash -c "wget https://www.python.org/ftp/python/3.9.9/Python-3.9.9.tgz && tar xzf Python-3.9.9.tgz && cd Python-3.9.9 && ./configure --enable-optimizations && make altinstall"
 
-RAPIDS_VERSION=23.10.0
+RAPIDS_VERSION=23.12.0
 
 # install scikit-learn
 sudo /usr/local/bin/pip3.9 install scikit-learn
diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md
index 97f475ce..9c425dbe 100644
--- a/notebooks/databricks/README.md
+++ b/notebooks/databricks/README.md
@@ -44,7 +44,7 @@ If you already have a Databricks account, you can run the example notebooks on a
     spark.task.resource.gpu.amount 1
     spark.databricks.delta.preview.enabled true
     spark.python.worker.reuse true
-    spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.10.0.jar:/databricks/spark/python
+    spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.12.1.jar:/databricks/spark/python
     spark.sql.execution.arrow.maxRecordsPerBatch 100000
     spark.rapids.memory.gpu.minAllocFraction 0.0001
     spark.plugins com.nvidia.spark.SQLPlugin
diff --git a/notebooks/databricks/init-pip-cuda-11.8.sh b/notebooks/databricks/init-pip-cuda-11.8.sh
index b7b2169e..39fb0ac6 100644
--- a/notebooks/databricks/init-pip-cuda-11.8.sh
+++ b/notebooks/databricks/init-pip-cuda-11.8.sh
@@ -4,8 +4,8 @@ SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file
 # IMPORTANT: specify RAPIDS_VERSION fully 23.10.0 and not 23.10
 # also in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
-RAPIDS_VERSION=23.10.0
-SPARK_RAPIDS_VERSION=23.10.0
+RAPIDS_VERSION=23.12.0
+SPARK_RAPIDS_VERSION=23.12.1
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
diff --git a/notebooks/dataproc/README.md b/notebooks/dataproc/README.md
index 46208292..21a622dc 100644
--- a/notebooks/dataproc/README.md
+++ b/notebooks/dataproc/README.md
@@ -29,7 +29,7 @@ If you already have a Dataproc account, you can run the example notebooks on a D
 - Create a cluster with at least two single-gpu workers.  **Note**: in addition to the initialization script from above, this also uses the standard [initialization actions](https://github.com/GoogleCloudDataproc/initialization-actions) for installing the GPU drivers and RAPIDS:
   ```
   export CUDA_VERSION=11.8
-  export RAPIDS_VERSION=23.10.0
+  export RAPIDS_VERSION=23.12.0
 
   gcloud dataproc clusters create $USER-spark-rapids-ml \
   --image-version=2.1-ubuntu \
diff --git a/notebooks/dataproc/spark_rapids_ml.sh b/notebooks/dataproc/spark_rapids_ml.sh
index 5a860a60..d7690a93 100755
--- a/notebooks/dataproc/spark_rapids_ml.sh
+++ b/notebooks/dataproc/spark_rapids_ml.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-RAPIDS_VERSION=23.10.0
+RAPIDS_VERSION=23.12.0
 
 # patch existing packages
 mamba install "llvmlite<0.40,>=0.39.0dev0" "numba>=0.56.2"
diff --git a/notebooks/umap.ipynb b/notebooks/umap.ipynb
index b29f84a9..43c1f19e 100644
--- a/notebooks/umap.ipynb
+++ b/notebooks/umap.ipynb
@@ -652,7 +652,7 @@
     "import os\n",
     "import requests\n",
     "\n",
-    "SPARK_RAPIDS_VERSION = \"23.08.1\"\n",
+    "SPARK_RAPIDS_VERSION = \"23.12.1\"\n",
     "cuda_version = \"12\"\n",
     "rapids_jar = f\"rapids-4-spark_2.12-{SPARK_RAPIDS_VERSION}.jar\"\n",
diff --git a/python/README.md b/python/README.md
index dac24b9d..dcc6e339 100644
--- a/python/README.md
+++ b/python/README.md
@@ -8,9 +8,9 @@ For simplicity, the following instructions just use Spark local mode, assuming a
 First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html).  Example for CUDA Toolkit 11.8:
 ```bash
-conda create -n rapids-23.10 \
+conda create -n rapids-23.12 \
     -c rapidsai -c conda-forge -c nvidia \
-    cuml=23.10 python=3.9 cuda-version=11.8
+    cuml=23.12 python=3.9 cuda-version=11.8
 ```
 
 **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting.  Once you have a working environment, you can then try installing directly, if necessary.
@@ -19,7 +19,7 @@ conda create -n rapids-23.10 \
 Once you have the conda environment, activate it and install the required packages.
 
 ```bash
-conda activate rapids-23.10
+conda activate rapids-23.12
 
 ## for development access to notebooks, tests, and benchmarks
 git clone --branch main https://github.com/NVIDIA/spark-rapids-ml.git
diff --git a/python/benchmark/databricks/gpu_cluster_spec.sh b/python/benchmark/databricks/gpu_cluster_spec.sh
index f8bbc47f..e1f0a91d 100644
--- a/python/benchmark/databricks/gpu_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_cluster_spec.sh
@@ -9,7 +9,7 @@ cat <<EOF
[…]
 mamba install "llvmlite<0.40,>=0.39.0dev0" "numba>=0.56.2"
diff --git a/python/pyproject.toml b/python/pyproject.toml
index e8c3d4eb..37080c95 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spark-rapids-ml"
-version = "23.10.0"
+version = "23.12.0"
 authors = [
     { name="Jinfeng Li", email="jinfeng@nvidia.com" },
     { name="Bobby Wang", email="bobwang@nvidia.com" },
diff --git a/python/run_benchmark.sh b/python/run_benchmark.sh
index 60e27804..6082bcae 100755
--- a/python/run_benchmark.sh
+++ b/python/run_benchmark.sh
@@ -99,7 +99,7 @@ EOF
 
 if [[ $cluster_type == "gpu_etl" ]]
 then
-SPARK_RAPIDS_VERSION=23.10.0
+SPARK_RAPIDS_VERSION=23.12.1
 rapids_jar=${rapids_jar:-rapids-4-spark_2.12-$SPARK_RAPIDS_VERSION.jar}
 if [ ! -f $rapids_jar ]; then
     echo "downloading spark rapids jar"
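
The recurring pitfall in a bump like this, called out in the `init-pip-cuda-11.8.sh` comments above, is that the two version strings follow different conventions: the Python/conda `RAPIDS_VERSION` drops the leading zero in the month field and tracks the cuML release (23.12.0), while the jar `SPARK_RAPIDS_VERSION` keeps the leading zero and carries its own patch number (23.12.1). Below is a minimal pre-flight sketch for checking that both artifacts actually exist before rolling out the bump; it reuses the Maven Central URL from the Databricks init script, but the checks themselves are illustrative and not part of any script in this diff.

```bash
#!/bin/bash
# Hypothetical sanity check for a version bump (illustrative, not part of this diff).
RAPIDS_VERSION=23.12.0        # python/conda convention: no leading zero in the month field
SPARK_RAPIDS_VERSION=23.12.1  # jar convention: leading zero kept, independent patch number

# HEAD-request the jar on Maven Central (same URL the Databricks init script downloads).
curl -sfI "https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar" > /dev/null \
  && echo "rapids-4-spark ${SPARK_RAPIDS_VERSION}: found" \
  || echo "rapids-4-spark ${SPARK_RAPIDS_VERSION}: MISSING"

# Confirm the matching cuML conda package resolves (assumes conda is installed);
# ${RAPIDS_VERSION%.*} strips the patch field, yielding the conda-style "23.12".
conda search -c rapidsai "cuml=${RAPIDS_VERSION%.*}" > /dev/null \
  && echo "cuml ${RAPIDS_VERSION%.*}: found" \
  || echo "cuml ${RAPIDS_VERSION%.*}: MISSING"
```

Run against 23.12, the first check also catches the inconsistency fixed in `jvm/README.md` above, where the `PLUGIN_JAR` path and jar filename disagreed on the patch number.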