Merge pull request #750 from NVIDIA/branch-24.10
[auto-merge] branch-24.10 to branch-24.12 [skip ci] [bot]
nvauto authored Oct 8, 2024
2 parents 7f1bd49 + 1f6498e commit 1019b1f
Showing 11 changed files with 14 additions and 11 deletions.
4 changes: 2 additions & 2 deletions ci/Dockerfile
@@ -37,6 +37,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG CUML_VER=24.08
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.9 cuda-version=11.8 \
+ARG CUML_VER=24.10
+RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=11.8 numpy~=1.0 \
     && conda clean --all -f -y
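Beyond the version bump, this hunk switches the conda channel from rapidsai to rapidsai-nightly, moves Python to 3.10, and adds a numpy~=1.0 pin (which keeps numpy on the 1.x series). A quick sanity check one could run inside the built image, a hypothetical sketch rather than part of this commit, assuming cuvs exposes __version__ like other RAPIDS packages:

# Hypothetical post-build check; not part of this commit.
import cuml
import cuvs
import numpy

print(cuml.__version__)   # expect a 24.10 nightly, per CUML_VER
print(cuvs.__version__)   # pinned to the same CUML_VER in the Dockerfile
print(numpy.__version__)  # numpy~=1.0 resolves to some 1.x release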
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -9,7 +9,7 @@
 project = 'spark-rapids-ml'
 copyright = '2024, NVIDIA'
 author = 'NVIDIA'
-release = '24.08.0'
+release = '24.10.0'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
2 changes: 1 addition & 1 deletion notebooks/databricks/README.md
@@ -51,7 +51,7 @@ If you already have a Databricks account, you can run the example notebooks on a
 spark.task.resource.gpu.amount 1
 spark.databricks.delta.preview.enabled true
 spark.python.worker.reuse true
-spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.06.1.jar:/databricks/spark/python
+spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python
 spark.sql.execution.arrow.maxRecordsPerBatch 100000
 spark.rapids.memory.gpu.minAllocFraction 0.0001
 spark.plugins com.nvidia.spark.SQLPlugin
2 changes: 1 addition & 1 deletion notebooks/databricks/init-pip-cuda-11.8.sh
@@ -5,7 +5,7 @@ SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file
 # also in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
 RAPIDS_VERSION=24.8.0
-SPARK_RAPIDS_VERSION=24.06.1
+SPARK_RAPIDS_VERSION=24.08.1
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
 
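As the script's comments note, the Python package version drops the leading zero in the month field while the jar version keeps it. A hypothetical helper, purely illustrative and not part of the script, that maps a jar-style version to the pip-style spelling:

# Hypothetical helper; illustrates the zero-padding convention only.
def jar_to_pip_version(jar_version: str) -> str:
    year, month, patch = jar_version.split(".")
    return f"{year}.{int(month)}.{patch}"

assert jar_to_pip_version("24.08.1") == "24.8.1"
assert jar_to_pip_version("23.08.2") == "23.8.2"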
2 changes: 1 addition & 1 deletion python/benchmark/databricks/gpu_etl_cluster_spec.sh
@@ -9,7 +9,7 @@ cat <<EOF
   "spark.task.cpus": "1",
   "spark.databricks.delta.preview.enabled": "true",
   "spark.python.worker.reuse": "true",
-  "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-24.06.1.jar:/databricks/spark/python",
+  "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python",
   "spark.sql.files.minPartitionNum": "2",
   "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",
   "spark.executor.cores": "8",
2 changes: 1 addition & 1 deletion python/benchmark/databricks/init-pip-cuda-11.8.sh
@@ -6,7 +6,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
 RAPIDS_VERSION=24.8.0
-SPARK_RAPIDS_VERSION=24.06.1
+SPARK_RAPIDS_VERSION=24.08.1
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
 
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spark-rapids-ml"
-version = "24.8.0"
+version = "24.10.0"
 authors = [
     { name="Jinfeng Li", email="[email protected]" },
     { name="Bobby Wang", email="[email protected]" },
2 changes: 1 addition & 1 deletion python/run_benchmark.sh
@@ -107,7 +107,7 @@ EOF
 
 if [[ $cluster_type == "gpu_etl" ]]
 then
-    SPARK_RAPIDS_VERSION=24.06.1
+    SPARK_RAPIDS_VERSION=24.08.1
     rapids_jar=${rapids_jar:-rapids-4-spark_2.12-$SPARK_RAPIDS_VERSION.jar}
     if [ ! -f $rapids_jar ]; then
         echo "downloading spark rapids jar"
2 changes: 1 addition & 1 deletion python/src/spark_rapids_ml/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "24.08.0"
+__version__ = "24.10.0"
 
 import pandas as pd
 import pyspark
2 changes: 1 addition & 1 deletion python/src/spark_rapids_ml/clustering.py
@@ -483,7 +483,7 @@ def _construct_kmeans() -> CumlT:
             kmeans = CumlKMeansMG(output_type="cudf", **cuml_alg_params)
             from spark_rapids_ml.utils import cudf_to_cuml_array
 
-            kmeans.n_cols = n_cols
+            kmeans.n_features_in_ = n_cols
             kmeans.dtype = np.dtype(dtype)
             kmeans.cluster_centers_ = cudf_to_cuml_array(
                 np.array(cluster_centers_).astype(dtype), order=array_order
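The rename from n_cols to n_features_in_ appears to track the scikit-learn attribute convention, in which a fitted estimator records its input width as n_features_in_. A minimal sketch of that convention, using scikit-learn itself rather than the cuML code above:

# Minimal sketch of the n_features_in_ convention, using scikit-learn.
import numpy as np
from sklearn.cluster import KMeans

X = np.random.rand(100, 4)
model = KMeans(n_clusters=3).fit(X)
print(model.n_features_in_)  # 4, the number of input columns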
3 changes: 3 additions & 0 deletions python/tests/test_approximate_nearest_neighbors.py
@@ -494,6 +494,9 @@ def assert_row_equal(r1: Row, r2: Row) -> None:
 )
 @pytest.mark.parametrize("data_shape", [(10000, 50)], ids=idfn)
 @pytest.mark.parametrize("data_type", [np.float32])
+@pytest.mark.skip(
+    reason="ivfpq has become unstable in 24.10. need to address in future pr"
+)
 def test_ivfpq(
     algorithm: str,
     feature_type: str,
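The marker above skips test_ivfpq unconditionally. A hypothetical alternative, not what this commit does, would gate the skip on the affected release line so the test re-enables itself once a fixed cuML ships:

# Hypothetical version-gated skip; assumes the packaging library is available.
import cuml
import pytest
from packaging.version import Version

@pytest.mark.skipif(
    Version(cuml.__version__).release[:2] == (24, 10),
    reason="ivfpq has become unstable in 24.10. need to address in future pr",
)
def test_ivfpq_gated() -> None:
    ...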
