diff --git a/README.md b/README.md
index f5b814bd9..b246553db 100644
--- a/README.md
+++ b/README.md
@@ -50,20 +50,20 @@ brew services stop djl-serving
For Ubuntu
```
-curl -O https://publish.djl.ai/djl-serving/djl-serving_0.23.0-1_all.deb
-sudo dpkg -i djl-serving_0.23.0-1_all.deb
+curl -O https://publish.djl.ai/djl-serving/djl-serving_0.24.0-1_all.deb
+sudo dpkg -i djl-serving_0.24.0-1_all.deb
```
For Windows
We are considering creating a `chocolatey` package for Windows. For the time being, you can
-download the djl-serving zip file from [here](https://publish.djl.ai/djl-serving/serving-0.23.0.zip).
+download the djl-serving zip file from [here](https://publish.djl.ai/djl-serving/serving-0.24.0.zip).
```
-curl -O https://publish.djl.ai/djl-serving/serving-0.23.0.zip
-unzip serving-0.23.0.zip
+curl -O https://publish.djl.ai/djl-serving/serving-0.24.0.zip
+unzip serving-0.24.0.zip
# start djl-serving
-serving-0.23.0\bin\serving.bat
+serving-0.24.0\bin\serving.bat
```
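Whichever platform you install on, a quick way to confirm the server came up is to hit the management API. A minimal sketch, assuming djl-serving is running with its default configuration on port 8080:
```
# List the models currently registered with the server.
curl http://localhost:8080/models
```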
### Docker
diff --git a/benchmark/README.md b/benchmark/README.md
index abcbb0c0e..d9b9d1795 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -43,25 +43,25 @@ sudo snap alias djlbench djl-bench
- Or download the .deb package from S3
```
-curl -O https://publish.djl.ai/djl-bench/0.23.0/djl-bench_0.23.0-1_all.deb
-sudo dpkg -i djl-bench_0.23.0-1_all.deb
+curl -O https://publish.djl.ai/djl-bench/0.24.0/djl-bench_0.24.0-1_all.deb
+sudo dpkg -i djl-bench_0.24.0-1_all.deb
```
For macOS, CentOS, or Amazon Linux 2
-You can download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip).
+You can download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip).
```
-curl -O https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip
-unzip benchmark-0.23.0.zip
-rm benchmark-0.23.0.zip
-sudo ln -s $PWD/benchmark-0.23.0/bin/benchmark /usr/bin/djl-bench
+curl -O https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip
+unzip benchmark-0.24.0.zip
+rm benchmark-0.24.0.zip
+sudo ln -s $PWD/benchmark-0.24.0/bin/benchmark /usr/bin/djl-bench
```
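With the `djl-bench` symlink in place, a quick smoke test can be run against a model from the DJL model zoo. A minimal sketch; the flags (`-e` engine, `-u` model URL, `-s` input shape, `-c` iteration count) follow the benchmark's documented options, and the resnet URL is only illustrative:
```
# Benchmark a PyTorch resnet for 100 iterations with one 1x3x224x224 input.
djl-bench -e PyTorch -u djl://ai.djl.pytorch/resnet -s 1,3,224,224 -c 100
```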
For Windows
We are considering creating a `chocolatey` package for Windows. For the time being, you can
-download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip).
+download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip).
Or you can run benchmark using gradle:
diff --git a/engines/python/README.md b/engines/python/README.md
index 2527d2534..10094d4aa 100644
--- a/engines/python/README.md
+++ b/engines/python/README.md
@@ -29,13 +29,13 @@ The javadocs output is generated in the `build/doc/javadoc` folder.
## Installation
You can pull the Python engine from the central Maven repository by including the following dependency:
-- ai.djl.python:python:0.23.0
+- ai.djl.python:python:0.24.0
```xml
<dependency>
    <groupId>ai.djl.python</groupId>
    <artifactId>python</artifactId>
-    <version>0.23.0</version>
+    <version>0.24.0</version>
    <scope>runtime</scope>
</dependency>
```
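To confirm the artifact resolves before adding it to a build, it can be fetched directly with Maven. A minimal sketch, assuming Maven is installed and Maven Central is reachable:
```
# Pull the Python engine artifact into the local Maven repository.
mvn dependency:get -Dartifact=ai.djl.python:python:0.24.0
```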
diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index 490afeb0f..00e624215 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -10,7 +10,7 @@
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
# the specific language governing permissions and limitations under the License.
FROM ubuntu:20.04 AS base
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
COPY scripts scripts/
RUN mkdir -p /opt/djl/conf && \
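Because `djl_version` is a build ARG, a specific release can also be selected at build time without editing the file. A minimal sketch, assuming the build runs from `serving/docker` so the `scripts` directory copied above is in the context; the image tag is illustrative:
```
cd serving/docker
docker build --build-arg djl_version=0.24.0 -f Dockerfile -t djl-serving:cpu .
```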
diff --git a/serving/docker/README.md b/serving/docker/README.md
index 3eead9eec..1625b0cbb 100644
--- a/serving/docker/README.md
+++ b/serving/docker/README.md
@@ -32,7 +32,7 @@ mkdir models
cd models
curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_jit.tar.gz
-docker run -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0
+docker run -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0
```
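Once the container is running, the model mounted at `/opt/ml/model` is loaded automatically and can be queried through the inference API. A minimal sketch, assuming the archive registers under the name `bert_qa_jit` and its translator accepts question/paragraph JSON:
```
curl -X POST http://localhost:8080/predictions/bert_qa_jit \
  -H "Content-Type: application/json" \
  -d '{"question": "How is the weather?", "paragraph": "The weather is sunny today."}'
```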
### GPU
@@ -42,7 +42,7 @@ mkdir models
cd models
curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_jit.tar.gz
-docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0-pytorch-cu118
+docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0-pytorch-cu118
```
### AWS Inferentia
@@ -52,5 +52,5 @@ mkdir models
cd models
curl -O https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz
-docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0-pytorch-inf2
+docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0-pytorch-inf2
```
diff --git a/serving/docker/aarch64.Dockerfile b/serving/docker/aarch64.Dockerfile
index a16c4dda3..9ab740f00 100644
--- a/serving/docker/aarch64.Dockerfile
+++ b/serving/docker/aarch64.Dockerfile
@@ -10,7 +10,7 @@
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
# the specific language governing permissions and limitations under the License.
FROM arm64v8/ubuntu:20.04
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
ARG torch_version=2.0.1
EXPOSE 8080
diff --git a/serving/docker/deepspeed.Dockerfile b/serving/docker/deepspeed.Dockerfile
index d05ebeeb1..ab2a04e8d 100644
--- a/serving/docker/deepspeed.Dockerfile
+++ b/serving/docker/deepspeed.Dockerfile
@@ -11,18 +11,18 @@
# the specific language governing permissions and limitations under the License.
ARG version=11.8.0-cudnn8-devel-ubuntu20.04
FROM nvidia/cuda:$version
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
ARG python_version=3.9
ARG torch_version=2.0.1
ARG torch_vision_version=0.15.2
ARG vllm_version=0.2.0
-ARG deepspeed_wheel="https://publish.djl.ai/deepspeed/deepspeed-nightly-py2.py3-none-any.whl"
+ARG deepspeed_wheel="https://publish.djl.ai/deepspeed/deepspeed-0.10.0-py2.py3-none-any.whl"
ARG flash_attn_wheel="https://publish.djl.ai/flash_attn/flash_attn_1-1.0.9-cp39-cp39-linux_x86_64.whl"
ARG dropout_layer_norm_wheel="https://publish.djl.ai/flash_attn/dropout_layer_norm-0.1-cp39-cp39-linux_x86_64.whl"
ARG rotary_emb_wheel="https://publish.djl.ai/flash_attn/rotary_emb-0.1-cp39-cp39-linux_x86_64.whl"
ARG flash_attn_2_wheel="https://publish.djl.ai/flash_attn/flash_attn-2.0.1-cp39-cp39-linux_x86_64.whl"
ARG lmi_vllm_wheel="https://publish.djl.ai/lmi_vllm/lmi_vllm-0.1.1-cp39-cp39-linux_x86_64.whl"
-ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-nightly-py3-none-any.whl"
+ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-0.9.4-py3-none-any.whl"
ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl"
ARG peft_wheel="https://publish.djl.ai/peft/peft-0.5.0alpha-py3-none-any.whl"
ARG mmaploader_wheel="https://publish.djl.ai/mmaploader/mmaploader-nightly-py3-none-any.whl"
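These wheel locations are ordinary build ARGs, so a different build (for example, the nightly that this change pins away from) can be swapped back in at image build time. A minimal sketch, assuming the build is run from `serving/docker` so the Dockerfile's expected context is present; the tag is illustrative:
```
cd serving/docker
docker build \
  --build-arg lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-nightly-py3-none-any.whl" \
  -f deepspeed.Dockerfile -t djl-serving:deepspeed .
```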
diff --git a/serving/docker/fastertransformer.Dockerfile b/serving/docker/fastertransformer.Dockerfile
index 0ae63e282..f896901b6 100644
--- a/serving/docker/fastertransformer.Dockerfile
+++ b/serving/docker/fastertransformer.Dockerfile
@@ -11,7 +11,7 @@
# the specific language governing permissions and limitations under the License.
ARG version=11.8.0-cudnn8-devel-ubuntu20.04
FROM nvidia/cuda:$version
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
ARG python_version=3.9
ARG ft_version="llama"
ARG triton_version="r23.04"
diff --git a/serving/docker/pytorch-cu118.Dockerfile b/serving/docker/pytorch-cu118.Dockerfile
index 5f2859739..6b12382de 100644
--- a/serving/docker/pytorch-cu118.Dockerfile
+++ b/serving/docker/pytorch-cu118.Dockerfile
@@ -13,7 +13,7 @@ ARG version=11.8.0-cudnn8-devel-ubuntu20.04
FROM nvidia/cuda:$version as base
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
ARG torch_version=2.0.1
ARG torch_vision_version=0.15.2
ARG python_version=3.9
diff --git a/serving/docker/pytorch-inf2.Dockerfile b/serving/docker/pytorch-inf2.Dockerfile
index f0b87818a..7431ca9d3 100644
--- a/serving/docker/pytorch-inf2.Dockerfile
+++ b/serving/docker/pytorch-inf2.Dockerfile
@@ -10,7 +10,7 @@
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
# the specific language governing permissions and limitations under the License.
FROM ubuntu:20.04
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
ARG torch_version=1.13.1
ARG python_version=3.8
ARG torch_neuronx_version=1.13.1.1.11.0
diff --git a/wlm/README.md b/wlm/README.md
index 95a59269a..b0df8e789 100644
--- a/wlm/README.md
+++ b/wlm/README.md
@@ -56,7 +56,7 @@ You can pull the server from the central Maven repository by including the follo
<dependency>
    <groupId>ai.djl.serving</groupId>
    <artifactId>wlm</artifactId>
-    <version>0.23.0</version>
+    <version>0.24.0</version>
</dependency>
```
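As with the Python engine above, the artifact can be resolved ahead of time to verify that the version is published. A minimal sketch, assuming Maven is available:
```
mvn dependency:get -Dartifact=ai.djl.serving:wlm:0.24.0
```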