Remove references to text-generation-inference
sd109 committed Dec 13, 2023
1 parent 608f851 commit b7bb1c0
Showing 5 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion templates/NOTES.txt
@@ -1 +1 @@
-The LLM app allows users to deploy machine learning models using [text-generation-inference](https://github.com/huggingface/text-generation-inference) as a model serving backend and [gradio](https://github.com/gradio-app/gradio) as a web interface.
+The LLM app allows users to deploy machine learning models using [vLLM](https://docs.vllm.ai/en/latest/) as a model serving backend and [gradio](https://github.com/gradio-app/gradio) as a web interface.
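For context on the change above: vLLM serves models behind an OpenAI-compatible HTTP API. A minimal sketch of building a completion request body for that API (the model name below is a hypothetical placeholder, not taken from this chart):

```python
import json

def build_completion_request(model: str, prompt: str, max_tokens: int = 64) -> bytes:
    """JSON body for a POST to vLLM's OpenAI-compatible /v1/completions endpoint."""
    return json.dumps({
        "model": model,
        "prompt": prompt,
        "max_tokens": max_tokens,
    }).encode("utf-8")

# Hypothetical model name for illustration; the chart configures the real one.
body = build_completion_request("some-org/some-model", "Hello")
```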
2 changes: 1 addition & 1 deletion templates/api/deployment.yml
@@ -58,7 +58,7 @@ spec:
         # TODO: Make this configurable (e.g. hostPath or PV)
         - name: data
           {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
-        # Suggested in text-generation-inference docs
+        # Suggested in vLLM docs
         - name: shm
           emptyDir:
             medium: Memory
5 changes: 2 additions & 3 deletions values.yaml
@@ -33,16 +33,15 @@ api:
     version: "6876068"
   # Service config
   service:
-    name: text-generation-inference
+    name: llm-backend
     type: ClusterIP
     zenith:
       enabled: false
       skipAuth: false
       label: Inference API
       iconUrl:
       description: |
-        The raw inference API endpoints for the deployed LLM.
-        Public API docs are available [here](https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference)
+        The raw inference API endpoints for the deployed LLM.
   # Config for huggingface model cache volume
   # This is mounted at /root/.cache/huggingface in the api deployment
   cacheVolume:
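The service rename above matters because the web-app examples below reach the backend through Kubernetes cluster DNS, where a Service named `llm-backend` in the `default` namespace resolves as `llm-backend.default.svc`. A small illustrative helper showing that naming rule (not part of the chart):

```python
def service_dns(name: str, namespace: str = "default") -> str:
    """Cluster-internal URL for a Kubernetes Service: http://<name>.<namespace>.svc"""
    return f"http://{name}.{namespace}.svc"

# Matches the backend_url used by the example web apps below.
url = service_dns("llm-backend")
```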
3 changes: 2 additions & 1 deletion web-app-utils/example_app_playful.py
@@ -3,7 +3,8 @@
 from api_startup_check import wait_for_backend
 
 # NOTE: This url should match the chart's api service name & namespace
-backend_url = "http://text-generation-inference.default.svc"
+#TODO: Detect namespace automatically?
+backend_url = "http://llm-backend.default.svc"
 wait_for_backend(backend_url)
 
 prompt = """
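The `wait_for_backend` helper imported above lives in `api_startup_check`, which is not part of this diff. A plausible sketch of such a readiness poll, with the probe injected as a parameter so the loop can be exercised without a live server (an assumption about its behavior, not the chart's actual implementation):

```python
import time

def wait_for_backend(url: str, probe, interval: float = 1.0, max_attempts: int = 60) -> bool:
    """Poll probe(url) until it returns True or max_attempts is exhausted.

    The real api_startup_check presumably performs an HTTP health check;
    here the probe is injected so the retry logic is testable in isolation.
    """
    for attempt in range(max_attempts):
        if probe(url):
            return True
        if attempt < max_attempts - 1:
            time.sleep(interval)
    return False
```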
3 changes: 2 additions & 1 deletion web-app-utils/example_app_vanilla.py
@@ -3,7 +3,8 @@
 from api_startup_check import wait_for_backend
 
 # NOTE: This url should match the chart's api service name & namespace
-backend_url = "http://text-generation-inference.default.svc"
+#TODO: Detect namespace automatically?
+backend_url = "http://llm-backend.default.svc"
 wait_for_backend(backend_url)
 
 prompt = """
