diff --git a/templates/ui/app-config-map.yml b/templates/ui/app-config-map.yml
index d4f1c63..59185bc 100644
--- a/templates/ui/app-config-map.yml
+++ b/templates/ui/app-config-map.yml
@@ -5,4 +5,55 @@ metadata:
   labels:
     {{- include "azimuth-llm.labels" . | nindent 4 }}
 data:
-{{ (.Files.Glob "web-app/*").AsConfig | indent 2 }}
\ No newline at end of file
+  app.py: |
+    import huggingface_hub
+    from huggingface_hub import InferenceClient
+    import gradio as gr
+    from startup import wait_for_backend
+
+    backend_url = "http://{{ .Values.api.service.name }}.{{ .Release.Namespace }}.svc"
+    wait_for_backend(backend_url)
+
+    client = InferenceClient(model=backend_url)
+
+    def inference(message, history):
+
+        if message == "":
+            yield ""
+
+        partial_message = ""
+        try:
+            for token in client.text_generation(message, max_new_tokens=500, stream=True):
+                partial_message += token
+                # Strip text marker from generated output
+                partial_message = partial_message.replace('<|endoftext|>', '')
+                yield partial_message
+        except huggingface_hub.inference._text_generation.ValidationError as e:
+            raise gr.Error("Context length exceeded. Please clear the chat window.")
+
+    gr.ChatInterface(
+        inference,
+        chatbot=gr.Chatbot(
+            height=500,
+            show_copy_button=True,
+        ),
+        title="Azimuth LLM",
+        description="This is the demo UI for the Azimuth LLM application.",
+        textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
+        retry_btn="Retry",
+        undo_btn="Undo",
+        clear_btn="Clear",
+    ).queue().launch(server_name="0.0.0.0")
+  startup.py: |
+    import requests, time
+
+    def wait_for_backend(url):
+        ready = False
+        while not ready:
+            try:
+                ready = (requests.get(f'{url}/health').status_code == 200)
+                print('Waiting for backend API to start')
+                time.sleep(5)
+            except requests.exceptions.ConnectionError as e:
+                pass
+        return
\ No newline at end of file
diff --git a/web-app/app.py b/web-app/app.py
deleted file mode 100644
index 20d6422..0000000
--- a/web-app/app.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import huggingface_hub
-from huggingface_hub import InferenceClient
-import gradio as gr
-from startup import wait_for_backend
-
-backend_url = "http://text-generation-inference.default.svc"
-wait_for_backend(backend_url)
-
-
-client = InferenceClient(model=backend_url)
-
-def inference(message, history):
-
-    if message == "":
-        yield ""
-
-    partial_message = ""
-    try:
-        for token in client.text_generation(message, max_new_tokens=500, stream=True):
-            partial_message += token
-            # Strip text marker from generated output
-            partial_message = partial_message.replace('<|endoftext|>', '')
-            yield partial_message
-    except huggingface_hub.inference._text_generation.ValidationError as e:
-        # yield "Context length exceeded. Please clear the chat window."
-        raise gr.Error("Context length exceeded. 
Please clear the chat window.")
-
-gr.ChatInterface(
-    inference,
-    chatbot=gr.Chatbot(
-        height=500,
-        show_copy_button=True,
-        # layout='panel',
-    ),
-    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
-    # description="This is the demo for Gradio UI consuming TGI endpoint.",
-    title="Azimuth LLM",
-    # examples=["What is OpenStack?", "Who are StackHPC?", "Give me the k8s pod yaml for an ubuntu container."],
-    retry_btn="Retry",
-    undo_btn="Undo",
-    clear_btn="Clear",
-).queue().launch(server_name="0.0.0.0")
\ No newline at end of file
diff --git a/web-app/startup.py b/web-app/startup.py
deleted file mode 100644
index 465a038..0000000
--- a/web-app/startup.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import requests, time
-
-def wait_for_backend(url):
-    ready = False
-    while not ready:
-        try:
-            ready = (requests.get(f'{url}/health').status_code == 200)
-            print('Waiting for backend API to start')
-            time.sleep(5)
-        except requests.exceptions.ConnectionError as e:
-            pass
-    return
\ No newline at end of file