Skip to content

Commit

Permalink
Enable dedicated endpoint for pytorch llama3 deployment
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 699259509
  • Loading branch information
vertex-mg-bot authored and copybara-github committed Dec 19, 2024
1 parent d89b29f commit d1708a5
Showing 1 changed file with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
{
"cell_type": "code",
"execution_count": null,
"language": "python",
"metadata": {
"cellView": "form",
"id": "YXFGIp1l-qtT"
Expand Down Expand Up @@ -131,8 +132,6 @@
")\n",
"\n",
"models, endpoints = {}, {}\n",
"# Dedicated endpoint not supported yet\n",
"use_dedicated_endpoint = False\n",
"\n",
"# Get the default cloud project id.\n",
"PROJECT_ID = os.environ[\"GOOGLE_CLOUD_PROJECT\"]\n",
Expand Down Expand Up @@ -220,6 +219,9 @@
"# The pre-built serving docker images.\n",
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20240721_0916_RC00\"\n",
"\n",
"# @markdown Set use_dedicated_endpoint to False if you don't want to use [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).\n",
"use_dedicated_endpoint = True # @param {type:\"boolean\"}\n",
"\n",
"\n",
"def deploy_model_vllm(\n",
" model_name: str,\n",
Expand Down Expand Up @@ -516,6 +518,8 @@
},
"outputs": [],
"source": [
"if use_dedicated_endpoint:\n",
" DEDICATED_ENDPOINT_DNS = endpoints[\"vllm_gpu\"].gca_resource.dedicated_endpoint_dns\n",
"ENDPOINT_RESOURCE_NAME = \"projects/{}/locations/{}/endpoints/{}\".format(\n",
" PROJECT_ID, REGION, endpoints[\"vllm_gpu\"].name\n",
")\n",
Expand Down

0 comments on commit d1708a5

Please sign in to comment.