
Added minimal-py312-cuda12.4-inference environment #3765

Open · wants to merge 3 commits into base: main
@@ -0,0 +1,12 @@
name: minimal-py312-cuda12.4-inference

version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["Inference"]
@@ -0,0 +1,22 @@
FROM mcr.microsoft.com/azureml/inference-base-cuda12.4-ubuntu22.04:{{latest-image-tag}}

WORKDIR /
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/minimal
ENV AZUREML_CONDA_DEFAULT_ENVIRONMENT=$AZUREML_CONDA_ENVIRONMENT_PATH

# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH

# Create conda environment
USER root
RUN sed -i '66,148d' /var/runit/gunicorn/run
COPY conda_dependencies.yaml .
RUN conda env create -p $AZUREML_CONDA_ENVIRONMENT_PATH -f conda_dependencies.yaml -q && \
    rm conda_dependencies.yaml && \
    conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH pip cache purge && \
    conda clean -a -y
USER dockeruser

CMD [ "runsvdir", "/var/runit" ]
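As a quick local sanity check, the build context above can be built with Docker before relying on the asset pipeline. The snippet below is only a sketch: it assumes Docker is installed, that it is run from this environment's asset directory, and that the `{{latest-image-tag}}` placeholder in the `FROM` line has already been substituted with a concrete tag.

```python
# Hypothetical local build of the inference image; assumes Docker is available
# and the {{latest-image-tag}} placeholder has been replaced in the Dockerfile.
import subprocess

subprocess.run(
    ["docker", "build", "-t", "minimal-py312-cuda12.4-inference:dev", "context"],
    check=True,  # raise if the build fails
)
```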
@@ -0,0 +1,10 @@
name: minimal
channels:
  - conda-forge
  - anaconda
dependencies:
  - python=3.12
  - pip=24.0
  - pip:
    - azureml-inference-server-http=={{latest-pypi-version}}
    - numpy=={{latest-pypi-version}}
@@ -0,0 +1,12 @@
image:
  name: azureml/curated/minimal-py312-cuda12.4-inference
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
      - conda_dependencies.yaml
  publish:
    location: mcr
    visibility: public
@@ -0,0 +1,18 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  AzureML minimal inference environment with Ubuntu 22.04, Python 3.12, and CUDA 12.4.

name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu22.04
  Inferencing: ""
  Preview: ""
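Once published, a curated environment like this is typically referenced by name from the Azure ML v2 SDK rather than rebuilt from its Docker context. The snippet below is only an illustrative sketch, not part of this PR: the registry environment URI and the compute target name are assumptions.

```python
# Illustrative only: submit a trivial job against the published curated environment.
# The environment URI and compute name below are assumptions, not part of this PR.
from azure.ai.ml import MLClient, command
from azure.identity import DefaultAzureCredential

ml_client = MLClient.from_config(credential=DefaultAzureCredential())

job = command(
    command='python -c "import sys; print(sys.version)"',
    environment="azureml://registries/azureml/environments/minimal-py312-cuda12.4-inference/labels/latest",
    compute="gpu-cluster",  # assumed compute target name
    display_name="curated-env-smoke-test",
)
ml_client.create_or_update(job)
```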
@@ -0,0 +1,81 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests running a sample job in the minimal 22.04 py312 cuda12.4 environment."""
import os
import time
from pathlib import Path
from azure.ai.ml import command, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

BUILD_CONTEXT = Path("../context")
JOB_SOURCE_CODE = "src"
TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 30))
STD_LOG = Path("artifacts/user_logs/std_log.txt")


def test_minimal_gpu_inference():
    """Tests a sample job using minimal 22.04 py312 cuda12.4 as the environment."""
    this_dir = Path(__file__).parent

    subscription_id = os.environ.get("subscription_id")
    resource_group = os.environ.get("resource_group")
    workspace_name = os.environ.get("workspace")

    ml_client = MLClient(
        AzureCliCredential(), subscription_id, resource_group, workspace_name
    )

    env_name = "minimal_gpu_inference"

    env_docker_context = Environment(
        build=BuildContext(path=this_dir / BUILD_CONTEXT),
        name=env_name,
        description="minimal 22.04 py312 cuda12.4 inference environment created from a Docker context.",
    )
    returned_env = ml_client.environments.create_or_update(env_docker_context)

    # create the command
    job = command(
        code=this_dir / JOB_SOURCE_CODE,  # local path where the code is stored
        command="python main.py --score ${{inputs.score}}",
        inputs=dict(
            score="valid_score.py",
        ),
        environment=returned_env,
        compute=os.environ.get("gpu_cluster"),
        display_name="minimal-gpu-inference-example",
        description="A test run of the minimal 22.04 py312 cuda12.4 inference curated environment",
        experiment_name="minimalGPUInferenceExperiment",
    )

    returned_job = ml_client.create_or_update(job)
    assert returned_job is not None

    # Poll until a final status is reached or the timeout expires
    status = None
    timeout = time.time() + (TIMEOUT_MINUTES * 60)
    while time.time() <= timeout:
        job = ml_client.jobs.get(returned_job.name)
        status = job.status
        if status in [JobStatus.COMPLETED, JobStatus.FAILED]:
            break
        time.sleep(30)  # sleep 30 seconds
    else:
        # Timed out: cancel the job and fail the test
        ml_client.jobs.cancel(returned_job.name)
        raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
                        f"Last status was {status}.")

    if status == JobStatus.FAILED:
        ml_client.jobs.download(returned_job.name)
        if STD_LOG.exists():
            print(f"*** BEGIN {STD_LOG} ***")
            with open(STD_LOG, "r") as f:
                print(f.read(), end="")
            print(f"*** END {STD_LOG} ***")
        else:
            ml_client.jobs.stream(returned_job.name)

    assert status == JobStatus.COMPLETED
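The test above reads its workspace details and compute target from environment variables, presumably supplied by the asset test pipeline. For local debugging, a minimal sketch of the values it expects (placeholder values only, not part of the PR) is:

```python
# Placeholder values for local debugging only; variable names are taken from the test above.
import os

os.environ.setdefault("subscription_id", "<subscription-id>")
os.environ.setdefault("resource_group", "<resource-group>")
os.environ.setdefault("workspace", "<workspace-name>")
os.environ.setdefault("gpu_cluster", "<gpu-cluster-name>")
os.environ.setdefault("timeout_minutes", "30")
```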
@@ -0,0 +1,3 @@
azure-ai-ml==1.2.0
marshmallow==3.23.1
azure.identity
@@ -0,0 +1,86 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Validate minimal inference gpu environment by running azmlinfsrv."""

# imports
import os
import subprocess
import requests
from datetime import datetime, timedelta
import time
import argparse


def main(args):
    """Start inference server and post scoring request."""
    # start the server
    server_process = start_server("/var/tmp", ["--entry_script", args.score, "--port", "8081"])

    # score a request
    req = score_with_post()
    server_process.kill()

    print(req)


def start_server(log_directory, args, timeout=timedelta(seconds=15)):
    """Start inference server with options."""
    stderr_file = open(os.path.join(log_directory, "stderr.txt"), "w")
    stdout_file = open(os.path.join(log_directory, "stdout.txt"), "w")

    env = os.environ.copy()
    server_process = subprocess.Popen(["azmlinfsrv"] + args, stdout=stdout_file, stderr=stderr_file, env=env)

    max_time = datetime.now() + timeout

    while datetime.now() < max_time:
        time.sleep(0.25)
        req = None
        try:
            req = requests.get("http://127.0.0.1:8081", timeout=10)
        except Exception as e:
            print(e)

        if req is not None and req.ok:
            break

        # Ensure the server is still running
        status = server_process.poll()
        if status is not None:
            break

    print(log_directory, "stderr.txt")
    print(log_directory, "stdout.txt")

    return server_process


def score_with_post(headers=None, data=None):
    """Post scoring request to the server."""
    url = "http://127.0.0.1:8081/score"
    return requests.post(url=url, headers=headers, data=data)


def parse_args():
    """Parse input arguments."""
    # setup arg parser
    parser = argparse.ArgumentParser()

    # add arguments
    parser.add_argument("--score", type=str)

    # parse args
    args = parser.parse_args()

    # return args
    return args


# run script
if __name__ == "__main__":
    # parse args
    args = parse_args()

    # run main function
    main(args)
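`score_with_post` above sends an empty POST body. When exercising the server manually, a request with a JSON payload can be sent the same way; the sketch below assumes azmlinfsrv is already running locally on port 8081, and the payload is arbitrary since the sample entry script does not read the request body.

```python
# Manual check against a locally running azmlinfsrv instance (assumed on port 8081).
import requests

resp = requests.post(
    "http://127.0.0.1:8081/score",
    json={"ping": "hello"},  # arbitrary sample payload; the sample entry script ignores it
    timeout=10,
)
print(resp.status_code, resp.text)
```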
@@ -0,0 +1,34 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A basic entry script."""

# imports
import uuid
import os
from datetime import datetime
from azureml_inference_server_http.api.aml_response import AMLResponse
from azureml_inference_server_http.api.aml_request import rawhttp


def init():
    """Sample init function."""
    print("Initializing")


@rawhttp
def run(input_data):
    """Sample run function."""
    print('A new request received~~~')
    try:
        r = dict()
        r['request_id'] = str(uuid.uuid4())
        r['now'] = datetime.now().strftime("%Y/%m/%d %H:%M:%S %f")
        r['pid'] = os.getpid()
        r['message'] = "this is a sample"

        return AMLResponse(r, 200, json_str=True)
    except Exception as e:
        error = str(e)

        return AMLResponse({'error': error}, 500, json_str=True)

Check warning: Code scanning / CodeQL

Information exposure through an exception (Medium, test)

Stack trace information flows to this location and may be exposed to an external user.

Copilot Autofix (AI), about 14 hours ago

To fix the problem, we should avoid sending the exception details directly to the user. Instead, we should log the detailed error message on the server and return a generic error message to the user. This approach ensures that sensitive information is not exposed while still allowing developers to debug issues using the server logs.

1. Modify the exception handling in the run function to log the exception details.
2. Return a generic error message to the user instead of the exception details.
Suggested changeset 1
assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py

Autofix patch

Run the following command in your local git repository to apply this patch:
cat << 'EOF' | git apply
diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py
--- a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py
+++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py
@@ -31,4 +31,6 @@
     except Exception as e:
-        error = str(e)
+        import traceback
+        error_details = traceback.format_exc()
+        print(f"Error occurred: {error_details}")
 
-        return AMLResponse({'error': error}, 500, json_str=True)
+        return AMLResponse({'error': 'An internal error has occurred.'}, 500, json_str=True)
EOF
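For reference, the same idea can be expressed with the standard logging module instead of printing the traceback. The sketch below is only an illustrative alternative to the autofix above, not part of this PR, and it trims the sample response fields for brevity.

```python
# Illustrative alternative to the Copilot autofix (not part of this PR):
# log the full traceback server-side via logging and return only a generic message.
import logging
import uuid

from azureml_inference_server_http.api.aml_request import rawhttp
from azureml_inference_server_http.api.aml_response import AMLResponse

logger = logging.getLogger(__name__)


@rawhttp
def run(input_data):
    """Sample run function with sanitized error responses."""
    try:
        r = {'request_id': str(uuid.uuid4()), 'message': "this is a sample"}
        return AMLResponse(r, 200, json_str=True)
    except Exception:
        # Full stack trace stays in the server logs only.
        logger.exception("Scoring request failed")
        return AMLResponse({'error': 'An internal error has occurred.'}, 500, json_str=True)
```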