Added minimal-py312-cuda12.4-inference environment #3765

Open · Tarun-Chevula wants to merge 3 commits into main from Tarun/new-env · +278 −0
assets/inference/environments/minimal-py312-cuda12.4-inference/asset.yaml (12 additions, 0 deletions)

```yaml
name: minimal-py312-cuda12.4-inference

version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["Inference"]
```
assets/inference/environments/minimal-py312-cuda12.4-inference/context/Dockerfile (22 additions, 0 deletions)

```dockerfile
FROM mcr.microsoft.com/azureml/inference-base-cuda12.4-ubuntu22.04:{{latest-image-tag}}

WORKDIR /
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/minimal
ENV AZUREML_CONDA_DEFAULT_ENVIRONMENT=$AZUREML_CONDA_ENVIRONMENT_PATH

# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH

# Create conda environment
USER root
RUN sed -i '66,148d' /var/runit/gunicorn/run
COPY conda_dependencies.yaml .
RUN conda env create -p $AZUREML_CONDA_ENVIRONMENT_PATH -f conda_dependencies.yaml -q && \
    rm conda_dependencies.yaml && \
    conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH pip cache purge && \
    conda clean -a -y
USER dockeruser

CMD [ "runsvdir", "/var/runit" ]
```
...s/inference/environments/minimal-py312-cuda12.4-inference/context/conda_dependencies.yaml (10 additions, 0 deletions)

```yaml
name: minimal
channels:
  - conda-forge
  - anaconda
dependencies:
  - python=3.12
  - pip=24.0
  - pip:
    - azureml-inference-server-http=={{latest-pypi-version}}
    - numpy=={{latest-pypi-version}}
```
assets/inference/environments/minimal-py312-cuda12.4-inference/environment.yaml (12 additions, 0 deletions)

```yaml
image:
  name: azureml/curated/minimal-py312-cuda12.4-inference
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
      - conda_dependencies.yaml
  publish:
    location: mcr
    visibility: public
```
assets/inference/environments/minimal-py312-cuda12.4-inference/spec.yaml (18 additions, 0 deletions)

```yaml
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  AzureML minimal/Ubuntu 22.04/Python 3.12 cuda12.4 environment.

name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu22.04
  Inferencing: ""
  Preview: ""
```
...s/inference/environments/minimal-py312-cuda12.4-inference/tests/minimal_inference_test.py (81 additions, 0 deletions)

```python
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests running a sample job in the minimal 22.04 py312 cuda12.4 environment."""
import os
import time
from pathlib import Path
from azure.ai.ml import command, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

BUILD_CONTEXT = Path("../context")
JOB_SOURCE_CODE = "src"
# Cast to int: environment variables are strings, and the value is used in arithmetic below.
TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 30))
STD_LOG = Path("artifacts/user_logs/std_log.txt")


def test_minimal_gpu_inference():
    """Tests a sample job using minimal 22.04 py312 cuda12.4 as the environment."""
    this_dir = Path(__file__).parent

    subscription_id = os.environ.get("subscription_id")
    resource_group = os.environ.get("resource_group")
    workspace_name = os.environ.get("workspace")

    ml_client = MLClient(
        AzureCliCredential(), subscription_id, resource_group, workspace_name
    )

    env_name = "minimal_gpu_inference"

    env_docker_context = Environment(
        build=BuildContext(path=this_dir / BUILD_CONTEXT),
        name=env_name,
        description="minimal 22.04 py312 cuda12.4 inference environment created from a Docker context.",
    )
    returned_env = ml_client.environments.create_or_update(env_docker_context)

    # create the command
    job = command(
        code=this_dir / JOB_SOURCE_CODE,  # local path where the code is stored
        command="python main.py --score ${{inputs.score}}",
        inputs=dict(
            score="valid_score.py",
        ),
        environment=returned_env,
        compute=os.environ.get("gpu_cluster"),
        display_name="minimal-gpu-inference-example",
        description="A test run of the minimal 22.04 py312 cuda12.4 inference curated environment",
        experiment_name="minimalGPUInferenceExperiment",
    )

    returned_job = ml_client.create_or_update(job)
    assert returned_job is not None

    # Poll until a terminal status is reached or the timeout expires
    timeout = time.time() + (TIMEOUT_MINUTES * 60)
    while time.time() <= timeout:
        job = ml_client.jobs.get(returned_job.name)
        status = job.status
        if status in [JobStatus.COMPLETED, JobStatus.FAILED]:
            break
        time.sleep(30)  # sleep 30 seconds
    else:
        # Timeout: the loop condition went false without a break
        ml_client.jobs.cancel(returned_job.name)
        raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
                        f"Last status was {status}.")

    if status == JobStatus.FAILED:
        ml_client.jobs.download(returned_job.name)
        if STD_LOG.exists():
            print(f"*** BEGIN {STD_LOG} ***")
            with open(STD_LOG, "r") as f:
                print(f.read(), end="")
            print(f"*** END {STD_LOG} ***")
        else:
            ml_client.jobs.stream(returned_job.name)

    assert status == JobStatus.COMPLETED
```
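The polling loop in the test above relies on Python's `while`/`else` (same semantics as `for`/`else`): the `else` branch runs only when the loop condition goes false without a `break`, which is exactly the timeout path. A minimal, self-contained illustration (the function and status strings here are made up for the example):

```python
def first_terminal_status(statuses):
    """Scan statuses in order; return the first terminal one, else raise."""
    for status in statuses:
        if status in ("Completed", "Failed"):
            break
    else:
        # Runs only when the loop finished without hitting `break`.
        raise TimeoutError("no terminal status observed")
    return status


print(first_terminal_status(["Queued", "Running", "Completed"]))  # → Completed
```

If the `break` never fires (e.g. the job never leaves `Running`), control falls into the `else` branch, mirroring the cancel-and-raise behavior in the test.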
assets/inference/environments/minimal-py312-cuda12.4-inference/tests/requirements.txt (3 additions, 0 deletions)

```text
azure-ai-ml==1.2.0
marshmallow==3.23.1
azure.identity
```
assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/main.py (86 additions, 0 deletions)

```python
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Validate minimal inference gpu environment by running azmlinfsrv."""

# imports
import os
import subprocess
import requests
from datetime import datetime, timedelta
import time
import argparse


def main(args):
    """Start inference server and post scoring request."""
    # start the server
    server_process = start_server("/var/tmp", ["--entry_script", args.score, "--port", "8081"])

    # score a request
    req = score_with_post()
    server_process.kill()

    print(req)


def start_server(log_directory, args, timeout=timedelta(seconds=15)):
    """Start inference server with options."""
    stderr_file = open(os.path.join(log_directory, "stderr.txt"), "w")
    stdout_file = open(os.path.join(log_directory, "stdout.txt"), "w")

    env = os.environ.copy()
    server_process = subprocess.Popen(["azmlinfsrv"] + args, stdout=stdout_file, stderr=stderr_file, env=env)

    max_time = datetime.now() + timeout

    while datetime.now() < max_time:
        time.sleep(0.25)
        req = None
        try:
            req = requests.get("http://127.0.0.1:8081", timeout=10)
        except Exception as e:
            print(e)

        if req is not None and req.ok:
            break

        # Ensure the server is still running
        status = server_process.poll()
        if status is not None:
            break

    print(log_directory, "stderr.txt")
    print(log_directory, "stdout.txt")

    return server_process


def score_with_post(headers=None, data=None):
    """Post scoring request to the server."""
    url = "http://127.0.0.1:8081/score"
    return requests.post(url=url, headers=headers, data=data)


def parse_args():
    """Parse input arguments."""
    # setup arg parser
    parser = argparse.ArgumentParser()

    # add arguments
    parser.add_argument("--score", type=str)

    # parse args
    args = parser.parse_args()

    # return args
    return args


# run script
if __name__ == "__main__":
    # parse args
    args = parse_args()

    # run main function
    main(args)
```
assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py (34 additions, 0 deletions)

```python
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A basic entry script."""

# imports
import uuid
import os
from datetime import datetime
from azureml_inference_server_http.api.aml_response import AMLResponse
from azureml_inference_server_http.api.aml_request import rawhttp


def init():
    """Sample init function."""
    print("Initializing")


@rawhttp
def run(input_data):
    """Sample run function."""
    print('A new request received~~~')
    try:
        r = dict()
        r['request_id'] = str(uuid.uuid4())
        r['now'] = datetime.now().strftime("%Y/%m/%d %H:%M:%S %f")
        r['pid'] = os.getpid()
        r['message'] = "this is a sample"

        return AMLResponse(r, 200, json_str=True)
    except Exception as e:
        error = str(e)

        return AMLResponse({'error': error}, 500, json_str=True)
```
Check warning — Code scanning / CodeQL
Information exposure through an exception (Medium, test)

Copilot Autofix (AI), about 14 hours ago:
To fix the problem, avoid sending the exception details directly to the user. Instead, log the detailed error message on the server and return a generic error message to the user. This ensures that sensitive information is not exposed while still allowing developers to debug issues from the server logs. Update the `run` function to log the exception details.
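A minimal sketch of the pattern the autofix describes — the helper name `build_error_response` is illustrative, not the exact Copilot patch: log the full exception server-side and hand the client only a generic payload.

```python
import logging

logger = logging.getLogger("scoring")


def build_error_response(exc):
    """Log the exception with traceback server-side; expose only a generic message."""
    logger.error("Scoring request failed", exc_info=exc)
    return {"error": "An internal error occurred while processing the request."}


# In valid_score.py, the except branch would then become:
#     except Exception as e:
#         return AMLResponse(build_error_response(e), 500, json_str=True)
```

The client response no longer echoes `str(e)`, which is what CodeQL flags, while `exc_info=exc` preserves the full traceback in the server logs for debugging.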