Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mlflow test to verify mlflow score script #3604

Merged
merged 13 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM mcr.microsoft.com/azureml/inference-base-2004:{{latest-image-tag}}

WORKDIR /
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/minimal
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/mlflow
ENV AZUREML_CONDA_DEFAULT_ENVIRONMENT=$AZUREML_CONDA_ENVIRONMENT_PATH

# Prepend path to AzureML conda environment
Expand All @@ -13,8 +13,6 @@ ENV AML_APP_ROOT="/var/mlflow_resources"
ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py"

USER root
# Copying of mlmonitoring will add once testing is completed.
# COPY mlmonitoring /var/mlflow_resources/mlmonitoring

# We'll copy the HF scripts as well to enable better handling for v2 packaging. This will not require changes to the
# packages installed in the image, as the expectation is that these will all be brought along with the model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ dependencies:
- python=3.9.13
- pip
- pip:
- azureml-inference-server-http=={{latest-pypi-version}}
- azureml-inference-server-http=={{latest-pypi-version}}
- azureml-ai-monitoring=={{latest-pypi-version}}
- numpy
- mlflow
- azureml-contrib-services
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from mlflow.models import Model
from mlflow.pyfunc import load_model
from mlflow.pyfunc.scoring_server import _get_jsonable_obj
from mlmonitoring import Collector
from azureml.ai.monitoring import Collector
from mlflow.types.utils import _infer_schema
from mlflow.types.schema import Schema, ColSpec, DataType
from mlflow.exceptions import MlflowException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
STD_LOG = Path("artifacts/user_logs/std_log.txt")


def test_minimal_cpu_inference():
"""Tests a sample job using minimal 20.04 py39 cpu as the environment."""
def test_mlflow_cpu_inference():
"""Tests a sample job using mlflow 20.04 py39 cpu as the environment."""
this_dir = Path(__file__).parent

subscription_id = os.environ.get("subscription_id")
Expand All @@ -28,27 +28,30 @@ def test_minimal_cpu_inference():
AzureCliCredential(), subscription_id, resource_group, workspace_name
)

env_name = "minimal_cpu_inference"
env_name = "mlflow_py39_inference"

env_docker_context = Environment(
build=BuildContext(path=this_dir / BUILD_CONTEXT),
name="minimal_cpu_inference",
description="minimal 20.04 py39 cpu inference environment created from a Docker context.",
name="mlflow_py39_inference",
description="mlflow 20.04 py39 cpu inference environment created from a Docker context.",
)
ml_client.environments.create_or_update(env_docker_context)

# create the command
job = command(
code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored
command="python main.py --score ${{inputs.score}}",
command="python main.py --model_dir ${{inputs.model_dir}} "
"--score ${{inputs.score}} --score_input ${{inputs.score_input}}",
inputs=dict(
score="valid_score.py",
score="/var/mlflow_resources/mlflow_score_script.py",
score_input="sample_2_0_input.txt",
model_dir="mlflow_2_0_model_folder"
),
environment=f"{env_name}@latest",
compute=os.environ.get("cpu_cluster"),
display_name="minimal-cpu-inference-example",
description="A test run of the minimal 20.04 py39 cpu inference curated environment",
experiment_name="minimalCPUInferenceExperiment"
display_name="mlflow-py39-inference-example",
description="A test run of the mlflow 20.04 py39 cpu inference curated environment",
experiment_name="mlflow39InferenceExperiment"
)

returned_job = ml_client.create_or_update(job)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""Validate minimal inference cpu environment by running azmlinfsrv."""

# imports
import json
import os
import subprocess
import requests
Expand All @@ -15,21 +16,32 @@
def main(args):
"""Start inference server and post scoring request."""
# start the server
server_process = start_server("/var/tmp", ["--entry_script", args.score, "--port", "8081"])
server_process = start_server("/var/tmp",
["--entry_script", args.score, "--port", "8081"],
args.model_dir)

# score a request
req = score_with_post()
with open(args.score_input) as f:
payload_data = json.load(f)

headers = {"Content-Type": "application/json"}
res = score_with_post(headers=headers, data=payload_data)
server_process.kill()

print(req)
print_file_contents("/var/tmp", "stderr.txt")
print_file_contents("/var/tmp", "stdout.txt")
print(res)


def start_server(log_directory, args, timeout=timedelta(seconds=15)):
def start_server(log_directory, args, model_dir, timeout=timedelta(seconds=60)):
"""Start inference server with options."""
stderr_file = open(os.path.join(log_directory, "stderr.txt"), "w")
stdout_file = open(os.path.join(log_directory, "stdout.txt"), "w")

env = os.environ.copy()
env["AZUREML_MODEL_DIR"] = os.path.dirname(os.path.abspath(__file__))
env["MLFLOW_MODEL_FOLDER"] = model_dir
print(os.path.abspath(__file__))
server_process = subprocess.Popen(["azmlinfsrv"] + args, stdout=stdout_file, stderr=stderr_file, env=env)

max_time = datetime.now() + timeout
Expand All @@ -50,9 +62,6 @@ def start_server(log_directory, args, timeout=timedelta(seconds=15)):
if status is not None:
break

print(log_directory, "stderr.txt")
print(log_directory, "stdout.txt")

return server_process


Expand All @@ -62,13 +71,27 @@ def score_with_post(headers=None, data=None):
return requests.post(url=url, headers=headers, data=data)


def print_file_contents(log_directory, file_name):
    """Print out file contents."""
    print(log_directory, file_name)
    # Resolve the log file inside the given directory and dump it to stdout.
    target_path = os.path.join(log_directory, file_name)
    try:
        with open(target_path, 'r') as log_file:
            print(log_file.read())
    except FileNotFoundError:
        # Missing log file is expected in some runs; report and continue.
        print("file path is not valid.")


def parse_args():
"""Parse input arguments."""
# setup arg parser
parser = argparse.ArgumentParser()

# add arguments
parser.add_argument("--score", type=str)
parser.add_argument("--model_dir", type=str)
parser.add_argument("--score_input", type=str)

# parse args
args = parser.parse_args()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
flavors:
python_function:
cloudpickle_version: 2.1.0
env:
conda: conda.yaml
virtualenv: python_env.yaml
loader_module: mlflow.pyfunc.model
python_model: python_model.pkl
python_version: 3.9.13
mlflow_version: 2.0.1
model_uuid: 687fb8fa7a044a1cb8ee79b5f76368f8
saved_input_example_info:
artifact_path: input_example.json
pandas_orient: split
type: dataframe
signature:
inputs: '[{"name": "a", "type": "double"}, {"name": "b", "type": "long"}, {"name":
"c", "type": "string"}]'
outputs: '[{"name": "a", "type": "double"}]'
utc_time_created: '2022-11-18 22:14:59.029851'
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
channels:
- conda-forge
- anaconda
dependencies:
- python=3.9.13
- pip
- pip:
- mlflow
- cloudpickle==2.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"columns": ["a", "b", "c"], "data": [[3.0, 1, "foo"]]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
python: 3.9.13
build_dependencies:
- pip
- setuptools==65.2.0
- wheel==0.37.1
dependencies:
- -r requirements.txt
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mlflow
cloudpickle==2.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"input_data":{"columns":["a", "b", "c"],"index":[0],"data":[[3.0, 1, "foo"]]}}

This file was deleted.

Loading