inference.py
#!/usr/bin/env python
import os

import boto3
import sagemaker
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.sklearn.processing import SKLearnProcessor
# start session
sagemaker_session = sagemaker.Session()
# get role with sagemaker, s3, redshift permissions
iam = boto3.client('iam')
role = iam.get_role(RoleName='datascience-sagemaker-s3-redshift')['Role']['Arn']
# default S3 bucket for the session
bucket = sagemaker_session.default_bucket()
# preprocess the raw data before running inference
print('Starting preprocessing job')
# S3 location of the raw data to run inference on
data = os.path.join('s3://', bucket, 'titanic_example', 'data.csv')
sklearn_processor = SKLearnProcessor(framework_version='0.23-1',
                                     role=role,
                                     instance_type='ml.t3.medium',
                                     instance_count=1)
sklearn_processor.run(code='modules/preprocessing.py',
                      inputs=[ProcessingInput(source=data,
                                              destination='/opt/ml/processing/input')],
                      outputs=[ProcessingOutput(output_name='processed_data',
                                                source='/opt/ml/processing/data')],
                      arguments=['--inference', 'true'])
# default output destination: s3://<bucket>/<processing-job-name>/output/<output-name>/
preprocessing_job_description = sklearn_processor.jobs[-1].describe()
processed_data = os.path.join('s3://', bucket, preprocessing_job_description['ProcessingJobName'], 'output', 'processed_data', 'data.csv')
# load training job name
with open("training_job_name.txt", "r") as job:
    job_name = job.read().splitlines()[0]
# load model for inference
model_artifact = os.path.join('s3://', bucket, job_name, 'output', 'model.tar.gz')  # fancy name for pickle
model = SKLearnModel(model_data=model_artifact,
                     role=role,
                     framework_version='0.23-1',
                     entry_point='modules/model.py')
# batch transform job to generate predictions
# output_path is an S3 prefix; Batch Transform appends '.out' to each input file name
# (the 'titanic_example/predictions' prefix is an assumption; adjust to your layout)
output_path = os.path.join('s3://', bucket, 'titanic_example', 'predictions')
transformer = model.transformer(
    instance_count=1,
    instance_type="ml.m4.xlarge",
    assemble_with="Line",
    accept="text/csv",
    output_path=output_path)
# transform() starts the job and returns None; wait() blocks until it finishes
transformer.transform(data=processed_data, content_type='text/csv', split_type='Line')
transformer.wait()
print(processed_data)
print('Predictions written to', transformer.output_path)
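# --- optional: fetch the predictions once the transform job has finished ---
# A minimal sketch, assuming the preprocessed input keeps the name 'data.csv'
# (so Batch Transform writes '<output_path>/data.csv.out'); the local file name
# 'predictions.csv' is only an example.
from urllib.parse import urlparse

s3 = boto3.client('s3')
parsed = urlparse(transformer.output_path)  # e.g. s3://<bucket>/<prefix>
s3.download_file(parsed.netloc,
                 parsed.path.lstrip('/') + '/data.csv.out',
                 'predictions.csv')
with open('predictions.csv') as f:
    print(f.read())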