-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredictfrom_model.py
105 lines (66 loc) · 3.48 KB
/
predictfrom_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
from log.applogger import Applogger
from Datagetter_prediction.Dataingestion import data
from file_operation.file_operation import file_op
from Predictioin_file.Prediction_validation import prediction_validation
from Preprocessing.Preprocessing_file import preprocess
import pandas as pd
from prediction_validation import pred
class predictfrom_model:
    """Score the prediction data with the saved KMeans and per-cluster models.

    The workflow implemented by :meth:`predict` is: fetch the data, drop a
    fixed list of columns, impute missing values, encode and scale the
    features, assign every row to a cluster with the saved ``'KMeans'`` model,
    score each cluster with the model selected for it, and write the
    resulting ``'Y'``/``'N'`` labels to a CSV file.
    """

    # Both paths are relative to the current working directory; the
    # directories must already exist.
    _LOG_PATH = 'Prediction_Logs/Prediction_Logs.txt'
    _OUTPUT_PATH = 'Prediction_Output_File/Predictions.csv'

    # Columns removed before inference (per the original author's note, they
    # do not contribute to the prediction).
    _DROPPED_COLUMNS = [
        'policy_number', 'policy_bind_date', 'policy_state', 'insured_zip',
        'incident_location', 'incident_date', 'incident_state',
        'incident_city', 'insured_hobbies', 'auto_make', 'auto_model',
        'auto_year', 'age', 'total_claim_amount',
    ]

    def __init__(self, path):
        """Create the collaborators used by :meth:`predict`.

        Args:
            path: Forwarded unchanged to ``prediction_validation``.
        """
        self.log = Applogger()
        self.p = preprocess()
        # NOTE: despite its name this attribute holds the prediction
        # validator; the preprocessor lives in ``self.p``.
        self.preprocess = prediction_validation(path)
        self.data = data()
        self.file = file_op()

    def predict(self):
        """Run the full prediction pipeline and write the predictions CSV.

        Any exception raised by the pipeline is written to the prediction log
        and swallowed, so the method returns ``None`` either way. Failure to
        open the log file itself still propagates to the caller.
        """
        # A single handle, opened in append mode and closed by the context
        # manager, serves both progress and error messages. Errors no longer
        # truncate the log and the handle is no longer leaked.
        with open(self._LOG_PATH, 'a+') as log_file:
            try:
                # Delete the prediction file left over from the last run.
                self.preprocess.deletePredictionFile()
                self.log.log(log_file, 'Start of Prediction')

                features = self._prepare_features(self.data.get())
                labels = self._predict_labels(features)

                final = pd.DataFrame({'Predictions': labels})
                # Append mode is kept from the original implementation.
                final.to_csv(self._OUTPUT_PATH, header=True, mode='a+')
                self.log.log(log_file, 'End of Prediction')
            except Exception as e:
                self.log.log(log_file, str(e))

    def _prepare_features(self, frame):
        """Drop unused columns, impute, encode and scale the raw data."""
        frame = self.p.removecolumn(frame, self._DROPPED_COLUMNS)
        # '?' marks a missing value in the raw data; turn it into NaN so the
        # imputer can see it. ``np.nan`` is used because the ``np.NaN`` alias
        # was removed in NumPy 2.0.
        frame.replace('?', np.nan, inplace=True)

        is_null_present, cols_with_missing_values = self.p.null_present(frame)
        if is_null_present:
            frame = self.p.imputemissingvalue(frame, cols_with_missing_values)

        frame = self.p.encode_cat(frame)
        return self.p.scale_numerical(frame)

    def _predict_labels(self, features):
        """Return one 'Y'/'N' label per row of *features*, in row order."""
        kmeans = self.file.load_model('KMeans')
        cluster_of_row = np.asarray(kmeans.predict(features))

        # Results are written back through a positional mask so that label i
        # always belongs to input row i, whatever order clusters are scored in.
        labels = np.empty(len(cluster_of_row), dtype=object)
        for cluster in pd.unique(cluster_of_row):
            in_cluster = cluster_of_row == cluster
            model_name = self.file.find_correct_model(cluster)
            model = self.file.load_model(model_name)
            result = np.asarray(model.predict(features.loc[in_cluster])).ravel()
            # A model output of 0 maps to 'N'; anything else maps to 'Y'.
            labels[in_cluster] = np.where(result == 0, 'N', 'Y')
        return labels
#c=predictfrom_model(r'C:\Users\91639\Desktop\insurancefraud\Prediction_FileFromDB')
#c.predict()