# Mongo_Con.py
from io import open
from pymongo import MongoClient
import pymongo
import numpy as np
from Variable import *
class DB_manager:
    """Load CSV datasets into MongoDB and read them back as NumPy arrays.

    ``client`` and ``db`` are class attributes: the client object is created
    once, when the class body is executed, and is shared by every instance.
    All data lives in the ``test`` database of the server at localhost:27017,
    in the ``training_data`` and ``test_data`` collections.

    Column names are taken from ``attr_list``, which this file does not
    define (presumably it is supplied by ``from Variable import *``).
    """

    client = MongoClient('localhost', 27017)
    db = client.test

    def import_training_data(self, training_file):
        """Replace the ``training_data`` collection with rows of a CSV file.

        Existing documents are deleted first and an index on
        ``training_set.type`` is requested; then every non-blank line of
        *training_file* is stored as ``{"training_set": {attr: value}}``.

        Raises:
            IndexError: if a line has fewer columns than ``attr_list``.
        """
        collection = self.db.training_data
        collection.delete_many({})
        collection.create_index("training_set.type")
        self._import_file(training_file, collection, "training_set")

    def import_test_data(self, test_file):
        """Replace the ``test_data`` collection with rows of a CSV file.

        Existing documents are deleted first; then every non-blank line of
        *test_file* is stored as ``{"test_set": {attr: value}}``.

        Raises:
            IndexError: if a line has fewer columns than ``attr_list``.
        """
        collection = self.db.test_data
        collection.delete_many({})
        self._import_file(test_file, collection, "test_set")

    def CART_fetch_data(self):
        """Return all stored rows with each sample as a list of features.

        Training rows (sorted by ascending ``type``) come first, followed by
        the test rows in cursor order.

        Returns:
            A tuple ``(dataset, dataTarget, training_len)``: the samples and
            their integer ``type`` labels as NumPy arrays, and the number of
            leading entries that belong to the training set.
        """
        return self._fetch(self._row_as_list)

    def MLP_fetch_data(self):
        """Return all stored rows with each sample as an attr -> value dict.

        Training rows (sorted by ascending ``type``) come first, followed by
        the test rows in cursor order.

        Returns:
            A tuple ``(dataset, dataTarget, training_len)``: the samples and
            their integer ``type`` labels as NumPy arrays, and the number of
            leading entries that belong to the training set.
        """
        return self._fetch(self._row_as_dict)

    def isfloat(self, value):
        """Return True if ``float(value)`` succeeds, False otherwise."""
        try:
            float(value)
        except (TypeError, ValueError):
            # TypeError covers non-string, non-numeric input such as None.
            return False
        return True

    def _import_file(self, path, collection, key):
        """Insert one ``{key: record}`` document per non-blank line of *path*."""
        # Iterate the file object directly so the whole file is never held
        # in memory at once.
        with open(path) as source:
            for line in source:
                if not line.strip():
                    continue  # e.g. a trailing empty line at end of file
                collection.insert_one({key: self._parse_line(line)})

    def _parse_line(self, line):
        """Turn one comma-separated line into an attr -> typed value dict."""
        # Drop the line terminator first; otherwise it stays glued to the
        # last column (text values would be stored with a trailing newline
        # and integer values would fall through to the float branch).
        columns = line.rstrip('\r\n').split(',')
        record = {}
        # Positional pairing; assumes the names in attr_list are unique.
        for position, attr in enumerate(attr_list):
            element = columns[position]
            if element.isdigit():
                element = int(element)
            elif self.isfloat(element):
                element = float(element)
            record[attr] = element
        return record

    def _fetch(self, build_row):
        """Read both collections and shape each sample with *build_row*."""
        training_cursor = self.db.training_data.find().sort(
            'training_set.type', pymongo.ASCENDING)
        test_cursor = self.db.test_data.find()
        # '!=' rather than 'is not': string identity is an interpreter
        # detail, equality is what is meant here.
        features = [attr for attr in attr_list if attr != 'type']

        dataset = []
        dataTarget = []
        self._append_rows(training_cursor, 'training_set', features,
                          build_row, dataset, dataTarget)
        # Everything appended so far is training data.
        training_len = len(dataset)
        self._append_rows(test_cursor, 'test_set', features,
                          build_row, dataset, dataTarget)
        return np.array(dataset), np.array(dataTarget), training_len

    def _append_rows(self, cursor, key, features, build_row, dataset, targets):
        """Append a sample and its integer label for each document in *cursor*."""
        for document in cursor:
            record = document[key]
            dataset.append(build_row(record, features))
            targets.append(int(record['type']))

    def _row_as_list(self, record, features):
        """Return the feature values of *record* as a list, in *features* order."""
        return [self._feature_value(record[attr]) for attr in features]

    def _row_as_dict(self, record, features):
        """Return the feature values of *record* as an attr -> value dict."""
        row = {}
        for attr in features:
            row[attr] = self._feature_value(record[attr])
        return row

    def _feature_value(self, value):
        """Return *value* ASCII-encoded when possible, else unchanged."""
        try:
            return value.encode('ascii')
        except (AttributeError, UnicodeError):
            # Numbers have no .encode(); text that is not pure ASCII cannot
            # be encoded. In both cases the stored value is used as is.
            return value