-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkdd_cnn.py
130 lines (111 loc) · 4.87 KB
/
kdd_cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import tensorflow._api.v2.compat.v1 as tf
tf.disable_v2_behavior()
import random
import csv
def next_batch(feature_list, label_list, size):
    """Return a random batch of `size` aligned (features, labels) pairs.

    Samples `size` distinct indices from the dataset and gathers the
    corresponding rows from both lists, so feature/label pairing is
    preserved.

    Args:
        feature_list: sequence of feature rows.
        label_list: sequence of label rows, index-aligned with feature_list.
        size: number of rows to sample (must be <= len(feature_list)).

    Returns:
        (feature_batch, label_batch) — two lists of length `size`.
    """
    indices = random.sample(range(len(feature_list)), size)
    # One comprehension per output; both read the same index sample,
    # replacing the original's two separate append loops.
    feature_batch = [feature_list[i] for i in indices]
    label_batch = [label_list[i] for i in indices]
    return feature_batch, label_batch
def weight_variable(shape):
    """Create a weight Variable of the given shape, named 'W',
    initialised from a truncated normal distribution (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='W')
def bias_variable(shape):
    """Create a bias Variable of the given shape, named 'b',
    initialised to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name='b')
def conv2d(x, W):
    """2-D convolution of input x with kernel W.

    Stride 1 in every dimension and SAME padding, so the spatial
    output size equals the input size.
    """
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    """2x2 max pooling with stride 2 and SAME padding.

    Halves each spatial dimension of x (ksize/stride are
    [1, height, width, 1]).
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def load_data():
    """Load the KDD-Cup CSV into module-level globals.

    Populates `feature`/`label` and `feature_full`/`label_full`.
    NOTE(review): both reads use the same hard-coded file, so the two
    feature/label sets come out identical — presumably a placeholder
    for separate train/test files; confirm the intended paths.

    Each CSV row is expected to be fully numeric-encoded: columns 0-35
    are taken as features and column 41 is an integer class id in
    [0, 23) used to build a one-hot label vector.
    """
    global feature, label, feature_full, label_full

    def _read(path):
        # Single pass over one CSV file; replaces the original's two
        # duplicated read loops.
        feats, labs = [], []
        with open(path, 'r') as fh:
            for row in csv.reader(fh):
                one_hot = [0] * 23
                one_hot[int(row[41])] = 1
                feats.append(row[:36])
                labs.append(one_hot)
        return feats, labs

    feature, label = _read('kddcup.data_10_percent_corrected.csv')
    feature_full, label_full = _read('kddcup.data_10_percent_corrected.csv')
if __name__ == '__main__':
    # Load the dataset; load_data() populates the module-level
    # feature/label globals (no `global` declaration is needed — or
    # meaningful — at module scope, so the original no-op statements
    # are removed).
    load_data()
    # NOTE(review): train and test both come from the same file inside
    # load_data(), so these splits are identical — confirm whether
    # separate train/test files were intended.
    feature_test = feature
    feature_train = feature_full
    label_test = label
    label_train = label_full

    # Input placeholders: 36 numeric features reshaped into a 6x6
    # single-channel "image"; 23 one-hot output classes.
    xs = tf.placeholder(tf.float32, [None, 36])   # 6x6
    ys = tf.placeholder(tf.float32, [None, 23])
    keep_prob = tf.placeholder(tf.float32)        # dropout keep probability
    x_image = tf.reshape(xs, [-1, 6, 6, 1])       # -1 = batch dim; 1 channel

    ## First convolutional layer ##
    W_conv1 = weight_variable([3, 3, 1, 32])      # 3x3 kernel, 1 in-channel, 32 out-channels
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.sigmoid(conv2d(x_image, W_conv1) + b_conv1)  # output: 6x6x32
    h_pool1 = max_pool_2x2(h_conv1)               # output: 3x3x32

    ## First fully-connected layer (with dropout) ##
    W_fc1 = weight_variable([3 * 3 * 32, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool1, [-1, 3 * 3 * 32])
    h_fc1 = tf.nn.sigmoid(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    ## Second fully-connected layer (logits -> softmax over 23 classes) ##
    W_fc2 = weight_variable([1024, 23])
    b_fc2 = bias_variable([23])
    prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Cross-entropy loss. Clip the softmax output away from 0 so that
    # log(0) cannot produce NaN/inf in the loss or its gradients.
    cross_entropy = -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
    # Accuracy: fraction of rows where argmax(prediction) == argmax(label).
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Adam optimiser minimising the cross-entropy loss.
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.Session() as sess:
        # Initialise all variables before training.
        sess.run(tf.global_variables_initializer())
        for step in range(501):
            # Stochastic training: a fresh random batch of 1000 rows each step.
            feature_train_batch, label_train_batch = next_batch(feature_train, label_train, 1000)
            feature_test_batch, label_test_batch = next_batch(feature_test, label_test, 1000)
            sess.run(train_step, feed_dict={xs: feature_train_batch, ys: label_train_batch, keep_prob: 0.5})
            if step % 50 == 0:
                # Print a sample of predictions vs. labels (rows 7..26) plus
                # full-test-set accuracy and current-train-batch accuracy.
                print(step,
                      sess.run(tf.argmax(prediction, 1)[7:27], feed_dict={xs: feature_test, ys: label_test, keep_prob: 1}),
                      sess.run(tf.argmax(ys, 1)[7:27], feed_dict={xs: feature_test, ys: label_test, keep_prob: 1}),
                      sess.run(accuracy, feed_dict={xs: feature_test, ys: label_test, keep_prob: 1}),
                      sess.run(accuracy, feed_dict={xs: feature_train_batch, ys: label_train_batch, keep_prob: 1}))