# classifierAgents.py
# parsons/07-oct-2017
#
# Version 1.0
#
# Some simple agents to work with the PacMan AI projects from:
#
# http://ai.berkeley.edu/
#
# These use a simple API that allows us to control Pacman's interaction with
# the environment, adding a layer on top of the AI Berkeley code.
#
# As required by the licensing agreement for the PacMan AI we have:
#
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# ([email protected]) and Dan Klein ([email protected]).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel ([email protected]).
# The agents here are extensions written by Simon Parsons, based on the code in
# pacmanAgents.py
# This solution to the coursework project, assigned by Simon Parsons, is written by Nghi Bao Le.
# Information on the coursework project is in the README.
#
from pacman import Directions
from game import Agent
import api
import random
import game
import util
import sys
import os
import csv
import numpy as np
from sklearn import tree
#
# ClassifierAgent
#
# An agent that runs a classifier to decide what to do.
class ClassifierAgent(Agent):

    # Constructor. This gets run when the agent starts up.
    def __init__(self):
        print "Initialising"

    # Take a string of digits and convert to an array of
    # numbers. Exploits the fact that we know the digits are in the
    # range 0-4.
    #
    # There are undoubtedly more elegant and general ways to do this,
    # exploiting ASCII codes.
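    #
    # For example (assuming, as in the code below, that each line ends with
    # a newline character, which is dropped):
    #   convertToArray("0213\n") -> [0, 2, 1, 3]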
    def convertToArray(self, numberString):
        numberArray = []
        for i in range(len(numberString) - 1):
            if numberString[i] == '0':
                numberArray.append(0)
            elif numberString[i] == '1':
                numberArray.append(1)
            elif numberString[i] == '2':
                numberArray.append(2)
            elif numberString[i] == '3':
                numberArray.append(3)
            elif numberString[i] == '4':
                numberArray.append(4)
        return numberArray
    # General description of the multi-class Support Vector Machine:
    #
    # We train a K x (D + 1) weight matrix, where K is the number of classes
    # and D is the dimension of the feature vector; the extra column is the
    # bias dimension. This matrix evaluates a feature vector and gives out K
    # scores, one associated with each class. The class that has the highest
    # score is assigned to that feature vector.
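    #
    # In symbols, for an augmented feature vector x (with the bias value
    # appended):
    #   scores = W x                  # a vector of K scores
    #   predicted class = argmax_k scores[k]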
    #
    # The key thing here is how to train the weight matrix. We will use a
    # formulation of the loss function stated here: http://cs231n.github.io/linear-classify/ or
    # (Weston and Watkins 1999): https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es1999-461.pdf
    #
    # This formulation, in the primal, can be optimized using gradient descent.
    #
    # The loss, also called a hinge loss, can be generally described as follows:
    #
    # The function adds cost when the score of a wrong class is close to the
    # score of the correct class. When the score of a wrong class differs from
    # the correct class by more than a defined margin, the function incurs no
    # cost.
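    #
    # Concretely (this is the formulation from the links above): for a sample
    # x_i with correct class y_i and scores s = W x_i,
    #   L_i = sum over j != y_i of max(0, s_j - s_{y_i} + margin)
    # and the data loss is the mean of L_i over all samples.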
    #
    # Basically, the training process pushes the W matrix towards assigning
    # higher scores to the correct class, and lower scores to the wrong classes.
    #
    # The hinge loss requires one hyperparameter, the margin. The higher the
    # margin, the more we want the scores of the wrong classes to differ from
    # the score of the correct class.
    #
    # First we code up the loss function, and then we optimize the loss using
    # gradient descent.

    # This is the loss function:
    def TotalLoss(self, X, y, W, margin = 1):
        # We calculate the score matrix:
        score_matrix = np.matmul(W, X.transpose()) # The score_matrix is KxN, e.g. 4x100
        # Every column of score_matrix is a score vector associated with one
        # feature vector, and every score vector contains four scores, one
        # associated with each class.
        # Below is the vector of the scores of the correct class for each
        # feature vector:
        score_correct_class = score_matrix[y, np.arange(score_matrix.shape[1])]
        # Now we calculate the loss matrix:
        loss = np.maximum(0, (score_matrix - score_correct_class) + margin)
        # At this point, the loss matrix is 4x100, with each column
        # corresponding to a feature vector and each row to a class.
        # We now need to sum each column but drop the loss associated with the
        # correct class. We drop those losses by setting their values to 0:
        loss[y, np.arange(score_matrix.shape[1])] = 0
        # We average the sum of each column to get the data loss:
        return np.mean(np.sum(loss, axis = 0))
    # This loss function is the data loss stated in the link: http://cs231n.github.io/linear-classify/
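    #
    # A quick shape sketch (hypothetical sizes, matching the comments above):
    #   X: 100x25, y: shape (100,), W: 4x25
    #   np.matmul(W, X.transpose()): 4x100
    #   TotalLoss(X, y, W): a single scalar, the mean hinge loss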
    # This is the training function:
    def TrainSVMClassifier(self, X, y, learning_rate = 0.001, iterations = 100, margin = 1):
        # Save useful information:
        number_of_classes = len(np.unique(y))
        number_of_dimensions = X.shape[1]
        number_of_samples = X.shape[0]
        # Initialize a random weight matrix with mean 0 and 0.1 standard deviation
        self.W = np.random.normal(0, 0.1, (number_of_classes, number_of_dimensions))
        # Make a copy of the weight matrix in order to save the best weight
        # matrix, the one that minimizes the loss:
        self.W_best = np.copy(self.W)
        ##### The gradient of the loss function is as follows:
        #
        # 1. With respect to the weights of the correct class, we count the
        # total number of classes that add to the loss and scale the feature
        # vector by that count.
        #
        # 2. With respect to the weights of an incorrect class, if that class
        # contributes to the loss then the gradient is just the feature
        # vector; if it does not contribute, the gradient is 0.
        #
        #####
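        # In symbols, with indicator 1[.], margin m, sample x_i, scores s and
        # correct class y_i (again the formulation from the cs231n link):
        #   dL_i/dW_{y_i} = -( sum over j != y_i of 1[s_j - s_{y_i} + m > 0] ) * x_i
        #   dL_i/dW_j     =  1[s_j - s_{y_i} + m > 0] * x_i,  for j != y_i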
        ##### The training 'for' loop:
        for a in np.arange(iterations):
            # Calculate the loss matrix of the current weight matrix. This
            # part is similar to the total loss function above; it is
            # recomputed on every pass so that the updates below use the
            # gradient at the current weights rather than a stale one:
            score_matrix = np.matmul(self.W, X.transpose())
            score_correct_class = score_matrix[y, np.arange(score_matrix.shape[1])]
            loss_matrix = np.maximum(0, (score_matrix - score_correct_class) + margin)
            loss_matrix[y, np.arange(score_matrix.shape[1])] = 0
            ## First, we calculate the gradient with respect to the weights of
            # the correct class. This simulates the indicator function: any
            # class that adds to the loss is assigned a value of 1:
            loss_matrix[loss_matrix > 0] = 1
            # Now we get the gradient matrix for the weights of the correct class:
            gradient_correct_class = - np.sum(loss_matrix, axis = 0) * X.transpose()
            # gradient_correct_class is a 25x100 matrix of scaled feature
            # vectors, which is our gradient for the correct class weights.
            ## Second, the gradient for the incorrect class weights is applied
            # directly in the 'Update the incorrect class weights' step below.
            for i in np.arange(number_of_samples):
                #### Update the correct class weights:
                # To update the correct class weights with
                # gradient_correct_class, we pick out for each feature vector
                # the weights of its correct class and update them appropriately:
                self.W[y[i]] = self.W[y[i]] - learning_rate * gradient_correct_class[:, i]
                #### Update the incorrect class weights:
                # Notice that a row index of loss_matrix where the value is 1
                # tells us which weight vector to add the feature vector into,
                # and each column index is associated with one feature vector.
                #
                # So the plan is to create one 4x25 matrix per feature vector,
                # with the feature vector at the appropriate rows, and add it
                # into the weights that belong to the incorrect classes:
                self.W = self.W - learning_rate * np.matmul(loss_matrix[:, [i]], X[[i]])
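                # (The matmul above is exactly that outer product: the
                # indicator column loss_matrix[:, [i]] is 4x1, the feature
                # row X[[i]] is 1x25, and their product is 4x25.)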
            # Save the best weight matrix. Copy rather than alias, since
            # self.W is also updated in place above:
            if self.TotalLoss(X, y, self.W) <= self.TotalLoss(X, y, self.W_best):
                self.W_best = np.copy(self.W)
    # This function accepts a feature vector and uses the trained weight
    # matrix to return a number representing a move.
    def PredictMove(self, x):
        return np.argmax(np.matmul(self.W_best, np.array(x).transpose()))
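    # For example (hypothetical scores): if np.matmul(...) comes out as
    # [0.2, 1.3, -0.4, 0.1], PredictMove returns 1, which
    # convertNumberToMove below maps to East.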
    # This gets run on startup. Has access to state information.
    #
    # Here we use it to load the training data.
    def registerInitialState(self, state):
        # open datafile, extract content into an array, and close.
        self.datafile = open('good-moves.txt', 'r')
        content = self.datafile.readlines()
        self.datafile.close()
        # Now extract data, which is in the form of strings, into an
        # array of numbers, and separate into matched data and target
        # variables.
        self.data = []
        self.target = []
        # Turn content into nested lists
        for i in range(len(content)):
            lineAsArray = self.convertToArray(content[i])
            dataline = []
            for j in range(len(lineAsArray) - 1):
                dataline.append(lineAsArray[j])
            self.data.append(dataline)
            targetIndex = len(lineAsArray) - 1
            self.target.append(lineAsArray[targetIndex])
        # data and target are both arrays of arbitrary length.
        #
        # data is an array of arrays of integers (0 or 1) indicating state.
        #
        # target is an array of integers 0-3 indicating the action
        # taken in that state.
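        #
        # For example, a (hypothetical, shortened) line "01001\n" in
        # good-moves.txt would yield the feature vector [0, 1, 0, 0] in data
        # and the target 1 (the last digit before the newline).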
        # *********************************************
        #
        # Any other code you want to run on startup goes here.
        #
        # Convert data to numpy arrays
        self.data = np.array(self.data)
        self.target = np.array(self.target)
        # Append a bias value of 1 at the end of each feature vector (a zero
        # here would leave the bias weights with no effect on the scores)
        self.data = np.append(self.data, np.ones((len(self.data), 1)), axis = 1)
        self.TrainSVMClassifier(self.data, self.target)
        #
        # *********************************************

    # Tidy up when Pacman dies
    def final(self, state):
        print "I'm done!"
        # *********************************************
        #
        # Any code you want to run at the end goes here.
        print "My loss is: " + str(self.TotalLoss(self.data, self.target, self.W_best))
        #
        # *********************************************
    # Turn the numbers from the feature set into actions:
    def convertNumberToMove(self, number):
        if number == 0:
            return Directions.NORTH
        elif number == 1:
            return Directions.EAST
        elif number == 2:
            return Directions.SOUTH
        elif number == 3:
            return Directions.WEST
    # Here we just run the classifier to decide what to do
    def getAction(self, state):
        # How we access the features.
        features = api.getFeatureVector(state)
        # *****************************************************
        #
        # Here you should insert code to call the classifier to
        # decide what to do based on features and use it to decide
        # what action to take.
        #
        # Append the bias value, matching the training data above:
        features = np.append(features, 1)
        number = self.PredictMove(features)
        move = self.convertNumberToMove(number)
        #
        # *******************************************************
        # Get the actions we can try.
        legal = api.legalActions(state)
        # Fall back on a random legal move, so that Pacman does not get
        # completely stuck in a corner when the predicted move is not legal.
        if move not in legal:
            move = random.choice(legal)
        # getAction has to return a move. Here we pass the chosen move to the
        # API, along with the set of legal moves so it can do some safety
        # checking.
        return api.makeMove(move, legal)
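
# A minimal standalone sketch (hypothetical random data; it assumes this file
# sits alongside the Berkeley Pacman code so that the imports at the top
# succeed) that exercises the trainer outside of a game:
if __name__ == '__main__':
    X_demo = np.random.randint(0, 2, (100, 24)).astype(float) # 100 binary feature vectors
    y_demo = np.random.randint(0, 4, 100)                     # 100 targets in the range 0-3
    X_demo = np.append(X_demo, np.ones((100, 1)), axis = 1)   # append the bias column
    agent = ClassifierAgent()
    agent.TrainSVMClassifier(X_demo, y_demo)
    print "Demo loss:", agent.TotalLoss(X_demo, y_demo, agent.W_best)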