-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenv.py
66 lines (52 loc) · 1.9 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#Define Environment
import gym
from gym import spaces
import numpy as np
class SineEnv(gym.Env):
    """Custom gym environment in which the agent tracks a supplied target.

    The environment keeps a running value ``val``; every action is added to
    it.  On each step the caller passes the current target (``sine_value``)
    and the reward is the inverse squared distance between target and
    ``val``.

    Note that ``step`` takes an extra ``sine_value`` argument and returns a
    5-tuple ``(obs, val, reward, done, info)``, so it deliberately differs
    from the standard ``gym.Env.step`` signature.
    """

    def __init__(self, num_actions=1, num_states=2,
                 lower_action_bound=-1, higher_action_bound=1):
        """Create the environment.

        Args:
            num_actions: Number of components of the continuous action.
            num_states: Number of components of the observation space.
            lower_action_bound: Lower bound of every action component.
            higher_action_bound: Upper bound of every action component.
        """
        super().__init__()
        self.val = 0  # running sum of all actions taken so far
        self.reward = 0  # reward produced by the most recent step
        # The attribute keeps its original (misspelled) name because code
        # outside this class may read or override it.
        self.episode_lenght = 100
        self.current_step = 0

        # Action and observation spaces must be gym.spaces objects.
        # Actions are continuous values between the two action bounds.
        self.action_space = spaces.Box(
            low=lower_action_bound,
            high=higher_action_bound,
            shape=(num_actions,),
        )
        # NOTE(review): the 0..255 bounds look inherited from an image
        # example and may not match the values actually observed - confirm.
        self.observation_space = spaces.Box(low=0, high=255, shape=(num_states,))

    def step(self, action, sine_value):
        """Execute one time step within the environment.

        Args:
            action: Increment that is added to the tracked value ``val``.
            sine_value: Target value for this step; it is returned unchanged
                as the observation.

        Returns:
            A tuple ``(obs, val, reward, done, info)``: ``obs`` is
            ``sine_value``, ``val`` is the updated tracked value, ``reward``
            is ``1 / (obs - val) ** 2`` (or 10000 when both are equal),
            ``done`` is True once ``episode_lenght`` steps have been taken
            since the last reset, and ``info`` mirrors ``done``.
        """
        self.current_step += 1
        obs = sine_value
        self.val = self.val + action

        if obs != self.val:
            # NOTE(review): for a very small but non-zero error this value
            # exceeds the 10000 used for an exact match - confirm intended.
            self.reward = 1 / np.power(obs - self.val, 2)
        else:
            # Exact match: use a fixed reward to avoid dividing by zero.
            print("val == obs")
            self.reward = 10000

        done = self.current_step >= self.episode_lenght
        info = done
        return obs, self.val, self.reward, done, info

    def reset(self):
        """Reset the environment to its initial state.

        Returns:
            The initial state ``(0, 0)`` as a NumPy array.
        """
        self.state = (0, 0)
        # Clear the accumulated action sum as well; otherwise the tracked
        # value of the previous episode carries over into the next one.
        self.val = 0
        self.current_step = 0
        self.reward = 0
        return np.array(self.state)

    def render(self, mode='human', close=False):
        """Print the reward of the most recent step.

        ``mode`` and ``close`` are accepted but not used.
        """
        print(self.reward)