-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrad_inverter.py
22 lines (19 loc) · 1.37 KB
/
grad_inverter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import tensorflow as tf
class grad_inverter:
    """Scale action gradients by the headroom left inside the action bounds.

    For each gradient entry the graph built here computes:

    * gradient > 0:  ``grad * (upper - action) / (upper - lower)``
    * otherwise:     ``grad * (action - lower) / (upper - lower)``

    so an entry is scaled towards zero as the action approaches the bound
    that the gradient pushes it towards.
    NOTE(review): this looks like the "inverting gradients" scheme used with
    bounded-action actor-critic training -- confirm against the caller.

    The graph is built once in ``__init__``; ``invert`` only runs it.
    """

    def __init__(self, action_bounds, sess):  # [lower, upper]
        """Build the gradient-scaling graph.

        Args:
            action_bounds: Two equally long sequences, ``action_bounds[0]``
                holding the per-dimension lower bounds and
                ``action_bounds[1]`` the upper bounds.
            sess: Session object whose ``run`` method evaluates the graph.

        Raises:
            ValueError: If the two bound sequences differ in length, or if
                any upper bound is not strictly greater than its lower
                bound (the range is used as a divisor, so a zero or
                negative width would yield inf/NaN or sign-flipped output).
        """
        lower, upper = action_bounds[0], action_bounds[1]
        if len(lower) != len(upper):
            raise ValueError(
                "action_bounds lower/upper lengths differ: %d vs %d"
                % (len(lower), len(upper))
            )
        widths = [hi - lo for lo, hi in zip(lower, upper)]
        # `not w > 0` (rather than `w <= 0`) also rejects NaN widths.
        if any(not w > 0 for w in widths):
            raise ValueError(
                "every upper action bound must be strictly greater than its "
                "lower bound"
            )

        self.sess = sess
        self.action_size = len(lower)

        # Op creation order is kept as in the original so auto-generated
        # graph node names for the placeholders/constants do not shift.
        self.action_input = tf.placeholder(tf.float32, [None, self.action_size])
        self.pmax = tf.constant(upper, dtype=tf.float32)
        self.pmin = tf.constant(lower, dtype=tf.float32)
        self.prange = tf.constant(widths, dtype=tf.float32)

        # Fraction of the range remaining above / below the current action.
        # The `/` operator replaces the deprecated `tf.div`.
        self.pdiff_max = (-self.action_input + self.pmax) / self.prange
        self.pdiff_min = (self.action_input - self.pmin) / self.prange

        self.zeros_act_grad_filter = tf.zeros([self.action_size])
        self.act_grad = tf.placeholder(tf.float32, [None, self.action_size])

        # Positive entries are scaled by the headroom to the upper bound,
        # all others (including exact zeros) by the headroom to the lower.
        self.grad_inverter = tf.where(
            tf.greater(self.act_grad, self.zeros_act_grad_filter),
            tf.multiply(self.act_grad, self.pdiff_max),
            tf.multiply(self.act_grad, self.pdiff_min),
        )

    def invert(self, grad, action):
        """Run the scaling graph for a batch of gradients and actions.

        Args:
            grad: Values fed to the ``[None, action_size]`` gradient
                placeholder.
            action: Values fed to the ``[None, action_size]`` action
                placeholder.

        Returns:
            Whatever ``sess.run`` returns for the scaled-gradient tensor.
        """
        feeds = {self.action_input: action, self.act_grad: grad}
        return self.sess.run(self.grad_inverter, feed_dict=feeds)