Merge pull request #19 from arabian9ts/bgr-transfer

BGR transfer
arabian9ts · May 13, 2018 · 995a014 · 995a014 · zxb-silence · Feb 20, 2019
2 parents 8065904 + 771582c
commit 995a014
Show file tree

Hide file tree

Showing 5 changed files with 183 additions and 72 deletions.
diff --git a/inference.py b/inference.py
@@ -0,0 +1,48 @@
+"""
+inference script
+
+date: 3/17
+author: arabian9ts
+"""
+
+import cv2
+import sys
+from util.util import *
+from model.ssd300 import *
+
+def inference(image_name):
+    """
+    detect objects on one image and draw the detections on it
+
+    Args: image file name, looked up under ./voc2007/
+    Return: image restored to its original size, with a box and
+            a class-id label drawn for each detected object
+    Raises: ValueError if image_name is None
+    """
+    if image_name is None:
+        # raise the error; returning the exception object would hand it
+        # to the caller as if it were the resulting image
+        raise ValueError('not specified image name to be drawed')
+
+    fontType = cv2.FONT_HERSHEY_SIMPLEX
+    color = (0, 0, 255)
+    img, w, h, _, = preprocess('./voc2007/'+image_name)
+    # `ssd` is not a parameter: it is the module-level SSD300 instance
+    # created by the script section of this file
+    pred_confs, pred_locs = ssd.infer(images=[img])
+    locs, labels = ssd.ssd.detect_objects(pred_confs, pred_locs)
+    img = deprocess(img, w, h)
+    if len(labels) and len(locs):
+        for label, loc in zip(labels, locs):
+            loc = center2corner(loc)
+            loc = convert2diagonal_points(loc)
+            # box values are multiplied by the original width / height
+            # to get pixel positions on the restored image
+            top_left = (int(loc[0]*w), int(loc[1]*h))
+            bottom_right = (int(loc[2]*w), int(loc[3]*h))
+            cv2.rectangle(img, top_left, bottom_right, color, 1)
+            cv2.putText(img, str(int(label)), top_left, fontType, 0.7, color, 1)
+
+    return img
+
+
+# detect objects on a specified image.
+# runs only when exactly one command line argument (the image name) is given
+if 2 == len(sys.argv):
+    import os
+
+    # tensorflow session
+    sess = tf.Session()
+    ssd = SSD300(sess)
+    sess.run(tf.global_variables_initializer())
+
+    # parameter saver: trained parameters are restored from the checkpoint
+    saver = tf.train.Saver()
+    saver.restore(sess, './checkpoints/params.ckpt')
+    img = inference(sys.argv[1])
+
+    # make sure the output directory exists before saving the result,
+    # otherwise the image can not be written on a fresh checkout
+    if not os.path.exists('./evaluated'):
+        os.mkdir('./evaluated')
+    cv2.imwrite('./evaluated/'+sys.argv[1], img)
+
+    # show the result until a key is pressed
+    cv2.namedWindow("img", cv2.WINDOW_NORMAL)
+    cv2.imshow("img", img)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
+    sys.exit()
diff --git a/model/SSD300.py b/model/SSD300.py
@@ -47,12 +47,13 @@ def __init__(self, sess):
         # provides matching method
         self.matcher = Matcher(fmap_shapes, self.dboxes)
 
-    # evaluate loss
-    def eval(self, images, actual_data, is_training):
-        if not is_training:
-            feature_maps, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})
-            return pred_confs, pred_locs
+    # inference process
+    def infer(self, images):
+        """
+        run the prediction tensors on the given images
+
+        Args: batch of images fed to the input placeholder
+        Return: predicted confs and predicted locs
+                (the feature maps are evaluated as well but not returned)
+        """
+        feature_maps, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})
+        return pred_confs, pred_locs
 
+    # training process
+    def train(self, images, actual_data):
         # ================ RESET / EVAL ================ #
         positives = []
         negatives = []
@@ -96,4 +97,4 @@ def prepare_loss(pred_confs, pred_locs, actual_labels, actual_locs):
         self.sess.run(self.train_step, \
         feed_dict={self.input: images, self.pos: positives, self.neg: negatives, self.gt_labels: ex_gt_labels, self.gt_boxes: ex_gt_boxes})
 
-        return pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
+        return pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
diff --git a/model/ssd300.py b/model/ssd300.py
@@ -0,0 +1,100 @@
+"""
+SSD300 is SSD wrapper class.
+
+date: 10/18
+author: arabian9ts
+"""
+
+import tensorflow as tf
+import numpy as np
+
+from model.ssd import *
+from matcher import Matcher
+from model.computation import *
+from model.default_box import *
+
+
+class SSD300:
+    """
+    wrapper that owns the SSD graph, its loss / optimizer and the matcher
+    """
+
+    def __init__(self, sess):
+        """
+        initialize SSD model as SSD300 whose input size is 300x300
+
+        Args: tensorflow session used by infer and train
+        """
+        self.sess = sess
+
+        # input placeholder: batches of 300x300 images with 3 channels
+        self.input = tf.placeholder(shape=[None, 300, 300, 3], dtype=tf.float32)
+        self.ssd = SSD()
+
+        # building the network returns feature-maps, confs and locs tensors,
+        # which are kept together as the fetch list of a forward pass
+        fmaps, confs, locs = self.ssd.build(self.input, is_training=True)
+        self.pred_set = [fmaps, confs, locs]
+
+        # default boxes are generated from the feature-map shapes
+        fmap_shapes = [fmap.get_shape().as_list() for fmap in fmaps]
+        self.dboxes = generate_boxes(fmap_shapes)
+        print(len(self.dboxes))
+
+        # loss tensors together with the placeholders required to feed them
+        loss, loss_conf, loss_loc, self.pos, self.neg, self.gt_labels, self.gt_boxes = self.ssd.loss(len(self.dboxes))
+        self.train_set = [loss, loss_conf, loss_loc]
+        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam')
+        self.train_step = optimizer.minimize(loss)
+
+        # provides matching method
+        self.matcher = Matcher(fmap_shapes, self.dboxes)
+
+    def infer(self, images):
+        """
+        inference process
+
+        Args: batch of images fed to the input placeholder
+        Return: predicted confs and predicted locs
+        """
+        _, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})
+        return pred_confs, pred_locs
+
+    def train(self, images, actual_data):
+        """
+        training process for one mini batch
+
+        Args: batch of images, and the ground truth objects of each image
+              (per object: first 4 values are the location,
+               argmax of the remaining values is the label)
+        Return: pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
+        """
+        # per-image matching results, collected over the whole batch
+        positives, negatives = [], []
+        ex_gt_labels, ex_gt_boxes = [], []
+
+        # forward pass first, the matching below needs the predictions
+        _, pred_confs, pred_locs = self.sess.run(self.pred_set, feed_dict={self.input: images})
+
+        # matching method works with only one image,
+        # so the batch is processed image by image
+        for idx in range(len(images)):
+            gt_labels, gt_locs = [], []
+            # extract ground truth info
+            for obj in actual_data[idx]:
+                label = np.argmax(obj[4:])
+                # transform location for voc2007
+                box = corner2center(convert2wh(obj[:4]))
+                gt_locs.append(box)
+                gt_labels.append(label)
+
+            pos_list, neg_list, t_gtl, t_gtb = self.matcher.matching(pred_confs[idx], pred_locs[idx], gt_labels, gt_locs)
+            positives.append(pos_list)
+            negatives.append(neg_list)
+            ex_gt_labels.append(t_gtl)
+            ex_gt_boxes.append(t_gtb)
+
+        # the same feed is used for the loss evaluation and the update step
+        feed = {
+            self.input: images,
+            self.pos: positives,
+            self.neg: negatives,
+            self.gt_labels: ex_gt_labels,
+            self.gt_boxes: ex_gt_boxes,
+        }
+
+        # losses are evaluated before the optimizer step is applied
+        batch_loss, batch_conf, batch_loc = self.sess.run(self.train_set, feed_dict=feed)
+        self.sess.run(self.train_step, feed_dict=feed)
+
+        return pred_confs, pred_locs, batch_loc, batch_conf, batch_loss
diff --git a/train.py → trainer.py b/train.py → trainer.py
@@ -13,8 +13,6 @@
 import os
 os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 
-import cv2
-import sys
 import datetime
 import tensorflow as tf
 import numpy as np
@@ -24,11 +22,11 @@
 
 from util.util import *
 from tqdm import trange
-from model.SSD300 import *
+from model.ssd300 import *
 
 # ====================== Training Parameters ====================== #
 BATCH_SIZE = 10
-EPOCH = 100
+EPOCH = 200
 EPOCH_LOSSES = []
 SHUFFLED_INDECES = []
 # ============================== END ============================== #
@@ -57,58 +55,21 @@ def next_batch():
         for idx in indices:
             # make images mini batch
 
-            img = load_image('voc2007/'+keys[idx])
+            img, _, _, _, = preprocess('voc2007/'+keys[idx])
 
             actual_data.append(data[keys[idx]])
             mini_batch.append(img)
 
         buff.append((mini_batch, actual_data))
 
 
-    def draw_marker(image_name, save):
-        if image_name is None:
-            return Exception('not specified image name to be drawed')
-
-        img = cv2.imread('./voc2007/'+image_name, 1)
-        h = img.shape[0]
-        w = img.shape[1]
-        fontType = cv2.FONT_HERSHEY_SIMPLEX
-        reshaped = cv2.resize(img, (300, 300))
-        reshaped = reshaped / 255
-        pred_confs, pred_locs = ssd.eval(images=[reshaped], actual_data=None, is_training=False)
-        locs, labels = ssd.ssd.detect_objects(pred_confs, pred_locs)
-        if len(labels) and len(locs):
-            for label, loc in zip(labels, locs):
-                loc = center2corner(loc)
-                loc = convert2diagonal_points(loc)
-                cv2.rectangle(img, (int(loc[0]*w), int(loc[1]*h)), (int(loc[2]*w), int(loc[3]*h)), (0, 0, 255), 1)
-                cv2.putText(img, str(int(label)), (int(loc[0]*w), int(loc[1]*h)), fontType, 0.7, (0, 0, 255), 1)
-
-        if save:
-            if not os.path.exists('./evaluated'):
-                os.mkdir('./evaluated')
-            cv2.imwrite('./evaluated/'+image_name, img)
-
-        return img
-
-
     # tensorflow session
     ssd = SSD300(sess)
     sess.run(tf.global_variables_initializer())
 
     # parameter saver
     saver = tf.train.Saver()
 
-    # eval and predict object on a specified image.
-    if 2 == len(sys.argv):
-        saver.restore(sess, './checkpoints/params.ckpt')
-        img = draw_marker(sys.argv[1], save=False)
-        cv2.namedWindow("img", cv2.WINDOW_NORMAL)
-        cv2.imshow("img", img)
-        cv2.waitKey(0)
-        cv2.destroyAllWindows()
-        sys.exit()
-
     # saver.restore(sess, './checkpoints/params.ckpt')
 
     SHUFFLED_INDECES = list(np.random.permutation(len(keys)))
@@ -124,30 +85,17 @@ def draw_marker(image_name, save):
         for ba in trange(BATCH):
             batch, actual = buff.pop(0)
             threading.Thread(name='load', target=next_batch).start()
-            _, _, batch_loc, batch_conf, batch_loss = ssd.eval(batch, actual, True)
+            _, _, batch_loc, batch_conf, batch_loss = ssd.train(batch, actual)
             BATCH_LOSSES.append(batch_loss)
 
             # print('BATCH: {0} / EPOCH: {1}, LOSS: {2}'.format(ba+1, ep+1, batch_loss))
         EPOCH_LOSSES.append(np.mean(BATCH_LOSSES))
         print('\n*** AVERAGE: '+str(EPOCH_LOSSES[-1])+' ***')
-
         saver.save(sess, './checkpoints/params.ckpt')
-
-
-        print('\n*** TEST ***')
-        id = np.random.choice(len(keys))
-        name = keys[id]
-        draw_marker(image_name=name, save=True)
-        print('\nSaved Evaled Image')
-
-
         print('\n========== EPOCH: '+str(ep+1)+' END ==========')
 
     print('\nEND LEARNING')
 
-
-    saver.save(sess, './params_final.ckpt')
-
     plt.xlabel('Epoch')
     plt.ylabel('Loss')
     plt.plot(np.array(range(EPOCH)), EPOCH_LOSSES)

diff --git a/util/util.py b/util/util.py
@@ -6,19 +6,33 @@
 author: arabian9ts
 """
 
-import numpy
-import skimage
-import skimage.io
-import skimage.transform
+import numpy as np
+from scipy.misc import imread, imresize
 
-def load_image(path):
+def preprocess(path):
     """
     load specified image
 
     Args: image path
-    Return: resized image
+    Return: image resized to 300x300 (float32, channel order reversed,
+            values divided by 255), original width, original height, channel count
     """
-    img = skimage.io.imread(path)
-    img = img / 255.
-    resized_img = skimage.transform.resize(img, (300, 300))
-    return numpy.array(resized_img, dtype=numpy.float32)
+    img = imread(path)
+    # keep the original size, it is returned next to the resized image
+    # NOTE(review): this unpacking expects a 3-dimensional array,
+    # a 2-dimensional (single channel) image would fail here
+    h, w, c = img.shape
+    img = imresize(img, (300, 300))
+    # reverse the channel axis, then convert to float32 for the scaling below
+    img = img[:, :, ::-1].astype('float32')
+    img /= 255.
+    return img, w, h, c
+
+
+def deprocess(x, w, h):
+    """
+    restore processed image
+
+    Args: processed image (values divided by 255), width and height to restore
+    Return: restored uint8 image resized to (h, w)
+
+    The given array is not modified.
+    """
+    # the channel order is not flipped back here,
+    # the image keeps the order it has on input
+    # scale out of place: an in-place multiply would change the caller's
+    # array and fails for integer arrays
+    x = x * 255.
+    x = np.clip(x, 0, 255).astype('uint8')
+    x = imresize(x, (h, w))
+    return x