diff --git a/Dockerfile b/Dockerfile index ff33db0..b62e132 100755 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ ENV LIBRARY_PATH /usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/cuda/l # python3 modules RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && \ - pip3 install --upgrade --no-cache-dir wheel six setuptools cython numpy scipy==1.2.0 \ + pip3 install --upgrade --no-cache-dir wheel six setuptools cython numpy imageio \ matplotlib seaborn scikit-learn scikit-image pillow requests \ jupyterlab networkx h5py pandas plotly protobuf tqdm tensorboardX colorama setproctitle && \ pip3 install https://download.pytorch.org/whl/cu90/torch-1.0.0-cp35-cp35m-linux_x86_64.whl diff --git a/README.md b/README.md index 10d3583..6940e5d 100755 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ L1 and L2 losses with multi-scale support are available in [losses.py](./losses. Currently, the code supports python 3 * numpy * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4)) -* scipy +* imageio * scikit-image * tensorboardX * colorama, tqdm, setproctitle diff --git a/datasets.py b/datasets.py index ebb7bcc..89ad7f9 100755 --- a/datasets.py +++ b/datasets.py @@ -8,7 +8,7 @@ from glob import glob import utils.frame_utils as frame_utils -from scipy.misc import imread, imresize +from imageio import imread class StaticRandomCrop(object): def __init__(self, image_size, crop_size): diff --git a/run_a_pair.py b/run_a_pair.py index 0e6aea2..ebec5ca 100644 --- a/run_a_pair.py +++ b/run_a_pair.py @@ -2,44 +2,29 @@ import numpy as np import argparse -from Networks.FlowNet2 import FlowNet2 # the path is depended on where you create this module -from frame_utils import read_gen # the path is depended on where you create this module +from models import FlowNet2 +from utils.frame_utils import read_gen -if __name__ == '__main__': - # obtain the necessary args for construct the 
class Args():
    """Fixed settings handed to ``FlowNet2`` in place of parsed CLI arguments."""
    # False: do not run the model in pseudo-fp16 mode (fp16 storage, fp32 math).
    fp16 = False
    # Forwarded to the network as-is; presumably the maximum pixel value of
    # the input images -- confirm against the FlowNet2 model definition.
    rgb_max = 255.


def get_flow(img1, img2, weights):
    """
    Run FlowNet2 on one image pair and return the predicted flow
    :param img1: file name of the first frame, read with read_gen
    :param img2: file name of the second frame, read with read_gen
    :param weights: checkpoint file whose "state_dict" entry holds the network weights
    :return: numpy array holding the squeezed network output with its leading
             axis moved last (presumably (height, width, 2) -- confirm)
    """
    # Build the network on the GPU and restore the trained weights.
    net = FlowNet2(Args()).cuda()
    checkpoint = torch.load(weights)
    net.load_state_dict(checkpoint["state_dict"])

    # Stack the two frames and move the last image axis to the front before
    # adding the batch dimension, mirroring the loading done in datasets.py.
    frames = np.array([read_gen(img1), read_gen(img2)]).transpose(3, 0, 1, 2)
    batch = torch.from_numpy(frames.astype(np.float32)).unsqueeze(0).cuda()

    # Inference only: the result leaves as a numpy array, so no autograd
    # graph needs to be recorded for the forward pass.
    with torch.no_grad():
        result = net(batch).squeeze()

    # The earlier revision of this function returned an undefined name here.
    return result.data.cpu().numpy().transpose(1, 2, 0)
UNKNOWN_FLOW_THRESH = 1e7


def flow_to_image(flow):
    """
    Convert flow into middlebury color code image

    The input array is never modified. Pixels whose u or v component is NaN
    or larger in magnitude than UNKNOWN_FLOW_THRESH are treated as unknown
    and rendered black; they do not take part in the normalisation.
    :param flow: optical flow map indexed as flow[row, col, 0] (u) and
                 flow[row, col, 1] (v)
    :return: optical flow image in middlebury color (uint8, one RGB triple
             per pixel)
    """
    flow = np.asarray(flow)
    # Basic slicing returns views, so copy before zeroing the unknown
    # entries; otherwise the zeros would be written into the caller's array.
    u = flow[:, :, 0].copy()
    v = flow[:, :, 1].copy()

    # NaN counts as unknown as well: a NaN radius would otherwise poison the
    # maximum below and turn the normalisation factor negative.
    unknown = (
        (np.abs(u) > UNKNOWN_FLOW_THRESH)
        | (np.abs(v) > UNKNOWN_FLOW_THRESH)
        | np.isnan(u)
        | np.isnan(v)
    )
    u[unknown] = 0
    v[unknown] = 0

    # Scale so the longest known vector has length (just under) one; eps
    # avoids dividing by zero for an all-zero flow field.
    rad = np.sqrt(u ** 2 + v ** 2)
    max_rad = rad.max() if rad.size else 0.0
    scale = max_rad + np.finfo(float).eps

    img = compute_color(u / scale, v / scale)
    # A 2-D boolean mask on the 3-D image blanks all three channels at once.
    img[unknown] = 0

    return np.uint8(img)


def compute_color(u, v):
    """
    compute optical flow color map

    The inputs are not modified; pixels where either component is NaN come
    out black.
    :param u: optical flow horizontal map (2-D)
    :param v: optical flow vertical map (2-D, same shape as u)
    :return: optical flow in color code, float array of shape (h, w, 3)
             holding whole numbers in [0, 255]
    """
    # np.array copies, so replacing NaNs below stays local to this function.
    u = np.array(u)
    v = np.array(v)
    [h, w] = u.shape
    img = np.zeros([h, w, 3])

    nan_mask = np.isnan(u) | np.isnan(v)
    u[nan_mask] = 0
    v[nan_mask] = 0
    valid = ~nan_mask

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u ** 2 + v ** 2)

    # Map the direction angle from [-1, 1] onto the 1-based wheel positions
    # [1, ncols]; k0/k1 are the neighbouring entries and f the blend weight.
    a = np.arctan2(-v, -u) / np.pi
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1  # wrap around past the last wheel entry
    f = fk - k0

    inside = rad <= 1
    outside = np.logical_not(inside)

    for channel in range(np.size(colorwheel, 1)):
        ramp = colorwheel[:, channel]
        col0 = ramp[k0 - 1] / 255
        col1 = ramp[k1 - 1] / 255
        col = (1 - f) * col0 + f * col1

        # Within the unit circle saturation grows with the radius (zero
        # flow is white); anything outside is dimmed instead.
        col[inside] = 1 - rad[inside] * (1 - col[inside])
        col[outside] *= 0.75

        img[:, :, channel] = np.uint8(np.floor(255 * col * valid))

    return img


def make_color_wheel():
    """
    Generate color wheel according Middlebury color code
    :return: Color wheel, float array of shape (55, 3) with one RGB row per
             hue step
    """
    # (length, channel held at 255, channel that ramps, ramp rises?)
    segments = (
        (15, 0, 1, True),    # RY: red -> yellow
        (6, 1, 0, False),    # YG: yellow -> green
        (4, 1, 2, True),     # GC: green -> cyan
        (11, 2, 1, False),   # CB: cyan -> blue
        (13, 2, 0, True),    # BM: blue -> magenta
        (6, 0, 2, False),    # MR: magenta -> red
    )

    ncols = sum(length for length, _, _, _ in segments)
    colorwheel = np.zeros([ncols, 3])

    start = 0
    for length, held, ramped, rising in segments:
        rows = slice(start, start + length)
        ramp = np.floor(255 * np.arange(0, length) / length)
        colorwheel[rows, held] = 255
        colorwheel[rows, ramped] = ramp if rising else 255 - ramp
        start += length

    return colorwheel