From 5e6cfbda10902eda5a05cc2e4ebe58dad5727a3a Mon Sep 17 00:00:00 2001 From: Shin Donghwan Date: Tue, 4 Jun 2019 18:18:59 +0900 Subject: [PATCH 01/31] Update README.md Add "TODO" 1. Edit README 2. Reconstruct Repository 3. Add KTS(Kernel Temporal Segmentation) --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1272153..ca93882 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,11 @@ Please remember to specify the naming format of your video frames on this [line] ## How to use your own data We preprocess data by extracting image features for videos and save them to `h5` file. The file format looks like [this](https://github.com/KaiyangZhou/vsumm-reinforce/issues/1#issuecomment-363492711). After that, you can make split via `create_split.py`. If you wanna train policy network using the entire dataset, just do `train_keys = dataset.keys()`. [Here](https://github.com/KaiyangZhou/pytorch-vsumm-reinforce/blob/master/main.py#L75) is the code where we initialize dataset. If you have any problems, feel free to contact me by email or raise an `issue`. +## TODO +1. Edit README +2. Reconstruct Repository +3. Add KTS(Kernel Temporal Segmentation) + ## Citation ``` @article{zhou2017reinforcevsumm, @@ -81,4 +86,4 @@ We preprocess data by extracting image features for videos and save them to `h5` journal={arXiv:1801.00054}, year={2017} } -``` \ No newline at end of file +``` From c9eac2801e8033c7d312b6a9441175bd284cd6bc Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 18:42:47 +0900 Subject: [PATCH 02/31] Commit: CNN --- networks/CNN.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 networks/CNN.py diff --git a/networks/CNN.py b/networks/CNN.py new file mode 100644 index 0000000..0695e5e --- /dev/null +++ b/networks/CNN.py @@ -0,0 +1,48 @@ +import torch.nn as nn +from torchvision import transforms, models +from torch.autograd import Variable + +""" +pre-trained ResNet +""" + +class ResNet(nn.Module): + """ + Args: + fea_type: string, resnet101 or resnet 152 + """ + + def __init__(self, fea_type = 'resnet152'): + super(ResNet, self).__init__() + self.fea_type = fea_type + # rescale and normalize transformation + self.transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + if fea_type == 'resnet101': + resnet = models.resnet101(pretrained=True) # dim of pool5 is 2048 + elif fea_type == 'resnet152': + resnet = models.resnet152(pretrained=True) + else: + raise Exception('No such ResNet!') + + resnet.float() + resnet.cuda() + resnet.eval() + + module_list = list(resnet.children()) + self.conv5 = nn.Sequential(*module_list[: -2]) + self.pool5 = module_list[-2] + + # rescale and normalize image, then pass it through ResNet + def forward(self, x): + x = self.transform(x) + x = x.unsqueeze(0) # reshape the single image s.t. 
it has a batch dim
+        x = Variable(x).cuda()
+        res_conv5 = self.conv5(x)
+        res_pool5 = self.pool5(res_conv5)
+        res_pool5 = res_pool5.view(res_pool5.size(0), -1)
+
+        return res_pool5
\ No newline at end of file

From d7c6c514f5729394a49650c4baa5890e3b62e1d0 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 18:43:00 +0900
Subject: [PATCH 03/31] Commit: DSN

---
 networks/DSN.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 networks/DSN.py

diff --git a/networks/DSN.py b/networks/DSN.py
new file mode 100644
index 0000000..9b02433
--- /dev/null
+++ b/networks/DSN.py
@@ -0,0 +1,25 @@
+import torch as T
+import torch.nn as nn
+from torch.nn import functional as F
+
+__all__ = ['DSN']
+
+class DSN(nn.Module):
+    """ Deep Summarization Network """
+
+    def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'):
+        super(DSN, self).__init__()
+        assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'"
+
+        if cell == 'lstm':
+            self.rnn = nn.LSTM(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True)
+        elif cell == 'gru':
+            self.rnn = nn.GRU(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True)
+
+        self.fc = nn.Linear(hid_dim*2, 1)
+
+    def forward(self, x):
+        h, _ = self.rnn(x)
+        p = T.sigmoid(self.fc(h))
+
+        return p
\ No newline at end of file

From 0390ab2faa97db79a68830cd584bcea2da6e6ea5 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 18:43:11 +0900
Subject: [PATCH 04/31] Commit: RL

---
 networks/RL.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 networks/RL.py

diff --git a/networks/RL.py b/networks/RL.py
new file mode 100644
index 0000000..a3c5cb3
--- /dev/null
+++ b/networks/RL.py
@@ -0,0 +1,67 @@
+import torch
+
+def compute_reward(seq, actions, ignore_far_sim=True, temp_dist_thre=20, use_gpu=False):
+    """
+    Compute diversity reward and representativeness reward
+
+    Args:
+        seq: sequence of features, shape (1, seq_len, dim)
+        actions: binary action sequence, shape (1, seq_len, 1)
+        ignore_far_sim (bool): whether to ignore temporally distant similarity (default: True)
+        temp_dist_thre (int): threshold for ignoring temporally distant similarity (default: 20)
+        use_gpu (bool): whether to use GPU
+
+    """
+
+    _seq = seq.detach()
+    _actions = actions.detach()
+
+    # get indices of the selected frames
+    pick_indices = _actions.squeeze().nonzero().squeeze()
+    num_picks = len(pick_indices) if pick_indices.ndimension() > 0 else 1
+
+    if num_picks == 0:
+        # give zero reward if no frames are selected
+        reward = torch.tensor(0.)
+        if use_gpu: reward = reward.cuda()
+        return reward
+
+    _seq = _seq.squeeze()
+    n = _seq.size(0)
+
+    # compute diversity reward
+    # Rdiv = 1 / (Y * (Y-1)) * SUM(SUM( d(xt,xt') ))
+    # d(xt,xt') = 1 - ( xtT*xt' / (||xt|| * ||xt'||) )
+    if num_picks == 1:
+        reward_div = torch.tensor(0.)
+        if use_gpu: reward_div = reward_div.cuda()
+
+    else:
+        normed_seq = _seq / _seq.norm(p=2, dim=1, keepdim=True)
+        dissim_mat = 1 - torch.matmul(normed_seq, normed_seq.t())    # dissimilarity matrix [Eq.4]
+
+        # Y : indices of the selected frames
+        # pick_indices corresponds to Y
+        dissim_submat = dissim_mat[pick_indices, :][:, pick_indices]
+
+        if ignore_far_sim:
+            # ignore temporally distant similarity
+            pick_mat = pick_indices.expand(num_picks, num_picks)
+            temp_dist_mat = torch.abs(pick_mat - pick_mat.t())
+            dissim_submat[temp_dist_mat > temp_dist_thre] = 1.
+ + reward_div = dissim_submat.sum() / (num_picks* (num_picks - 1.)) # diversity reward [Eq.3] + + # compute representativeness reward + dist_mat = torch.pow(_seq, 2).sum(dim=1, keepdim=True).expand(n, n) + dist_mat = dist_mat + dist_mat.t() + dist_mat.addmm_(1, -2, _seq, _seq.t()) + + dist_mat = dist_mat[:, pick_indices] + dist_mat = dist_mat.min(1, keepdim=True)[0] + + reward_rep = torch.exp(-dist_mat.mean()) # representativeness reward [Eq.5] + + reward = (reward_div + reward_rep) * 0.5 + + return reward \ No newline at end of file From 805cd41764a7f1ba35d1448b38c92cf1f5b1d592 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 18:43:29 +0900 Subject: [PATCH 05/31] Commit: Config --- config/config.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 config/config.py diff --git a/config/config.py b/config/config.py new file mode 100644 index 0000000..fdf5d36 --- /dev/null +++ b/config/config.py @@ -0,0 +1,32 @@ +# ============ TRAIN CONFIG ============== +# Dataset options +DATASET = 'summe_dataset.h5' # path to h5 dataset (required) +SPLIT = 'datasets/summe_splits.json' # path to split file (required) +SPLIT_ID = 0 # split index (default: 0) +METRIC = 'summe' # evaluation metric ['tvsum', 'summe']) + +# Model options +INPUT_DIM = 1024 # input dimension (default: 1024) +HIDDEN_DIM = 256 # hidden unit dimension of DSN (default: 256) +NUM_LAYERS = 1 # number of RNN layers (default: 1) +RNN_CELL = 'lstm' # RNN cell type (default: lstm) + +# Optimization options +LR = 1e-05 # learning rate (default: 1e-05) +WEIGHT_DECAY = 1e-05 # weight decay rate (default: 1e-05) +MAX_EPOCH = 60 # maximum epoch for training (default: 60) +STEP_SIZE = 30 # how many steps to decay learning rate (default: 30) +GAMMA = 0.1 # learning rate decay (default: 0.1) +NUM_EPISODE = 5 # number of episodes (default: 5) +BETA = 0.01 # weight for summary length penalty term (default: 0.01) + +# Misc +SEED = 1 # random seed (default: 1) +GPU = '0' # which gpu devices to use (default: 0) +USE_CPU = False # use cpu device +EVALUATE = False # whether to do evaluation only +TEST = True # whether to do evaluation only +RESUME = False # path to resume file +VERBOSE = True # whether to show detailed test results +SAVE_DIR = 'log/summe-split0' # path to save output (default: log/) +SAVE_RESULTS = True # whether to save output results \ No newline at end of file From 8fe18d5f3432666a4c333384c7daad2ba351b074 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 18:44:43 +0900 Subject: [PATCH 06/31] Delete: to reconstruct --- create_split.py | 60 ------------- knapsack.py | 77 ---------------- main.py | 208 ------------------------------------------- models.py | 21 ----- parse_json.py | 34 ------- parse_json.sh | 11 --- parse_log.py | 43 --------- rewards.py | 56 ------------ summary2video.py | 44 --------- utils.py | 94 ------------------- visualize_results.py | 38 -------- vsum_tools.py | 113 ----------------------- 12 files changed, 799 deletions(-) delete mode 100644 create_split.py delete mode 100755 knapsack.py delete mode 100644 main.py delete mode 100644 models.py delete mode 100644 parse_json.py delete mode 100644 parse_json.sh delete mode 100644 parse_log.py delete mode 100644 rewards.py delete mode 100644 summary2video.py delete mode 100755 utils.py delete mode 100644 visualize_results.py delete mode 100644 vsum_tools.py diff --git a/create_split.py b/create_split.py deleted file mode 100644 index e7559a4..0000000 --- a/create_split.py +++ 
/dev/null @@ -1,60 +0,0 @@ -from __future__ import print_function -import os -import os.path as osp -import argparse -import h5py -import math -import numpy as np - -from utils import write_json - -parser = argparse.ArgumentParser("Code to create splits in json form") -parser.add_argument('-d', '--dataset', type=str, required=True, help="path to h5 dataset (required)") -parser.add_argument('--save-dir', type=str, default='datasets', help="path to save output json file (default: 'datasets/')") -parser.add_argument('--save-name', type=str, default='splits', help="name to save as, excluding extension (default: 'splits')") -parser.add_argument('--num-splits', type=int, default=5, help="how many splits to generate (default: 5)") -parser.add_argument('--train-percent', type=float, default=0.8, help="percentage of training data (default: 0.8)") - -args = parser.parse_args() - -def split_random(keys, num_videos, num_train): - """Random split""" - train_keys, test_keys = [], [] - rnd_idxs = np.random.choice(range(num_videos), size=num_train, replace=False) - for key_idx, key in enumerate(keys): - if key_idx in rnd_idxs: - train_keys.append(key) - else: - test_keys.append(key) - - assert len(set(train_keys) & set(test_keys)) == 0, "Error: train_keys and test_keys overlap" - - return train_keys, test_keys - -def create(): - print("==========\nArgs:{}\n==========".format(args)) - print("Goal: randomly split data for {} times, {:.1%} for training and the rest for testing".format(args.num_splits, args.train_percent)) - print("Loading dataset from {}".format(args.dataset)) - dataset = h5py.File(args.dataset, 'r') - keys = dataset.keys() - num_videos = len(keys) - num_train = int(math.ceil(num_videos * args.train_percent)) - num_test = num_videos - num_train - print("Split breakdown: # total videos {}. # train videos {}. # test videos {}".format(num_videos, num_train, num_test)) - splits = [] - - for split_idx in range(args.num_splits): - train_keys, test_keys = split_random(keys, num_videos, num_train) - splits.append({ - 'train_keys': train_keys, - 'test_keys': test_keys, - }) - - saveto = osp.join(args.save_dir, args.save_name + '.json') - write_json(splits, saveto) - print("Splits saved to {}".format(saveto)) - - dataset.close() - -if __name__ == '__main__': - create() \ No newline at end of file diff --git a/knapsack.py b/knapsack.py deleted file mode 100755 index 633d6ce..0000000 --- a/knapsack.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy as np - -''' ------------------------------------------------- -Use dynamic programming (DP) to solve 0/1 knapsack problem -Time complexity: O(nW), where n is number of items and W is capacity - -Author: Kaiyang Zhou -Website: https://kaiyangzhou.github.io/ ------------------------------------------------- -knapsack_dp(values,weights,n_items,capacity,return_all=False) - -Input arguments: - 1. values: a list of numbers in either int or float, specifying the values of items - 2. weights: a list of int numbers specifying weights of items - 3. n_items: an int number indicating number of items - 4. capacity: an int number indicating the knapsack capacity - 5. return_all: whether return all info, defaulty is False (optional) - -Return: - 1. picks: a list of numbers storing the positions of selected items - 2. 
max_val: maximum value (optional) ------------------------------------------------- -''' -def knapsack_dp(values,weights,n_items,capacity,return_all=False): - check_inputs(values,weights,n_items,capacity) - - table = np.zeros((n_items+1,capacity+1),dtype=np.float32) - keep = np.zeros((n_items+1,capacity+1),dtype=np.float32) - - for i in xrange(1,n_items+1): - for w in xrange(0,capacity+1): - wi = weights[i-1] # weight of current item - vi = values[i-1] # value of current item - if (wi <= w) and (vi + table[i-1,w-wi] > table[i-1,w]): - table[i,w] = vi + table[i-1,w-wi] - keep[i,w] = 1 - else: - table[i,w] = table[i-1,w] - - picks = [] - K = capacity - - for i in xrange(n_items,0,-1): - if keep[i,K] == 1: - picks.append(i) - K -= weights[i-1] - - picks.sort() - picks = [x-1 for x in picks] # change to 0-index - - if return_all: - max_val = table[n_items,capacity] - return picks,max_val - return picks - -def check_inputs(values,weights,n_items,capacity): - # check variable type - assert(isinstance(values,list)) - assert(isinstance(weights,list)) - assert(isinstance(n_items,int)) - assert(isinstance(capacity,int)) - # check value type - assert(all(isinstance(val,int) or isinstance(val,float) for val in values)) - assert(all(isinstance(val,int) for val in weights)) - # check validity of value - assert(all(val >= 0 for val in weights)) - assert(n_items > 0) - assert(capacity > 0) - -if __name__ == '__main__': - values = [2,3,4] - weights = [1,2,3] - n_items = 3 - capacity = 3 - picks = knapsack_dp(values,weights,n_items,capacity) - print picks diff --git a/main.py b/main.py deleted file mode 100644 index 19c642d..0000000 --- a/main.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import print_function -import os -import os.path as osp -import argparse -import sys -import h5py -import time -import datetime -import numpy as np -from tabulate import tabulate - -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -from torch.optim import lr_scheduler -from torch.distributions import Bernoulli - -from utils import Logger, read_json, write_json, save_checkpoint -from models import * -from rewards import compute_reward -import vsum_tools - -parser = argparse.ArgumentParser("Pytorch code for unsupervised video summarization with REINFORCE") -# Dataset options -parser.add_argument('-d', '--dataset', type=str, required=True, help="path to h5 dataset (required)") -parser.add_argument('-s', '--split', type=str, required=True, help="path to split file (required)") -parser.add_argument('--split-id', type=int, default=0, help="split index (default: 0)") -parser.add_argument('-m', '--metric', type=str, required=True, choices=['tvsum', 'summe'], - help="evaluation metric ['tvsum', 'summe']") -# Model options -parser.add_argument('--input-dim', type=int, default=1024, help="input dimension (default: 1024)") -parser.add_argument('--hidden-dim', type=int, default=256, help="hidden unit dimension of DSN (default: 256)") -parser.add_argument('--num-layers', type=int, default=1, help="number of RNN layers (default: 1)") -parser.add_argument('--rnn-cell', type=str, default='lstm', help="RNN cell type (default: lstm)") -# Optimization options -parser.add_argument('--lr', type=float, default=1e-05, help="learning rate (default: 1e-05)") -parser.add_argument('--weight-decay', type=float, default=1e-05, help="weight decay rate (default: 1e-05)") -parser.add_argument('--max-epoch', type=int, default=60, help="maximum epoch for training (default: 60)") -parser.add_argument('--stepsize', type=int, 
default=30, help="how many steps to decay learning rate (default: 30)") -parser.add_argument('--gamma', type=float, default=0.1, help="learning rate decay (default: 0.1)") -parser.add_argument('--num-episode', type=int, default=5, help="number of episodes (default: 5)") -parser.add_argument('--beta', type=float, default=0.01, help="weight for summary length penalty term (default: 0.01)") -# Misc -parser.add_argument('--seed', type=int, default=1, help="random seed (default: 1)") -parser.add_argument('--gpu', type=str, default='0', help="which gpu devices to use") -parser.add_argument('--use-cpu', action='store_true', help="use cpu device") -parser.add_argument('--evaluate', action='store_true', help="whether to do evaluation only") -parser.add_argument('--save-dir', type=str, default='log', help="path to save output (default: 'log/')") -parser.add_argument('--resume', type=str, default='', help="path to resume file") -parser.add_argument('--verbose', action='store_true', help="whether to show detailed test results") -parser.add_argument('--save-results', action='store_true', help="whether to save output results") - -args = parser.parse_args() - -torch.manual_seed(args.seed) -os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu -use_gpu = torch.cuda.is_available() -if args.use_cpu: use_gpu = False - -def main(): - if not args.evaluate: - sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt')) - else: - sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt')) - print("==========\nArgs:{}\n==========".format(args)) - - if use_gpu: - print("Currently using GPU {}".format(args.gpu)) - cudnn.benchmark = True - torch.cuda.manual_seed_all(args.seed) - else: - print("Currently using CPU") - - print("Initialize dataset {}".format(args.dataset)) - dataset = h5py.File(args.dataset, 'r') - num_videos = len(dataset.keys()) - splits = read_json(args.split) - assert args.split_id < len(splits), "split_id (got {}) exceeds {}".format(args.split_id, len(splits)) - split = splits[args.split_id] - train_keys = split['train_keys'] - test_keys = split['test_keys'] - print("# total videos {}. # train videos {}. # test videos {}".format(num_videos, len(train_keys), len(test_keys))) - - print("Initialize model") - model = DSN(in_dim=args.input_dim, hid_dim=args.hidden_dim, num_layers=args.num_layers, cell=args.rnn_cell) - print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0)) - - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) - if args.stepsize > 0: - scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma) - - if args.resume: - print("Loading checkpoint from '{}'".format(args.resume)) - checkpoint = torch.load(args.resume) - model.load_state_dict(checkpoint) - else: - start_epoch = 0 - - if use_gpu: - model = nn.DataParallel(model).cuda() - - if args.evaluate: - print("Evaluate only") - evaluate(model, dataset, test_keys, use_gpu) - return - - print("==> Start training") - start_time = time.time() - model.train() - baselines = {key: 0. for key in train_keys} # baseline rewards for videos - reward_writers = {key: [] for key in train_keys} # record reward changes for each video - - for epoch in range(start_epoch, args.max_epoch): - idxs = np.arange(len(train_keys)) - np.random.shuffle(idxs) # shuffle indices - - for idx in idxs: - key = train_keys[idx] - seq = dataset[key]['features'][...] 
# sequence of features, (seq_len, dim) - seq = torch.from_numpy(seq).unsqueeze(0) # input shape (1, seq_len, dim) - if use_gpu: seq = seq.cuda() - probs = model(seq) # output shape (1, seq_len, 1) - - cost = args.beta * (probs.mean() - 0.5)**2 # minimize summary length penalty term [Eq.11] - m = Bernoulli(probs) - epis_rewards = [] - for _ in range(args.num_episode): - actions = m.sample() - log_probs = m.log_prob(actions) - reward = compute_reward(seq, actions, use_gpu=use_gpu) - expected_reward = log_probs.mean() * (reward - baselines[key]) - cost -= expected_reward # minimize negative expected reward - epis_rewards.append(reward.item()) - - optimizer.zero_grad() - cost.backward() - torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0) - optimizer.step() - baselines[key] = 0.9 * baselines[key] + 0.1 * np.mean(epis_rewards) # update baseline reward via moving average - reward_writers[key].append(np.mean(epis_rewards)) - - epoch_reward = np.mean([reward_writers[key][epoch] for key in train_keys]) - print("epoch {}/{}\t reward {}\t".format(epoch+1, args.max_epoch, epoch_reward)) - - write_json(reward_writers, osp.join(args.save_dir, 'rewards.json')) - evaluate(model, dataset, test_keys, use_gpu) - - elapsed = round(time.time() - start_time) - elapsed = str(datetime.timedelta(seconds=elapsed)) - print("Finished. Total elapsed time (h:m:s): {}".format(elapsed)) - - model_state_dict = model.module.state_dict() if use_gpu else model.state_dict() - model_save_path = osp.join(args.save_dir, 'model_epoch' + str(args.max_epoch) + '.pth.tar') - save_checkpoint(model_state_dict, model_save_path) - print("Model saved to {}".format(model_save_path)) - - dataset.close() - -def evaluate(model, dataset, test_keys, use_gpu): - print("==> Test") - with torch.no_grad(): - model.eval() - fms = [] - eval_metric = 'avg' if args.metric == 'tvsum' else 'max' - - if args.verbose: table = [["No.", "Video", "F-score"]] - - if args.save_results: - h5_res = h5py.File(osp.join(args.save_dir, 'result.h5'), 'w') - - for key_idx, key in enumerate(test_keys): - seq = dataset[key]['features'][...] - seq = torch.from_numpy(seq).unsqueeze(0) - if use_gpu: seq = seq.cuda() - probs = model(seq) - probs = probs.data.cpu().squeeze().numpy() - - cps = dataset[key]['change_points'][...] - num_frames = dataset[key]['n_frames'][()] - nfps = dataset[key]['n_frame_per_seg'][...].tolist() - positions = dataset[key]['picks'][...] - user_summary = dataset[key]['user_summary'][...] 
- - machine_summary = vsum_tools.generate_summary(probs, cps, num_frames, nfps, positions) - fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric) - fms.append(fm) - - if args.verbose: - table.append([key_idx+1, key, "{:.1%}".format(fm)]) - - if args.save_results: - h5_res.create_dataset(key + '/score', data=probs) - h5_res.create_dataset(key + '/machine_summary', data=machine_summary) - h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...]) - h5_res.create_dataset(key + '/fm', data=fm) - - if args.verbose: - print(tabulate(table)) - - if args.save_results: h5_res.close() - - mean_fm = np.mean(fms) - print("Average F-score {:.1%}".format(mean_fm)) - - return mean_fm - -if __name__ == '__main__': - main() diff --git a/models.py b/models.py deleted file mode 100644 index 532e8be..0000000 --- a/models.py +++ /dev/null @@ -1,21 +0,0 @@ -import torch -import torch.nn as nn -from torch.nn import functional as F - -__all__ = ['DSN'] - -class DSN(nn.Module): - """Deep Summarization Network""" - def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'): - super(DSN, self).__init__() - assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'" - if cell == 'lstm': - self.rnn = nn.LSTM(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True) - else: - self.rnn = nn.GRU(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True) - self.fc = nn.Linear(hid_dim*2, 1) - - def forward(self, x): - h, _ = self.rnn(x) - p = F.sigmoid(self.fc(h)) - return p \ No newline at end of file diff --git a/parse_json.py b/parse_json.py deleted file mode 100644 index 8f4e4b2..0000000 --- a/parse_json.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import argparse -import re -import os.path as osp -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt -from utils import read_json - -""" -Parse json file (.json) to extract rewards for specific videos. 
- -How to use: -# image will be saved in path: blah_blah_blah -$ python parse_json.py -p blah_blah_blah/rewards.json -i 0 -""" - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--path', type=str, required=True, help="path to rewards.json; output saved to the same dir") -parser.add_argument('-i', '--idx', type=int, default=0, help="choose which video to visualize, index starts from 0 (default: 0)") -args = parser.parse_args() - -reward_writers = read_json(args.path) -keys = reward_writers.keys() -assert args.idx < len(keys) -key = keys[args.idx] -rewards = reward_writers[key] - -plt.plot(rewards) -plt.xlabel('epoch') -plt.ylabel('reward') -plt.title("{}".format(key)) -plt.savefig(osp.join(osp.dirname(args.path), 'epoch_reward_' + str(args.idx) + '.png')) -plt.close() \ No newline at end of file diff --git a/parse_json.sh b/parse_json.sh deleted file mode 100644 index c8002a4..0000000 --- a/parse_json.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!bin/sh - -# Note: index starts from 0, so if you wanna visualize all videos, -# say there are 10 videos, NUM should be 9 -NUM=39; - -for i in $(seq 0 $NUM); -do - echo "do: parse_json.py -p path_to/rewards.json -i $i" - python parse_json.py -p log/rewards.json -i $i -done \ No newline at end of file diff --git a/parse_log.py b/parse_log.py deleted file mode 100644 index 9c6baee..0000000 --- a/parse_log.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import argparse -import re -import os.path as osp -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt - -""" -Parse log file (.txt) to extract rewards. - -How to use: -# image will be saved in path: blah_blah_blah -$ python parse_log.py -p blah_blah_blah/log_train.txt -""" - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--path', type=str, required=True, help="path to log.txt; output saved to the same dir") -args = parser.parse_args() - -if not osp.exists(args.path): - raise ValueError("Given path is invalid: {}".format(args.path)) - -if osp.splitext(osp.basename(args.path))[-1] != '.txt': - raise ValueError("File found does not end with .txt: {}".format(args.path)) - -regex_reward = re.compile('reward ([\.\deE+-]+)') -rewards = [] - -with open(args.path, 'r') as f: - lines = f.readlines() - for line in lines: - reward_match = regex_reward.search(line) - if reward_match: - reward = float(reward_match.group(1)) - rewards.append(reward) - -plt.plot(rewards) -plt.xlabel('epoch') -plt.ylabel('reward') -plt.title("Overall rewards") -plt.savefig(osp.join(osp.dirname(args.path), 'overall_reward.png')) -plt.close() diff --git a/rewards.py b/rewards.py deleted file mode 100644 index 18118ac..0000000 --- a/rewards.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch -import sys - -def compute_reward(seq, actions, ignore_far_sim=True, temp_dist_thre=20, use_gpu=False): - """ - Compute diversity reward and representativeness reward - - Args: - seq: sequence of features, shape (1, seq_len, dim) - actions: binary action sequence, shape (1, seq_len, 1) - ignore_far_sim (bool): whether to ignore temporally distant similarity (default: True) - temp_dist_thre (int): threshold for ignoring temporally distant similarity (default: 20) - use_gpu (bool): whether to use GPU - """ - _seq = seq.detach() - _actions = actions.detach() - pick_idxs = _actions.squeeze().nonzero().squeeze() - num_picks = len(pick_idxs) if pick_idxs.ndimension() > 0 else 1 - - if num_picks == 0: - # give zero reward is no frames are selected - reward = torch.tensor(0.) 
- if use_gpu: reward = reward.cuda() - return reward - - _seq = _seq.squeeze() - n = _seq.size(0) - - # compute diversity reward - if num_picks == 1: - reward_div = torch.tensor(0.) - if use_gpu: reward_div = reward_div.cuda() - else: - normed_seq = _seq / _seq.norm(p=2, dim=1, keepdim=True) - dissim_mat = 1. - torch.matmul(normed_seq, normed_seq.t()) # dissimilarity matrix [Eq.4] - dissim_submat = dissim_mat[pick_idxs,:][:,pick_idxs] - if ignore_far_sim: - # ignore temporally distant similarity - pick_mat = pick_idxs.expand(num_picks, num_picks) - temp_dist_mat = torch.abs(pick_mat - pick_mat.t()) - dissim_submat[temp_dist_mat > temp_dist_thre] = 1. - reward_div = dissim_submat.sum() / (num_picks * (num_picks - 1.)) # diversity reward [Eq.3] - - # compute representativeness reward - dist_mat = torch.pow(_seq, 2).sum(dim=1, keepdim=True).expand(n, n) - dist_mat = dist_mat + dist_mat.t() - dist_mat.addmm_(1, -2, _seq, _seq.t()) - dist_mat = dist_mat[:,pick_idxs] - dist_mat = dist_mat.min(1, keepdim=True)[0] - #reward_rep = torch.exp(torch.FloatTensor([-dist_mat.mean()]))[0] # representativeness reward [Eq.5] - reward_rep = torch.exp(-dist_mat.mean()) - - # combine the two rewards - reward = (reward_div + reward_rep) * 0.5 - - return reward diff --git a/summary2video.py b/summary2video.py deleted file mode 100644 index 21ba48f..0000000 --- a/summary2video.py +++ /dev/null @@ -1,44 +0,0 @@ -import h5py -import cv2 -import os -import os.path as osp -import numpy as np -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--path', type=str, required=True, help="path to h5 result file") -parser.add_argument('-d', '--frm-dir', type=str, required=True, help="path to frame directory") -parser.add_argument('-i', '--idx', type=int, default=0, help="which key to choose") -parser.add_argument('--fps', type=int, default=30, help="frames per second") -parser.add_argument('--width', type=int, default=640, help="frame width") -parser.add_argument('--height', type=int, default=480, help="frame height") -parser.add_argument('--save-dir', type=str, default='log', help="directory to save") -parser.add_argument('--save-name', type=str, default='summary.mp4', help="video name to save (ends with .mp4)") -args = parser.parse_args() - -def frm2video(frm_dir, summary, vid_writer): - for idx, val in enumerate(summary): - if val == 1: - # here frame name starts with '000001.jpg' - # change according to your need - frm_name = str(idx+1).zfill(6) + '.jpg' - frm_path = osp.join(frm_dir, frm_name) - frm = cv2.imread(frm_path) - frm = cv2.resize(frm, (args.width, args.height)) - vid_writer.write(frm) - -if __name__ == '__main__': - if not osp.exists(args.save_dir): - os.mkdir(args.save_dir) - vid_writer = cv2.VideoWriter( - osp.join(args.save_dir, args.save_name), - cv2.VideoWriter_fourcc(*'MP4V'), - args.fps, - (args.width, args.height), - ) - h5_res = h5py.File(args.path, 'r') - key = h5_res.keys()[args.idx] - summary = h5_res[key]['machine_summary'][...] 
- h5_res.close() - frm2video(args.frm_dir, summary, vid_writer) - vid_writer.release() \ No newline at end of file diff --git a/utils.py b/utils.py deleted file mode 100755 index c4da3c7..0000000 --- a/utils.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import absolute_import -import os -import sys -import errno -import shutil -import json -import os.path as osp - -import torch - -def mkdir_if_missing(directory): - if not osp.exists(directory): - try: - os.makedirs(directory) - except OSError as e: - if e.errno != errno.EEXIST: - raise - -class AverageMeter(object): - """Computes and stores the average and current value. - - Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 - """ - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - -def save_checkpoint(state, fpath='checkpoint.pth.tar'): - mkdir_if_missing(osp.dirname(fpath)) - torch.save(state, fpath) - -class Logger(object): - """ - Write console output to external text file. - Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. - """ - def __init__(self, fpath=None): - self.console = sys.stdout - self.file = None - if fpath is not None: - mkdir_if_missing(os.path.dirname(fpath)) - self.file = open(fpath, 'w') - - def __del__(self): - self.close() - - def __enter__(self): - pass - - def __exit__(self, *args): - self.close() - - def write(self, msg): - self.console.write(msg) - if self.file is not None: - self.file.write(msg) - - def flush(self): - self.console.flush() - if self.file is not None: - self.file.flush() - os.fsync(self.file.fileno()) - - def close(self): - self.console.close() - if self.file is not None: - self.file.close() - -def read_json(fpath): - with open(fpath, 'r') as f: - obj = json.load(f) - return obj - -def write_json(obj, fpath): - mkdir_if_missing(osp.dirname(fpath)) - with open(fpath, 'w') as f: - json.dump(obj, f, indent=4, separators=(',', ': ')) - - - - - - diff --git a/visualize_results.py b/visualize_results.py deleted file mode 100644 index 1027b97..0000000 --- a/visualize_results.py +++ /dev/null @@ -1,38 +0,0 @@ -import h5py -from matplotlib import pyplot as plt -import argparse -import os -import os.path as osp - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--path', type=str, required=True, - help="path to h5 file containing summarization results") -args = parser.parse_args() - -h5_res = h5py.File(args.path, 'r') -keys = h5_res.keys() - -for key in keys: - score = h5_res[key]['score'][...] - machine_summary = h5_res[key]['machine_summary'][...] - gtscore = h5_res[key]['gtscore'][...] - fm = h5_res[key]['fm'][()] - - # plot score vs gtscore - fig, axs = plt.subplots(2) - n = len(gtscore) - axs[0].plot(range(n), gtscore, color='red') - axs[0].set_xlim(0, n) - axs[0].set_yticklabels([]) - axs[0].set_xticklabels([]) - axs[1].set_title("video {} F-score {:.1%}".format(key, fm)) - axs[1].plot(range(n), score, color='blue') - axs[1].set_xlim(0, n) - axs[1].set_yticklabels([]) - axs[1].set_xticklabels([]) - fig.savefig(osp.join(osp.dirname(args.path), 'score_' + key + '.png'), bbox_inches='tight') - plt.close() - - print "Done video {}. 
# frames {}.".format(key, len(machine_summary)) - -h5_res.close() \ No newline at end of file diff --git a/vsum_tools.py b/vsum_tools.py deleted file mode 100644 index 797b0bf..0000000 --- a/vsum_tools.py +++ /dev/null @@ -1,113 +0,0 @@ -import numpy as np -from knapsack import knapsack_dp -import math - -def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'): - """Generate keyshot-based video summary i.e. a binary vector. - Args: - --------------------------------------------- - - ypred: predicted importance scores. - - cps: change points, 2D matrix, each row contains a segment. - - n_frames: original number of frames. - - nfps: number of frames per segment. - - positions: positions of subsampled frames in the original video. - - proportion: length of video summary (compared to original video length). - - method: defines how shots are selected, ['knapsack', 'rank']. - """ - n_segs = cps.shape[0] - frame_scores = np.zeros((n_frames), dtype=np.float32) - if positions.dtype != int: - positions = positions.astype(np.int32) - if positions[-1] != n_frames: - positions = np.concatenate([positions, [n_frames]]) - for i in xrange(len(positions) - 1): - pos_left, pos_right = positions[i], positions[i+1] - if i == len(ypred): - frame_scores[pos_left:pos_right] = 0 - else: - frame_scores[pos_left:pos_right] = ypred[i] - - seg_score = [] - for seg_idx in xrange(n_segs): - start, end = int(cps[seg_idx,0]), int(cps[seg_idx,1]+1) - scores = frame_scores[start:end] - seg_score.append(float(scores.mean())) - - limits = int(math.floor(n_frames * proportion)) - - if method == 'knapsack': - picks = knapsack_dp(seg_score, nfps, n_segs, limits) - elif method == 'rank': - order = np.argsort(seg_score)[::-1].tolist() - picks = [] - total_len = 0 - for i in order: - if total_len + nfps[i] < limits: - picks.append(i) - total_len += nfps[i] - else: - raise KeyError("Unknown method {}".format(method)) - - summary = np.zeros((1), dtype=np.float32) # this element should be deleted - for seg_idx in xrange(n_segs): - nf = nfps[seg_idx] - if seg_idx in picks: - tmp = np.ones((nf), dtype=np.float32) - else: - tmp = np.zeros((nf), dtype=np.float32) - summary = np.concatenate((summary, tmp)) - - summary = np.delete(summary, 0) # delete the first element - return summary - -def evaluate_summary(machine_summary, user_summary, eval_metric='avg'): - """Compare machine summary with user summary (keyshot-based). - Args: - -------------------------------- - machine_summary and user_summary should be binary vectors of ndarray type. - eval_metric = {'avg', 'max'} - 'avg' averages results of comparing multiple human summaries. - 'max' takes the maximum (best) out of multiple comparisons. 
- """ - machine_summary = machine_summary.astype(np.float32) - user_summary = user_summary.astype(np.float32) - n_users,n_frames = user_summary.shape - - # binarization - machine_summary[machine_summary > 0] = 1 - user_summary[user_summary > 0] = 1 - - if len(machine_summary) > n_frames: - machine_summary = machine_summary[:n_frames] - elif len(machine_summary) < n_frames: - zero_padding = np.zeros((n_frames - len(machine_summary))) - machine_summary = np.concatenate([machine_summary, zero_padding]) - - f_scores = [] - prec_arr = [] - rec_arr = [] - - for user_idx in xrange(n_users): - gt_summary = user_summary[user_idx,:] - overlap_duration = (machine_summary * gt_summary).sum() - precision = overlap_duration / (machine_summary.sum() + 1e-8) - recall = overlap_duration / (gt_summary.sum() + 1e-8) - if precision == 0 and recall == 0: - f_score = 0. - else: - f_score = (2 * precision * recall) / (precision + recall) - f_scores.append(f_score) - prec_arr.append(precision) - rec_arr.append(recall) - - if eval_metric == 'avg': - final_f_score = np.mean(f_scores) - final_prec = np.mean(prec_arr) - final_rec = np.mean(rec_arr) - elif eval_metric == 'max': - final_f_score = np.max(f_scores) - max_idx = np.argmax(f_scores) - final_prec = prec_arr[max_idx] - final_rec = rec_arr[max_idx] - - return final_f_score, final_prec, final_rec \ No newline at end of file From 067642a4538dee98dc71a3d29d78ecf808cdd3a0 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 18:56:09 +0900 Subject: [PATCH 07/31] Commit: Split Dataset --- create_split.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 create_split.py diff --git a/create_split.py b/create_split.py new file mode 100644 index 0000000..f6e1c63 --- /dev/null +++ b/create_split.py @@ -0,0 +1,66 @@ +# Dataset Split + +from __future__ import print_function + +import os +import argparse +import h5py +import math +import numpy as np + +from utils.file_process import write_json + +parser = argparse.ArgumentParser("Code to create splits in json form") +parser.add_argument("-d", "--dataset", type=str, required=True, help="path to h5 dataset (required)") +parser.add_argument("--save-dir", type=str, default='datasets', help="path to save output jon file (default: 'datasets/'") +parser.add_argument("--save-name", type=str, default="splits", help="name to save as, excluding extension (default: 'splits')") +parser.add_argument("--num-splits", type=int, default=5, help="how many splits to generate (default: 5)") +parser.add_argument("--train-percent", type=float, default=0.8, help="percentage of training data (default: 0.8)") + +args = parser.parse_args() + +def split_random(keys, num_videos, num_train): + """ Random split """ + train_keys, test_keys = [], [] + rnd_idxs = np.random.choice(range(num_videos), size=num_train, replace=False) + + for key_idx, key in enumerate(keys): + if key_idx in rnd_idxs: + train_keys.append(key) + else: + test_keys.append(key) + + assert len(set(train_keys) & set(test_keys)) == 0, "Error: train_keys and test_keys overlap" + + return train_keys, test_keys + +def create(): + print("===========\nArgs:{}\n=========".format(args)) + print("Goal: randomly split data for {} times, {:.1%} for training and the rest for testing".format(args.num_splits, args.train_percent)) + print("Loading dataset from: {}".format(args.dataset)) + + dataset = h5py.File(args.dataset, 'r') + keys = dataset.keys() + num_videos = len(keys) + num_train = int(math.ceil(num_videos * 
args.train_percent))
+    num_test = num_videos - num_train
+    print("Split breakdown: # total videos {}. # train videos {}. # test videos {}".format(num_videos, num_train, num_test))
+    splits = []
+
+    for split_idx in range(args.num_splits):
+        train_keys, test_keys = split_random(keys, num_videos, num_train)
+
+        splits.append({
+            'train_keys': train_keys,
+            'test_keys': test_keys,
+        })
+
+    save_path = os.path.join(args.save_dir, args.save_name + '.json')
+    write_json(splits, save_path)
+    print("Splits saved to {}".format(save_path))
+
+    dataset.close()
+
+if __name__ == '__main__':
+    create()
+

From c4bb7332ab7645f47e1acdcbeb6ba97ab7a01236 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 18:56:59 +0900
Subject: [PATCH 08/31] Commit: Load Json and Show Reward

---
 parse_json.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 parse_json.py

diff --git a/parse_json.py b/parse_json.py
new file mode 100644
index 0000000..68f67c2
--- /dev/null
+++ b/parse_json.py
@@ -0,0 +1,32 @@
+import os
+import argparse
+import matplotlib
+matplotlib.use("Agg")
+from matplotlib import pyplot as plt
+from utils.file_process import read_json
+
+"""
+    Parse json file (.json) to extract rewards for specific videos.
+
+    How to use:
+    # image will be saved in path
+    $ python parse_json.py -p log/summe-split0/rewards.json -i 0
+"""
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-p", "--path", type=str, required=True, help="path to rewards.json; output saved to the same dir")
+parser.add_argument("-i", "--idx", type=int, default=0, help="choose which video to visualize, index starts from 0 (default: 0)")
+args = parser.parse_args()
+
+reward_writers = read_json(args.path)
+keys = [key for key in reward_writers]
+assert args.idx < len(keys)
+key = keys[args.idx]
+rewards = reward_writers[key]
+
+plt.plot(rewards)
+plt.xlabel('epoch')
+plt.ylabel('reward')
+plt.title("{}".format(key))
+plt.savefig(os.path.join(os.path.dirname(args.path), 'epoch_reward_' + str(args.idx) + '.png'))
+plt.close()
\ No newline at end of file
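For context, the `rewards.json` file parsed above is written by the training loop later in this series: `video_summarization.py` records one list of per-epoch mean episode rewards per training video. A minimal sketch of a compatible file, with illustrative keys and values:

```python
import json

# rewards.json: {video key: [mean episode reward at each epoch]}
rewards = {
    "video_1": [0.42, 0.45, 0.47],  # illustrative values
    "video_2": [0.40, 0.44, 0.46],
}

with open("log/summe-split0/rewards.json", "w") as f:
    json.dump(rewards, f, indent=4)
```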
From ad78bc55ef70fdc210d07aee9874913ee9d7665d Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 18:57:22 +0900
Subject: [PATCH 09/31] Commit: Load log(.txt) and Show Reward

---
 parse_log.py | 42 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 parse_log.py

diff --git a/parse_log.py b/parse_log.py
new file mode 100644
index 0000000..c04462c
--- /dev/null
+++ b/parse_log.py
@@ -0,0 +1,42 @@
+import os
+import argparse
+import re
+import matplotlib
+matplotlib.use("Agg")
+from matplotlib import pyplot as plt
+
+"""
+    Parse log file (.txt) to extract rewards.
+
+    How to use:
+    # image will be saved in path
+    $ python parse_log.py -p log/summe-split0/log_train.txt
+"""
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-p", "--path", type=str, required=True, help="path to log.txt; output saved to the same dir")
+args = parser.parse_args()
+
+if not os.path.exists(args.path):
+    raise ValueError("Given path is invalid: {}".format(args.path))
+
+if os.path.splitext(os.path.basename(args.path))[-1] != '.txt':
+    raise ValueError("File found does not end with .txt: {}".format(args.path))
+
+regex_reward = re.compile(r'reward ([\.\deE+-]+)')
+rewards = []
+
+with open(args.path, 'r') as f:
+    lines = f.readlines()
+    for line in lines:
+        reward_match = regex_reward.search(line)
+        if reward_match:
+            reward = float(reward_match.group(1))
+            rewards.append(reward)
+
+plt.plot(rewards)
+plt.xlabel("epoch")
+plt.ylabel("reward")
+plt.title("Overall rewards")
+plt.savefig(os.path.join(os.path.dirname(args.path), 'overall_reward.png'))
+plt.close()
\ No newline at end of file

From e635e2f3d9b269fd385938c5d56a2ac321119aed Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:00:03 +0900
Subject: [PATCH 10/31] Commit: Summary to Video

---
 summary2video.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 summary2video.py

diff --git a/summary2video.py b/summary2video.py
new file mode 100644
index 0000000..da091a3
--- /dev/null
+++ b/summary2video.py
@@ -0,0 +1,42 @@
+import h5py
+import cv2
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-p', '--path', type=str, required=True, help="path to h5 result file")
+parser.add_argument('-d', '--frm-dir', type=str, required=True, help="path to frame directory")
+parser.add_argument('-i', '--idx', type=int, default=0, help="which key to choose")
+parser.add_argument('--fps', type=int, default=30, help="frames per second")
+parser.add_argument('--width', type=int, default=640, help="frame width")
+parser.add_argument('--height', type=int, default=480, help="frame height")
+parser.add_argument('--save-dir', type=str, default='log', help="directory to save")
+parser.add_argument('--save-name', type=str, default='summary.mp4', help="video name to save (ends with .mp4)")
+args = parser.parse_args()
+
+def frm2video(frm_dir, summary, vid_writer):
+    for idx, val in enumerate(summary):
+        if val == 1:
+            # here frame name starts with '000001.jpg'
+            # change according to your need
+            frm_name = str(idx+1).zfill(6) + '.jpg'
+            frm_path = os.path.join(frm_dir, frm_name)
+            frm = cv2.imread(frm_path)
+            frm = cv2.resize(frm, (args.width, args.height))
+            vid_writer.write(frm)
+
+if __name__ == '__main__':
+    if not os.path.exists(args.save_dir):
+        os.mkdir(args.save_dir)
+    vid_writer = cv2.VideoWriter(
+        os.path.join(args.save_dir, args.save_name),
+        cv2.VideoWriter_fourcc(*'MP4V'),
+        args.fps,
+        (args.width, args.height),
+    )
+    h5_res = h5py.File(args.path, 'r')
+    key = list(h5_res.keys())[args.idx]
+    summary = h5_res[key]['machine_summary'][...]
+    h5_res.close()
+    frm2video(args.frm_dir, summary, vid_writer)
+    vid_writer.release()
\ No newline at end of file
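A usage sketch for `summary2video.py` above; `result.h5` is written by the evaluation step when result saving is enabled, and the frame directory and key index here are illustrative:

```
$ python summary2video.py -p log/summe-split0/result.h5 -d path_to/frames -i 0 --fps 30 --width 640 --height 480 --save-dir log --save-name summary.mp4
```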
From 54a564ec62d1a2bef7b70d30b684e72ad6e53d21 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:15:05 +0900
Subject: [PATCH 11/31] Commit: Commit init

---
 utils/__init__.py | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 utils/__init__.py

diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..457e1b5
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,3 @@
+from .file_process import *
+from .knapsack import *
+from .vsum_tool import *
\ No newline at end of file

From 19b7f51ff1eb105d20bb143a4a6e3d0eb6dc0117 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:30:28 +0900
Subject: [PATCH 12/31] Commit: Commit init

---
 config/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 config/__init__.py

diff --git a/config/__init__.py b/config/__init__.py
new file mode 100644
index 0000000..635cf56
--- /dev/null
+++ b/config/__init__.py
@@ -0,0 +1 @@
+from .config import *
\ No newline at end of file

From 9a53953d22bb79265b34160b01d95e4de96093b6 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:30:36 +0900
Subject: [PATCH 13/31] Commit: Commit init

---
 networks/__init__.py | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 networks/__init__.py

diff --git a/networks/__init__.py b/networks/__init__.py
new file mode 100644
index 0000000..2eea161
--- /dev/null
+++ b/networks/__init__.py
@@ -0,0 +1,3 @@
+from .CNN import *
+from .DSN import *
+from .RL import *
\ No newline at end of file

From 504c3b3cc2d03d251ae2ee6c5c31ce8f22fde1b0 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:31:03 +0900
Subject: [PATCH 14/31] Update: Update config

---
 config/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/config.py b/config/config.py
index fdf5d36..0419535 100644
--- a/config/config.py
+++ b/config/config.py
@@ -1,6 +1,6 @@
 # ============ TRAIN CONFIG ==============
 # Dataset options
-DATASET = 'summe_dataset.h5'                  # path to h5 dataset (required)
+DATASET = 'datasets/eccv16_dataset_summe_google_pool5.h5'  # path to h5 dataset (required)
 SPLIT = 'datasets/summe_splits.json'          # path to split file (required)
 SPLIT_ID = 0                                  # split index (default: 0)
 METRIC = 'summe'                              # evaluation metric ['tvsum', 'summe'])
@@ -25,7 +25,7 @@
 GPU = '0'                                     # which gpu devices to use (default: 0)
 USE_CPU = False                               # use cpu device
 EVALUATE = False                              # whether to do evaluation only
-TEST = True                                   # whether to do evaluation only
+TEST = False                                  # whether to do test only
 RESUME = False                                # path to resume file
 VERBOSE = True                                # whether to show detailed test results
 SAVE_DIR = 'log/summe-split0'                 # path to save output (default: log/)

From 331bdb76f1efa7eef4fb01364b2ca2de7e1009f4 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:39:02 +0900
Subject: [PATCH 15/31] Update: Update README.md

* Add TODO
  - Edit code for test
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index ca93882..b66404e 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,7 @@ We preprocess data by extracting image features for videos and save them to `h5`
 1. Edit README
 2. Reconstruct Repository
 3. Add KTS(Kernel Temporal Segmentation)
+4. Edit code for test
 
 ## Citation
 ```
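For reference before the summarization utilities land in the next patch, these are the per-video fields that the training and evaluation code in this series reads from the `h5` dataset; a sketch with an illustrative video key, not a full specification:

```python
import h5py

with h5py.File('datasets/eccv16_dataset_summe_google_pool5.h5', 'r') as f:
    video = f['video_1']                            # illustrative key
    features = video['features'][...]               # (n_steps, 1024) frame features
    gtscore = video['gtscore'][...]                 # ground-truth importance scores
    cps = video['change_points'][...]               # (n_segs, 2) segment boundaries
    n_frames = video['n_frames'][()]                # original frame count
    nfps = video['n_frame_per_seg'][...].tolist()   # frames per segment
    positions = video['picks'][...]                 # subsampled frame positions
    user_summary = video['user_summary'][...]       # (n_users, n_frames) annotations
```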
From e320b0256632159317706c68b193de6500e836f5 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:39:24 +0900
Subject: [PATCH 16/31] Commit: Commit vsum tool

---
 utils/vsum_tool.py | 130 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 utils/vsum_tool.py

diff --git a/utils/vsum_tool.py b/utils/vsum_tool.py
new file mode 100644
index 0000000..981bb5f
--- /dev/null
+++ b/utils/vsum_tool.py
@@ -0,0 +1,130 @@
+import numpy as np
+from utils.knapsack import knapsack_dp
+import math
+
+def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'):
+    """
+    Generate keyshot-based video summary, i.e. a binary vector
+
+    Args:
+        ypred: predicted importance scores.
+        cps: change points, 2D matrix, each row contains a segment.
+        n_frames: original number of frames.
+        nfps: number of frames per segment.
+        positions: positions of subsampled frames in the original video.
+        proportion: length of video summary (compared to original video length).
+        method: defines how shots are selected, ['knapsack', 'rank'].
+
+    """
+
+    n_segs = cps.shape[0]
+
+    # Frame Score
+    frame_scores = np.zeros((n_frames), dtype=np.float32)
+    if positions.dtype != int:
+        positions = positions.astype(np.int32)
+
+    if positions[-1] != n_frames:
+        positions = np.concatenate([positions, [n_frames]])
+
+    for idx in range(len(positions) - 1):
+        pos_cur, pos_next = positions[idx], positions[idx+1]
+
+        if idx == len(ypred):
+            frame_scores[pos_cur:pos_next] = 0
+        else:
+            frame_scores[pos_cur:pos_next] = ypred[idx]
+
+    # Segment Score
+    seg_score = []
+    for seg_idx in range(n_segs):
+        pos_start, pos_end = int(cps[seg_idx, 0]), int(cps[seg_idx, 1]+1)
+        scores = frame_scores[pos_start: pos_end]
+        seg_score.append(float(scores.mean()))
+
+    limits = int(math.floor(n_frames * proportion))
+
+    if method == 'knapsack':
+        picks = knapsack_dp(seg_score, nfps, n_segs, limits)
+    elif method == 'rank':
+        order = np.argsort(seg_score)[::-1].tolist()
+        picks = []
+        total_len = 0
+
+        for idx in order:
+            if total_len + nfps[idx] < limits:
+                picks.append(idx)
+                total_len += nfps[idx]
+
+    else:
+        raise KeyError("Unknown method {}".format(method))
+
+    summary = np.zeros((1), dtype=np.float32)    # this element should be deleted
+    for seg_idx in range(n_segs):
+        nf = nfps[seg_idx]
+        if seg_idx in picks:
+            tmp = np.ones((nf), dtype=np.float32)
+        else:
+            tmp = np.zeros((nf), dtype=np.float32)
+
+        summary = np.concatenate((summary, tmp))
+
+    summary = np.delete(summary, 0)    # delete the first element
+    return summary
+
+def evaluate_summary(machine_summary, user_summary, eval_metric='avg'):
+    """
+    Compare machine summary with user summary (keyshot-based).
+
+    Args:
+        machine_summary: summary by machine
+        user_summary: summary by user (annotation)
+        eval_metric: {'avg', 'max'}
+            'avg' : average results of comparing multiple human summaries.
+            'max' : takes the maximum (best) out of multiple comparisons.
+ """ + + machine_summary = machine_summary.astype(np.float32) + user_summary = user_summary.astype(np.float32) + n_users, n_frames = user_summary.shape + + # binarization + machine_summary[machine_summary > 0] = 1 + user_summary[user_summary > 0] = 1 + + if len(machine_summary) > n_frames: + machine_summary = machine_summary[:n_frames] + elif len(machine_summary) < n_frames: + zero_padding = np.zeros((n_frames - len(machine_summary))) + machine_summary = np.concatenate([machine_summary, zero_padding]) + + f_scores = [] + prec_arr = [] + rec_arr = [] + + for user_idx in range(n_users): + gt_summary = user_summary[user_idx, :] + overlap_duration = (machine_summary * gt_summary).sum() + precision = overlap_duration / (machine_summary.sum() + 1e-8) + recall = overlap_duration / (gt_summary.sum() + 1e-8) + if precision == 0 and recall == 0: + f_score = 0. + else: + f_score = (2 * precision * recall) / (precision + recall) + + f_scores.append(f_score) + prec_arr.append(precision) + rec_arr.append(recall) + + if eval_metric == 'avg': + final_f_score = np.mean(f_scores) + final_prec = np.mean(prec_arr) + final_rec = np.mean(rec_arr) + + elif eval_metric == 'max': + final_f_score = np.max(f_scores) + max_idx = np.argmax(f_scores) + final_prec = prec_arr[max_idx] + final_rec = rec_arr[max_idx] + + return final_f_score, final_prec, final_rec \ No newline at end of file From 82d8c419258518fdc7641383623e8ae7f11a2fe8 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 19:39:38 +0900 Subject: [PATCH 17/31] Commit: Commit knapsack --- utils/knapsack.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 utils/knapsack.py diff --git a/utils/knapsack.py b/utils/knapsack.py new file mode 100644 index 0000000..a856ae9 --- /dev/null +++ b/utils/knapsack.py @@ -0,0 +1,77 @@ +import numpy as np + +''' +------------------------------------------------ +Use dynamic programming (DP) to solve 0/1 knapsack problem +Time complexity: O(nW), where n is number of items and W is capacity + +Author: Kaiyang Zhou +Website: https://kaiyangzhou.github.io/ +------------------------------------------------ +knapsack_dp(values,weights,n_items,capacity,return_all=False) + +Input arguments: + 1. values: a list of numbers in either int or float, specifying the values of items + 2. weights: a list of int numbers specifying weights of items + 3. n_items: an int number indicating number of items + 4. capacity: an int number indicating the knapsack capacity + 5. return_all: whether return all info, defaulty is False (optional) + +Return: + 1. picks: a list of numbers storing the positions of selected items + 2. 
max_val: maximum value (optional) +------------------------------------------------ +''' +def knapsack_dp(values,weights,n_items,capacity,return_all=False): + check_inputs(values,weights,n_items,capacity) + + table = np.zeros((n_items+1,capacity+1),dtype=np.float32) + keep = np.zeros((n_items+1,capacity+1),dtype=np.float32) + + for i in range(1,n_items+1): + for w in range(0,capacity+1): + wi = weights[i-1] # weight of current item + vi = values[i-1] # value of current item + if (wi <= w) and (vi + table[i-1,w-wi] > table[i-1,w]): + table[i,w] = vi + table[i-1,w-wi] + keep[i,w] = 1 + else: + table[i,w] = table[i-1,w] + + picks = [] + K = capacity + + for i in range(n_items,0,-1): + if keep[i,K] == 1: + picks.append(i) + K -= weights[i-1] + + picks.sort() + picks = [x-1 for x in picks] # change to 0-index + + if return_all: + max_val = table[n_items,capacity] + return picks,max_val + return picks + +def check_inputs(values, weights, n_items, capacity): + # check variable type + assert(isinstance(values,list)) + assert(isinstance(weights,list)) + assert(isinstance(n_items,int)) + assert(isinstance(capacity,int)) + # check value type + assert(all(isinstance(val,int) or isinstance(val,float) for val in values)) + assert(all(isinstance(val,int) for val in weights)) + # check validity of value + assert(all(val >= 0 for val in weights)) + assert(n_items > 0) + assert(capacity > 0) + +if __name__ == '__main__': + values = [2,3,4] + weights = [1,2,3] + n_items = 3 + capacity = 3 + picks = knapsack_dp(values,weights,n_items,capacity) + print (picks) From 921cd758084d70ae98ee19199df227a689ff2530 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 19:39:50 +0900 Subject: [PATCH 18/31] Commit: Commit file process --- utils/file_process.py | 85 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 utils/file_process.py diff --git a/utils/file_process.py b/utils/file_process.py new file mode 100644 index 0000000..ccd2d73 --- /dev/null +++ b/utils/file_process.py @@ -0,0 +1,85 @@ +import sys, os +import json +import torch + +def write_json(splits, save_path): + if not os.path.exists(os.path.dirname(save_path)): + os.mkdir(os.path.dirname(save_path)) + + with open(save_path, 'w') as f: + json.dump(splits, f, indent=4, separators=(', ', ': ')) + +def read_json(fpath): + with open(fpath, 'r') as f: + obj = json.load(f) + return obj + +class AverageMeter(object): + """Computes and stores the average and current value. + + Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 + """ + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def save_checkpoint(state, fpath='checkpoint.pth.tar'): + if not os.path.exists(os.path.dirname(fpath)): + os.mkdir(os.path.dirname(fpath)) + + torch.save(state, fpath) + + +class Logger(object): + """ + Write console output to external text file. + Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. 
+    """
+
+    def __init__(self, fpath=None):
+        self.console = sys.stdout
+        self.file = None
+        if fpath is not None:
+            log_dir = os.path.dirname(fpath)
+            if log_dir and not os.path.exists(log_dir):
+                os.makedirs(log_dir) # create the log directory tree if needed
+
+            self.file = open(fpath, 'w')
+
+    def __del__(self):
+        self.close()
+
+    def __enter__(self):
+        return self # allow use as a context manager
+
+    def __exit__(self, *args):
+        self.close()
+
+    def write(self, msg):
+        self.console.write(msg)
+        if self.file is not None:
+            self.file.write(msg)
+
+    def flush(self):
+        self.console.flush()
+        if self.file is not None:
+            self.file.flush()
+            os.fsync(self.file.fileno())
+
+    def close(self):
+        self.console.close()
+        if self.file is not None:
+            self.file.close()
+
From 746574a9092e2b8d544aea0eb6508e6fe2d7f1f7 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:41:31 +0900
Subject: [PATCH 19/31] Commit: Commit main

---
 video_summarization.py | 227 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 video_summarization.py

diff --git a/video_summarization.py b/video_summarization.py
new file mode 100644
index 0000000..6b4a1d2
--- /dev/null
+++ b/video_summarization.py
@@ -0,0 +1,227 @@
+from __future__ import print_function
+import os
+import sys
+import h5py
+import time
+import datetime
+import numpy as np
+from tabulate import tabulate
+
+import torch
+import torch.nn as nn
+import torch.backends.cudnn as cudnn
+from torch.optim import lr_scheduler
+from torch.distributions import Bernoulli
+
+from config import config
+from utils.file_process import Logger, read_json, write_json, save_checkpoint
+from networks.DSN import DSN
+from networks.RL import compute_reward
+from utils import vsum_tool
+
+torch.manual_seed(config.SEED)
+os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU
+use_gpu = torch.cuda.is_available()
+if config.USE_CPU: use_gpu = False
+
+def main():
+    if not config.EVALUATE:
+        sys.stdout = Logger(os.path.join(config.SAVE_DIR, 'log_train.txt'))
+    else:
+        sys.stdout = Logger(os.path.join(config.SAVE_DIR, 'log_test.txt'))
+
+
+    if use_gpu:
+        print("Currently using GPU {}".format(config.GPU))
+        cudnn.benchmark = True
+        torch.cuda.manual_seed(config.SEED)
+    else:
+        print("Currently using CPU")
+
+    print("Initialize dataset {}".format(config.DATASET))
+    dataset = h5py.File(config.DATASET, 'r')
+    num_videos = len(dataset.keys())
+    splits = read_json(config.SPLIT)
+    assert config.SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(config.SPLIT_ID, len(splits ))
+    split = splits[config.SPLIT_ID]
+    train_keys = split["train_keys"]
+    test_keys = split["test_keys"]
+    print("# total videos {}. # train videos {}. # test videos {}.".format(num_videos, len(train_keys), len(test_keys)))
+
+    print("Initialize model")
+    model = DSN(in_dim=config.INPUT_DIM, hid_dim=config.HIDDEN_DIM, num_layers = config.NUM_LAYERS, cell=config.RNN_CELL)
+    print("Model Size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=config.LR, weight_decay=config.WEIGHT_DECAY)
+    if config.STEP_SIZE > 0:
+        scheduler = lr_scheduler.StepLR(optimizer, step_size=config.STEP_SIZE, gamma=config.GAMMA)
+
+    start_epoch = 0 # checkpoints store only the model weights, so training restarts at epoch 0
+    if config.RESUME:
+        print("Loading checkpoint from '{}'".format(config.RESUME))
+        checkpoint = torch.load(config.RESUME)
+        model.load_state_dict(checkpoint)
+
+    if use_gpu:
+        model = nn.DataParallel(model).cuda()
+
+    if config.TEST:
+        print("Test only")
+        test(model, dataset, ['video_0'], use_gpu)
+        return
+
+
+    # Evaluate
+    if config.EVALUATE:
+        print("Evaluate only")
+        evaluate(model, dataset, test_keys, use_gpu)
+        return
+
+    # Train
+    print("===> Start training")
+    start_time = time.time()
+    model.train()
+    baselines = {key: 0. for key in train_keys} # baseline rewards for videos
+    reward_writers = {key: [] for key in train_keys} # record reward changes for each video
+
+    for epoch in range(start_epoch, config.MAX_EPOCH):
+        indices = np.arange(len(train_keys))
+        np.random.shuffle(indices)
+
+        # Feed each video to the model
+        for idx in indices:
+            key = train_keys[idx]
+            seq = dataset[key]['features'][...] # sequence of features, (seq_len, dim)
+            seq = torch.from_numpy(seq).unsqueeze(0) # input shape (1, seq_len, dim)
+
+            if use_gpu: seq = seq.cuda()
+            probs = model(seq) # output shape (1, seq_len, 1)
+
+            cost = config.BETA * (probs.mean() - 0.5) ** 2 # minimize summary length penalty term [Eq.11]
+            m = Bernoulli(probs)
+
+            epis_rewards = []
+            for _ in range(config.NUM_EPISODE):
+                actions = m.sample()
+                log_probs = m.log_prob(actions)
+                reward = compute_reward(seq, actions, use_gpu=use_gpu)
+
+                expected_reward = log_probs.mean() * (reward - baselines[key])
+                cost -= expected_reward # minimize negative expected reward
+                epis_rewards.append(reward.item())
+
+            optimizer.zero_grad()
+            cost.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
+            optimizer.step()
+
+            baselines[key] = 0.9 * baselines[key] + 0.1 * np.mean(epis_rewards) # update baseline reward via moving average
+            reward_writers[key].append(np.mean(epis_rewards))
+
+        epoch_reward = np.mean([reward_writers[key][epoch] for key in train_keys])
+        print("epoch {}/{}\t reward {}\t".format(epoch+1, config.MAX_EPOCH, epoch_reward))
+
+    write_json(reward_writers, os.path.join(config.SAVE_DIR, 'rewards.json'))
+    evaluate(model, dataset, test_keys, use_gpu)
+
+    elapsed = round(time.time() - start_time)
+    elapsed = str(datetime.timedelta(seconds=elapsed))
+    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
+
+    model_state_dict = model.module.state_dict() if use_gpu else model.state_dict()
+    model_save_path = os.path.join(config.SAVE_DIR, 'model_epoch' + str(config.MAX_EPOCH) + '.pth.tar')
+    save_checkpoint(model_state_dict, model_save_path)
+    print("Model saved to {}".format(model_save_path))
+
+    dataset.close()
+
+
+def evaluate(model, dataset, test_keys, use_gpu):
+    print("===> Evaluation")
+    with torch.no_grad():
+        model.eval()
+        fms = []
+        eval_metric = 'avg' if config.METRIC == 'tvsum' else 'max'
+
+        if config.VERBOSE: table = [["No.", "Video", "F-Score"]]
+
+        if config.SAVE_RESULTS:
+            h5_res = h5py.File(os.path.join(config.SAVE_DIR, 'result.h5'), 'w')
+
+        for key_idx, key in enumerate(test_keys):
+            seq = dataset[key]['features'][...]
+            seq = torch.from_numpy(seq).unsqueeze(0)
+
+            if use_gpu: seq = seq.cuda()
+            probs = model(seq)
+            probs = probs.data.cpu().squeeze().numpy()
+
+            cps = dataset[key]['change_points'][...]
+            num_frames = dataset[key]['n_frames'][()]
+            nfps = dataset[key]['n_frame_per_seg'][...].tolist()
+            positions = dataset[key]['picks'][...]
+            user_summary = dataset[key]['user_summary'][...]
+
+            machine_summary = vsum_tool.generate_summary(probs, cps, num_frames, nfps, positions)
+            fm, _, _ = vsum_tool.evaluate_summary(machine_summary, user_summary, eval_metric)
+            fms.append(fm)
+
+
+            if config.VERBOSE:
+                table.append([key_idx+1, key, "{:.1%}".format(fm)])
+
+            if config.SAVE_RESULTS:
+                h5_res.create_dataset(key + '/score', data=probs)
+                h5_res.create_dataset(key + '/machine_summary', data=machine_summary)
+                h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...])
+                h5_res.create_dataset(key + '/fm', data=fm)
+
+        if config.VERBOSE:
+            print(tabulate(table))
+
+        if config.SAVE_RESULTS: h5_res.close()
+
+    mean_fm = np.mean(fms)
+    print("Average F-Score {:.1%}".format(mean_fm))
+
+    return mean_fm
+
+def test(model, dataset, test_data, use_gpu):
+    print("===> Test")
+    with torch.no_grad():
+        model.eval()
+
+        if config.SAVE_RESULTS:
+            h5_res = h5py.File(os.path.join(config.SAVE_DIR, 'result_test.h5'), 'w')
+
+        for key_idx, key in enumerate(test_data):
+            seq = dataset[key]['features'][...]
+            seq = torch.from_numpy(seq).unsqueeze(0)
+
+            if use_gpu: seq = seq.cuda() # .cuda() is not in-place, so reassign
+            probs = model(seq)
+            probs = probs.data.cpu().squeeze().numpy()
+
+            cps = dataset[key]['change_points'][...]
+            num_frames = dataset[key]['n_frames'][...]
+            nfps = dataset[key]['n_frame_per_seg'][...]
+            nfps = [65, 2159]
+            positions = dataset[key]['picks'][...]
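+
+            # generate_summary is expected to upsample the per-pick scores back to
+            # all num_frames via `positions`, score each KTS segment, and select
+            # segments with the 0/1 knapsack solver in utils/knapsack.py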
+ + machine_summary = vsum_tool.generate_summary(probs, cps, num_frames, nfps,positions) + + if config.SAVE_RESULTS: + h5_res.create_dataset(key + '/score', data=probs) + h5_res.create_dataset(key + '/machine_summary', data=machine_summary) + + if config.SAVE_RESULTS: + h5_res.close() + +if __name__ == '__main__': + main() + + + + + From 245730173f6d7ac9143ecb8cfd3a807057e1d064 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 19:41:51 +0900 Subject: [PATCH 20/31] Commit: Commit README.txt --- utils/KTS/README.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 utils/KTS/README.txt diff --git a/utils/KTS/README.txt b/utils/KTS/README.txt new file mode 100644 index 0000000..38a2d39 --- /dev/null +++ b/utils/KTS/README.txt @@ -0,0 +1,10 @@ +Kernel temporal segmentation +============================ + +This archive contains the following files: +cpd_nonlin.py - kernel temporal segmentation with fixed number of segments +cpd_auto.py - kernel temporal segmentation with autocalibration +demo.py - demo on synthetic examples + +Dependencies: +python + libraries: numpy, scipy, matplotlib (for demo) From 43555a240c283957456fa565bca39648a032e721 Mon Sep 17 00:00:00 2001 From: dhshin <102wjdql1!> Date: Tue, 4 Jun 2019 19:42:05 +0900 Subject: [PATCH 21/31] Commit: Commit demo --- utils/KTS/demo.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 utils/KTS/demo.py diff --git a/utils/KTS/demo.py b/utils/KTS/demo.py new file mode 100644 index 0000000..c6f023a --- /dev/null +++ b/utils/KTS/demo.py @@ -0,0 +1,80 @@ +import numpy as np +from cpd_nonlin import cpd_nonlin +from cpd_auto import cpd_auto + +def gen_data(n, m, d=1): + """Generates data with change points + n - number of samples + m - number of change-points + WARN: sigma is proportional to m + Returns: + X - data array (n X d) + cps - change-points array, including 0 and n""" + np.random.seed(1) + # Select changes at some distance from the boundaries + cps = np.random.permutation((n*3/4)-1)[0:m] + 1 + n/8 + cps = np.sort(cps) + cps = [0] + list(cps) + [n] + mus = np.random.rand(m+1, d)*(m/2) # make sigma = m/2 + X = np.zeros((n, d)) + for k in range(m+1): + X[cps[k]:cps[k+1], :] = mus[k, :][np.newaxis, :] + np.random.rand(cps[k+1]-cps[k], d) + return (X, np.array(cps)) + + +if __name__ == "__main__": + from matplotlib import pyplot as plt + plt.ioff() + + print ("Test 1: 1-dimensional signal") + plt.figure("Test 1: 1-dimensional signal") + n = 1000 + m = 10 + (X, cps_gt) = gen_data(n, m) + print ("Ground truth:", cps_gt) + plt.plot(X) + K = np.dot(X, X.T) + cps, scores = cpd_nonlin(K, m, lmin=1, lmax=10000) + print ("Estimated:", cps) + mi = np.min(X) + ma = np.max(X) + for cp in cps: + plt.plot([cp, cp], [mi, ma], 'r') + plt.show() + print ("="*79) + + + print ("Test 2: multidimensional signal") + plt.figure("Test 2: multidimensional signal") + n = 1000 + m = 20 + (X, cps_gt) = gen_data(n, m, d=50) + print ("Ground truth:", cps_gt) + plt.plot(X) + K = np.dot(X, X.T) + cps, scores = cpd_nonlin(K, m, lmin=1, lmax=10000) + print ("Estimated:", cps) + mi = np.min(X) + ma = np.max(X) + for cp in cps: + plt.plot([cp, cp], [mi, ma], 'r') + plt.show() + print ("="*79) + + + print ("Test 3: automatic selection of the number of change-points") + plt.figure("Test 3: automatic selection of the number of change-points") + (X, cps_gt) = gen_data(n, m) + print ("Ground truth: (m=%d)" % m, cps_gt) + plt.plot(X) + K = np.dot(X, X.T) + cps, scores = cpd_auto(K, 2*m, 
1)
    print ("Estimated: (m=%d)" % len(cps), cps)
    mi = np.min(X)
    ma = np.max(X)
    for cp in cps:
        plt.plot([cp, cp], [mi, ma], 'r')
    plt.show()
    print ("="*79)

From 052de6b077ce95018490e3654292f6cda2a40487 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:42:59 +0900
Subject: [PATCH 22/31] Commit: Commit change points detection

---
 utils/KTS/cpd_auto.py   |  86 ++++++++++++++++++++++++++++++++++++++
 utils/KTS/cpd_nonlin.py | 108 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+)
 create mode 100644 utils/KTS/cpd_auto.py
 create mode 100644 utils/KTS/cpd_nonlin.py

diff --git a/utils/KTS/cpd_auto.py b/utils/KTS/cpd_auto.py
new file mode 100644
index 0000000..d551936
--- /dev/null
+++ b/utils/KTS/cpd_auto.py
@@ -0,0 +1,86 @@
+import numpy as np
+from cpd_nonlin import cpd_nonlin
+
+def cpd_auto(K, ncp, vmax, desc_rate=1, **kwargs):
+    """Main interface
+
+    Detect change points, automatically selecting their number
+    K    - kernel between each pair of frames in video
+    ncp  - maximum number of change points to consider
+    vmax - penalty strength; roughly the within-segment kernel variance
+           (see estimate_vmax below)
+    Optional arguments:
+    lmin - minimum segment length
+    lmax - maximum segment length
+    desc_rate - rate of descriptor sampling (vmax always corresponds to 1x)
+
+    Note:
+    - cps are always calculated in subsampled coordinates irrespective of
+      desc_rate
+    - lmin and m should be in agreement
+    ---
+    Returns: (cps, costs)
+      cps   - best selected change-points
+      costs - costs for 0,1,2,...,m change-points
+
+    Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes
+    That is 1.6 GB for N=10000.
+    """
+    m = ncp
+    (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs)
+
+    N = K.shape[0]
+    N2 = N*desc_rate # length of the video before subsampling
+
+    penalties = np.zeros(m+1)
+    # Prevent division by zero (in case of 0 changes)
+    ncp = np.arange(1, m+1)
+    penalties[1:] = (vmax*ncp/(2.0*N2))*(np.log(float(N2)/ncp)+1)
+
+    costs = scores/float(N) + penalties
+    m_best = np.argmin(costs)
+    (cps, scores2) = cpd_nonlin(K, m_best, **kwargs)
+
+    return (cps, costs)
+
+
+# ------------------------------------------------------------------------------
+# Extra functions (currently not used)
+
+def estimate_vmax(K_stable):
+    """K_stable - kernel between all frames of a stable segment"""
+    n = K_stable.shape[0]
+    vmax = np.trace(centering(K_stable)/n)
+    return vmax
+
+
+def centering(K):
+    """Apply kernel centering"""
+    mean_rows = np.mean(K, 1)[:, np.newaxis]
+    return K - mean_rows - mean_rows.T + np.mean(mean_rows)
+
+
+def eval_score(K, cps):
+    """ Evaluate unnormalized empirical score
+        (sum of kernelized scatters) for the given change-points """
+    N = K.shape[0]
+    cps = [0] + list(cps) + [N]
+    V1 = 0
+    V2 = 0
+    for i in range(len(cps)-1):
+        K_sub = K[cps[i]:cps[i+1], :][:, cps[i]:cps[i+1]]
+        V1 += np.sum(np.diag(K_sub))
+        V2 += np.sum(K_sub) / float(cps[i+1] - cps[i])
+    return (V1 - V2)
+
+
+def eval_cost(K, cps, score, vmax):
+    """ Evaluate cost function for automatic number of change points selection
+       K     - kernel between all frames
+       cps   - selected change-points
+       score - unnormalized empirical score (sum of kernelized scatters)
+       vmax  - vmax parameter"""
+
+    N = K.shape[0]
+    penalty = (vmax*len(cps)/(2.0*N))*(np.log(float(N)/len(cps))+1)
+    return score/float(N) + penalty
+
diff --git a/utils/KTS/cpd_nonlin.py b/utils/KTS/cpd_nonlin.py
new file mode 100644
index 0000000..115eafe
--- /dev/null
+++ b/utils/KTS/cpd_nonlin.py
@@ -0,0 +1,108 @@
+import numpy as np
+
+import weave
+
+def calc_scatters(K):
+    """
+    Calculate scatter matrix:
+    scatters[i,j] = {scatter of the sequence with starting frame i and ending frame j}
+    """
+    n = K.shape[0]
+    K1 = np.cumsum([0] + list(np.diag(K)))
+    K2 = np.zeros((n+1, n+1))
+    K2[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1); # TODO: use the fact that K is symmetric
+
+    scatters = np.zeros((n, n));
+
+    code = r"""
+    for (int i = 0; i < n; i++) {
+        for (int j = i; j < n; j++) {
+            scatters(i,j) = K1(j+1)-K1(i) - (K2(j+1,j+1)+K2(i,i)-K2(j+1,i)-K2(i,j+1))/(j-i+1);
+        }
+    }
+    """
+    weave.inline(code, ['K1','K2','scatters','n'], global_dict = \
+        {'K1':K1, 'K2':K2, 'scatters':scatters, 'n':n}, type_converters=weave.converters.blitz)
+
+    return scatters
+
+def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True,
+    out_scatters=None):
+    """ Change point detection with dynamic programming
+    K - square kernel matrix
+    ncp - number of change points to detect (ncp >= 0)
+    lmin - minimal length of a segment
+    lmax - maximal length of a segment
+    backtrack - when False - only evaluate objective scores (to save memory)
+
+    Returns: (cps, obj)
+        cps - detected array of change points: mean is thought to be constant on [ cps[i], cps[i+1] )
+        obj_vals - values of the objective function for 0..m changepoints
+
+    """
+    m = int(ncp) # prevent numpy.int64
+
+    (n, n1) = K.shape
+    assert(n == n1), "Kernel matrix awaited."
+
+    assert(n >= (m + 1)*lmin)
+    assert(n <= (m + 1)*lmax)
+    assert(lmax >= lmin >= 1)
+
+    if verbose:
+        #print "n =", n
+        print ("Precomputing scatters...")
+    J = calc_scatters(K)
+
+    if out_scatters is not None:
+        out_scatters[0] = J
+
+    if verbose:
+        print ("Inferring best change points...")
+    # I[k, l] - value of the objective for k change-points and l first frames
+    I = 1e101*np.ones((m+1, n+1))
+    I[0, lmin:lmax] = J[0, lmin-1:lmax-1]
+
+    if backtrack:
+        # p[k, l] --- "previous change" --- best t[k] when t[k+1] equals l
+        p = np.zeros((m+1, n+1), dtype=int)
+    else:
+        p = np.zeros((1,1), dtype=int)
+
+    code = r"""
+    #define max(x,y) ((x)>(y)?(x):(y))
+    for (int k=1; k<m+1; k++) {
+        for (int l=(k+1)*lmin; l<n+1; l++) {
+            I(k, l) = 1e101;
+            for (int t=max(k*lmin,l-lmax); t<l-lmin+1; t++) {
+                double c = I(k-1, t) + J(t, l-1);
+                if (c < I(k, l)) {
+                    I(k, l) = c;
+                    if (backtrack == 1) {
+                        p(k, l) = t;
+                    }
+                }
+            }
+        }
+    }
+    """
+    weave.inline(code, ['m','n','p','I','J','lmin','lmax','backtrack'], \
+        global_dict={'m':m, 'n':n, 'p':p, 'I':I, 'J':J, \
+        'lmin':lmin, 'lmax':lmax, 'backtrack': int(1) if backtrack else int(0)},
+        type_converters=weave.converters.blitz)
+
+    # Collect change points by backtracking through the "previous change" table
+    cps = np.zeros(m, dtype=int)
+
+    if backtrack:
+        cur = n
+        for k in range(m, 0, -1):
+            cps[k-1] = p[k, cur]
+            cur = cps[k-1]
+
+    scores = I[:, n].copy()
+    scores[scores > 1e99] = np.inf
+    return cps, scores
+
+
From 078f2f06fb0745e2d58d6b5afa0f5032585c813c Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Tue, 4 Jun 2019 19:44:17 +0900
Subject: [PATCH 23/31] Commit: Commit init

---
 utils/KTS/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 utils/KTS/__init__.py

diff --git a/utils/KTS/__init__.py b/utils/KTS/__init__.py
new file mode 100644
index 0000000..fd84190
--- /dev/null
+++ b/utils/KTS/__init__.py
@@ -0,0 +1 @@
+from cpd_auto import *
\ No newline at end of file
From 5437f381ef8b417437edf2c3054349ab1a39428b Mon Sep 17 00:00:00 2001
From: Shin Donghwan
Date: Wed, 5 Jun 2019 09:53:14 +0900
Subject: [PATCH 24/31] Update README.md

* Update README.md
 - How to train, evaluate, test

---
 README.md | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b66404e..a12cb37 100644
--- a/README.md
+++ b/README.md
@@ -25,13 +25,23 @@ As a result, the dataset is randomly split for 5 times, which are saved as json
 
 Train and test codes are written in `main.py`. To see the detailed arguments, please do `python main.py -h`.
 
 ## How to train
+* Edit config/config.py (see config/README.txt)
+
 ```bash
-python main.py -d datasets/eccv16_dataset_summe_google_pool5.h5 -s datasets/summe_splits.json -m summe --gpu 0 --save-dir log/summe-split0 --split-id 0 --verbose
+python video_summarization.py
+```
+## How to evaluate
+* Edit config/config.py (see config/README.txt)
+
+```bash
+python video_summarization.py
 ```
 ## How to test
+* Edit config/config.py (see config/README.txt)
+
 ```bash
-python main.py -d datasets/eccv16_dataset_summe_google_pool5.h5 -s datasets/summe_splits.json -m summe --gpu 0 --save-dir log/summe-split0 --split-id 0 --evaluate --resume path_to_your_model.pth.tar --verbose --save-results
+python video_summarization.py
 ```
 
 If argument `--save-results` is enabled, output results will be saved to `results.h5` under the same folder specified by `--save-dir`. To visualize the score-vs-gtscore, simply do
From 9e7f9cf3c26713e5e4956ee0b4afb09d2fa4a98d Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Wed, 5 Jun 2019 10:11:26 +0900
Subject: [PATCH 25/31] Commit: Commit README.txt

---
 config/README.txt | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 config/README.txt

diff --git a/config/README.txt b/config/README.txt
new file mode 100644
index 0000000..54dcc3b
--- /dev/null
+++ b/config/README.txt
@@ -0,0 +1,34 @@
+Configuration README
+====================
+
+1. example config to train
+# Dataset options
+DATASET = 'datasets/eccv16_dataset_summe_google_pool5.h5'
+SPLIT = 'datasets/summe_splits.json'
+SPLIT_ID = 0
+METRIC = 'summe'
+
+# Misc
+GPU = '0'
+EVALUATE = False
+TEST = False
+VERBOSE = True
+SAVE_DIR = 'log/summe-split0'
+
+2. example config to evaluate
+# Dataset options
+DATASET = 'datasets/eccv16_dataset_summe_google_pool5.h5'
+SPLIT = 'datasets/summe_splits.json'
+SPLIT_ID = 0
+METRIC = 'summe'
+
+# Misc
+GPU = '0'
+EVALUATE = True
+TEST = False
+RESUME = 'log/summe-split0/model_epoch60.pth.tar'
+VERBOSE = True
+SAVE_DIR = 'log/summe-split0'
+SAVE_RESULTS = True
+
+3. example config to test
\ No newline at end of file
From ed0750adc432e7e01657c0e58043754ca2ba62fd Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Wed, 5 Jun 2019 10:11:51 +0900
Subject: [PATCH 26/31] Update: Update RESUME

---
 config/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.py b/config/config.py
index 0419535..d2d8e76 100644
--- a/config/config.py
+++ b/config/config.py
@@ -26,7 +26,7 @@
 USE_CPU = False # use cpu device
 EVALUATE = False # whether to do evaluation only
 TEST = False # whether to do test only
-RESUME = False # path to resume file
+RESUME = '' # path to resume file
 VERBOSE = True # whether to show detailed test results
 SAVE_DIR = 'log/summe-split0' # path to save output (default: log/)
 SAVE_RESULTS = True # whether to save output results
\ No newline at end of file
From 58c93a3e51e1be524380d81178b30b14e4c00eb3 Mon Sep 17 00:00:00 2001
From: dhshin <102wjdql1!>
Date: Wed, 5 Jun 2019 19:48:49 +0900
Subject: [PATCH 27/31] Commit: Commit Generate Dataset

---
 utils/generate_dataset.py | 148 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 utils/generate_dataset.py

diff --git a/utils/generate_dataset.py b/utils/generate_dataset.py
new file mode 100644
index 0000000..8b86207
--- /dev/null
+++ b/utils/generate_dataset.py
@@ -0,0 +1,148 @@
+"""
+    Generate Dataset
+
+    1. Converting video to frames
+    2. Extracting features
+    3. Getting change points
+    4. User Summary (for evaluation)
+
+"""
+import os, sys
+sys.path.append('../')
+from networks.CNN import ResNet
+from utils.KTS.cpd_auto import cpd_auto
+from tqdm import tqdm
+import math
+import cv2
+import numpy as np
+import h5py
+
+class Generate_Dataset:
+    def __init__(self, video_path, save_path):
+        self.resnet = ResNet()
+        self.dataset = {}
+        self.video_list = []
+        self.video_path = ''
+        self.frame_root_path = './frames'
+        self.h5_file = h5py.File(save_path, 'w')
+
+        self._set_video_list(video_path)
+
+    def _set_video_list(self, video_path):
+        if os.path.isdir(video_path):
+            self.video_path = video_path
+            self.video_list = os.listdir(video_path)
+            self.video_list.sort()
+        else:
+            self.video_path = ''
+            self.video_list.append(video_path)
+
+        for idx, file_name in enumerate(self.video_list):
+            self.dataset['video_{}'.format(idx+1)] = {}
+            self.h5_file.create_group('video_{}'.format(idx+1))
+
+
+    def _extract_feature(self, frame):
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        frame = cv2.resize(frame, (224, 224))
+        res_pool5 = self.resnet(frame)
+        frame_feat = res_pool5.cpu().data.numpy().flatten()
+
+        return frame_feat
+
+    def _get_change_points(self, video_feat, n_frame, fps):
+        n = n_frame / fps
+        m = int(math.ceil(n/2.0))
+        K = np.dot(video_feat, video_feat.T)
+        change_points, _ = cpd_auto(K, m, 1)
+        change_points = np.concatenate(([0], change_points, [n_frame-1]))
+
+        temp_change_points = []
+        for idx in range(len(change_points)-1):
+            segment = [change_points[idx], change_points[idx+1]-1]
+            if idx == len(change_points)-2:
+                segment = [change_points[idx], change_points[idx+1]]
+
+            temp_change_points.append(segment)
+        change_points = np.array(list(temp_change_points))
+
+        temp_n_frame_per_seg = []
+        for change_points_idx in range(len(change_points)):
+            seg_len = change_points[change_points_idx][1] - change_points[change_points_idx][0] # avoid shadowing the n_frame argument
+            temp_n_frame_per_seg.append(seg_len)
+        n_frame_per_seg = np.array(list(temp_n_frame_per_seg))
+
+        return change_points, n_frame_per_seg
+
+    # TODO : save dataset
+    def _save_dataset(self):
+        pass
+
+    def generate_dataset(self):
+        for video_idx, video_filename in enumerate(tqdm(self.video_list)):
+            video_path = video_filename
+            if os.path.isdir(self.video_path):
+                video_path = os.path.join(self.video_path, video_filename)
+
+            video_basename = os.path.basename(video_path).split('.')[0]
+
+            if not os.path.exists(os.path.join(self.frame_root_path, video_basename)):
+                os.makedirs(os.path.join(self.frame_root_path, video_basename)) # makedirs also creates ./frames if missing
+
+            video_capture = cv2.VideoCapture(video_path)
+
+            fps = video_capture.get(cv2.CAP_PROP_FPS)
+            n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+
+            frame_list = []
+            picks = []
+            video_feat = None
+            video_feat_for_train = None
+            for frame_idx in tqdm(range(n_frames-1)):
+                success, frame = video_capture.read()
+                if success:
+                    frame_list.append(frame)
+                    frame_feat = self._extract_feature(frame)
+
+                    if frame_idx % 15 == 0:
+                        picks.append(frame_idx)
+
+                        if video_feat_for_train is None:
+                            video_feat_for_train = frame_feat
+                        else:
+                            video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
+
+                    if video_feat is None:
+                        video_feat = frame_feat
+                    else:
+                        video_feat = np.vstack((video_feat, frame_feat))
+
+                    img_filename = "{}.jpg".format(str(frame_idx).zfill(5))
+                    cv2.imwrite(os.path.join(self.frame_root_path, video_basename, img_filename), frame)
+
+                else:
+                    break
+
+            video_capture.release()
+
+            change_points, n_frame_per_seg = self._get_change_points(video_feat, n_frames, fps)
+
+            # self.dataset['video_{}'.format(video_idx+1)]['frames'] = list(frame_list)
+            # self.dataset['video_{}'.format(video_idx+1)]['features'] = list(video_feat)
+            # self.dataset['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
+            # self.dataset['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
+            # self.dataset['video_{}'.format(video_idx+1)]['fps'] = fps
+            # self.dataset['video_{}'.format(video_idx+1)]['change_points'] = change_points
+            # self.dataset['video_{}'.format(video_idx+1)]['n_frame_per_seg'] = n_frame_per_seg
+
+            self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
+            self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
+            self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
+            self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
+            self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = change_points
+            self.h5_file['video_{}'.format(video_idx+1)]['n_frame_per_seg'] = n_frame_per_seg
+
+if __name__ == "__main__":
+    gen = Generate_Dataset('/data/video_summarization/dataset_SumMe/videos/Air_Force_One.mp4', 'summe_dataset.h5')
+    gen.generate_dataset()
+    gen.h5_file.close()
\ No newline at end of file
From cdc1c03e43f7e7982bf4285778c50211e55ca812 Mon Sep 17 00:00:00 2001
From: Shin Donghwan
Date: Wed, 5 Jun 2019 19:54:35 +0900
Subject: [PATCH 28/31] Update README.md

Added KTS

---
 README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index a12cb37..e52e1e3 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,12 @@ This repo contains the Pytorch implementation of the AAAI'18 paper - [Deep Reinf
 
 The main requirements are [pytorch](http://pytorch.org/) (`v0.4.0`) and python `2.7`. Some dependencies that may not be installed in your machine are [tabulate](https://pypi.org/project/tabulate/) and [h5py](https://github.com/h5py/h5py). Please install other missing dependencies.
 
+## TODO
+1. Edit README
+2. Reconstruct Repository
+3. ~~Add KTS(Kernel Temporal Segmentation)~~
+4. Edit code for test
+
 ## Get started
 1. Download preprocessed datasets
 ```bash
@@ -83,11 +89,6 @@ Please remember to specify the naming format of your video frames on this [line]
 
 ## How to use your own data
 We preprocess data by extracting image features for videos and save them to `h5` file. The file format looks like [this](https://github.com/KaiyangZhou/vsumm-reinforce/issues/1#issuecomment-363492711). After that, you can make split via `create_split.py`. If you wanna train policy network using the entire dataset, just do `train_keys = dataset.keys()`. [Here](https://github.com/KaiyangZhou/pytorch-vsumm-reinforce/blob/master/main.py#L75) is the code where we initialize dataset. If you have any problems, feel free to contact me by email or raise an `issue`.
-## TODO
-1. Edit README
-2. Reconstruct Repository
-3. Add KTS(Kernel Temporal Segmentation)
-4. Edit code for test
 
 ## Citation
 ```
From 3c69c6bad4bc5bdacdc27a5388a9c605229a75fe Mon Sep 17 00:00:00 2001
From: SinDongHwan
Date: Thu, 20 Jun 2019 17:55:05 +0900
Subject: [PATCH 29/31] Update: Remove append all frames

* Keeping every frame in a list leaks RAM, so remove it

---
 utils/generate_dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/utils/generate_dataset.py b/utils/generate_dataset.py
index 8b86207..4e3c4de 100644
--- a/utils/generate_dataset.py
+++ b/utils/generate_dataset.py
@@ -101,7 +101,6 @@ def generate_dataset(self):
             for frame_idx in tqdm(range(n_frames-1)):
                 success, frame = video_capture.read()
                 if success:
-                    frame_list.append(frame)
                     frame_feat = self._extract_feature(frame)
From 1cbcc7c95771f3267806dfdf3af8f1a4ed0fed66 Mon Sep 17 00:00:00 2001
From: SinDongHwan
Date: Thu, 20 Jun 2019 19:26:59 +0900
Subject: [PATCH 30/31] Update: Update Code

Fix bug: nfps must be list type

---
 video_summarization.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/video_summarization.py b/video_summarization.py
index 6b4a1d2..e962ef6 100644
--- a/video_summarization.py
+++ b/video_summarization.py
@@ -41,12 +41,15 @@ def main():
     print("Initialize dataset {}".format(config.DATASET))
     dataset = h5py.File(config.DATASET, 'r')
     num_videos = len(dataset.keys())
+
     splits = read_json(config.SPLIT)
-    assert config.SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(config.SPLIT_ID, len(splits ))
-    split = splits[config.SPLIT_ID]
-    train_keys = split["train_keys"]
-    test_keys = split["test_keys"]
-    print("# total videos {}. # train videos {}. # test videos {}.".format(num_videos, len(train_keys), len(test_keys)))
+
+    if not config.TEST:
+        assert config.SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(config.SPLIT_ID, len(splits ))
+        split = splits[config.SPLIT_ID]
+        train_keys = split["train_keys"]
+        test_keys = split["test_keys"]
+        print("# total videos {}. # train videos {}. # test videos {}.".format(num_videos, len(train_keys), len(test_keys)))
 
     print("Initialize model")
     model = DSN(in_dim=config.INPUT_DIM, hid_dim=config.HIDDEN_DIM, num_layers = config.NUM_LAYERS, cell=config.RNN_CELL)
@@ -68,7 +71,7 @@ def main():
     if use_gpu:
         model = nn.DataParallel(model).cuda()
 
     if config.TEST:
         print("Test only")
-        test(model, dataset, ['video_0'], use_gpu)
+        test(model, dataset, ['video_1'], use_gpu)
         return
@@ -205,8 +208,7 @@ def test(model, dataset, test_data, use_gpu):
             probs = model(seq)
             probs = probs.data.cpu().squeeze().numpy()
 
             cps = dataset[key]['change_points'][...]
             num_frames = dataset[key]['n_frames'][...]
-            nfps = dataset[key]['n_frame_per_seg'][...]
-            nfps = [65, 2159]
+            nfps = dataset[key]['n_frame_per_seg'][...].tolist()
             positions = dataset[key]['picks'][...]
From e3c3a6df8c15bbcf76af73efd29cf61673c57971 Mon Sep 17 00:00:00 2001
From: Shin Donghwan
Date: Thu, 24 Oct 2019 11:19:59 +0900
Subject: [PATCH 31/31] Update README.md

Add a dataset download link from my onedrive.

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index e52e1e3..4204b75 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,11 @@ cd pytorch-vsumm-reinforce
 wget http://www.eecs.qmul.ac.uk/~kz303/vsumm-reinforce/datasets.tar.gz
 tar -xvzf datasets.tar.gz
 ```
+* If the download fails, open the following link (the same dataset is mirrored on my OneDrive):
+```
+https://onedrive.live.com/?authkey=%21AO1tsqjDVCeakGg&cid=6FD3437627D709EE&id=6FD3437627D709EE%212809&parId=root&action=locate
+```
+
 2. Make splits
 ```bash
 python create_split.py -d datasets/eccv16_dataset_summe_google_pool5.h5 --save-dir datasets --save-name summe_splits --num-splits 5
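 # Assuming create_split.py's default train/test ratio, this should write
 # datasets/summe_splits.json: a list of 5 dicts (one per split), each holding
 # the "train_keys" and "test_keys" that SPLIT_ID selects in video_summarization.py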