From 28ad980a7f0620c29c5d48b51afcea639f873304 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Sat, 6 Oct 2018 14:28:15 +0200 Subject: [PATCH 01/57] Update installation packages and installation instructions --- doc/start/install.rst | 5 ----- environment.yml | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/start/install.rst b/doc/start/install.rst index 734a3de..8181745 100644 --- a/doc/start/install.rst +++ b/doc/start/install.rst @@ -10,11 +10,6 @@ Using Anaconda - Then activate the environment with ``source activate dh_segment`` -- It might be possible that the following needs to be added to your ``~/.bashrc`` :: - - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" - export CUDA_HOME=/usr/local/cuda - - To be able to import the package (i.e ``import dh_segment``) in your code, you have to run : :: python setup.py install diff --git a/environment.yml b/environment.yml index 898cd22..dcdc250 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - imageio=2.3.0 - - opencv=3.4.1 + - opencv=3.4.2 - pandas=0.23.0 - pillow=5.1.0 - python=3.6 @@ -13,8 +13,8 @@ dependencies: - setuptools=39.1.0 - shapely=1.6.4 - tqdm=4.23.3 + - tensorflow-gpu=1.10 - pip: - better-exceptions==0.2.1 - sacred==0.7.3 - - tensorflow-gpu==1.11 From 957cd58a83cae2dd4de69dd65127ac571c5a1589 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:02:28 +0200 Subject: [PATCH 02/57] Revamp of the network description and architecture in a more flexible way. --- dh_segment/estimator_fn.py | 50 +-- dh_segment/network/model.py | 386 +++++++----------------- dh_segment/network/pretrained_models.py | 272 ++++++++++------- dh_segment/utils/misc.py | 60 ++++ dh_segment/utils/params_config.py | 58 ++-- 5 files changed, 362 insertions(+), 464 deletions(-) diff --git a/dh_segment/estimator_fn.py b/dh_segment/estimator_fn.py index d25303b..a4c1f47 100644 --- a/dh_segment/estimator_fn.py +++ b/dh_segment/estimator_fn.py @@ -2,7 +2,6 @@ from .utils import PredictionType, ModelParams, TrainingParams, \ class_to_label_image, multiclass_to_label_image import numpy as np -from .network.model import inference_resnet_v1_50, inference_vgg16, inference_u_net def model_fn(mode, features, labels, params): @@ -18,45 +17,22 @@ def model_fn(mode, features, labels, params): input_images = tf.pad(input_images, [[0, 0], [margin, margin], [margin, margin], [0, 0]], mode='SYMMETRIC', name='mirror_padding') - if model_params.pretrained_model_name == 'vgg16': - network_output = inference_vgg16(input_images, - model_params, - model_params.n_classes, - use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = 'vgg_16' + encoder_class = model_params.get_encoder() + encoder = encoder_class(**model_params.encoder_params) + decoder_class = model_params.get_decoder() + decoder = decoder_class(**model_params.decoder_params) - elif model_params.pretrained_model_name == 'resnet50': - network_output = inference_resnet_v1_50(input_images, - model_params, - model_params.n_classes, - use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = 'resnet_v1_50' - elif model_params.pretrained_model_name == 'unet': - network_output = inference_u_net(input_images, - model_params, - model_params.n_classes, - 
use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = None - else: - raise NotImplementedError + feature_maps = encoder(input_images) + network_output = decoder(feature_maps, num_classes=model_params.n_classes) if mode == tf.estimator.ModeKeys.TRAIN: - if key_restore_model is not None: + pretrained_file, pretrained_vars = encoder.pretrained_information() + if pretrained_file: # Pretrained weights as initialization - pretrained_restorer = tf.train.Saver(var_list=[v for v in tf.global_variables() - if key_restore_model in v.name]) + pretrained_restorer = tf.train.Saver(var_list=pretrained_vars) def init_fn(scaffold, session): - pretrained_restorer.restore(session, model_params.pretrained_model_file) + pretrained_restorer.restore(session, pretrained_file) else: init_fn = None else: @@ -92,8 +68,10 @@ def init_fn(scaffold, session): if prediction_type == PredictionType.CLASSIFICATION: onehot_labels = tf.one_hot(indices=labels, depth=model_params.n_classes) with tf.name_scope("loss"): - per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=network_output, - labels=onehot_labels, name='per_pixel_loss') + #per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=network_output, + # labels=onehot_labels, name='per_pixel_loss') + per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=network_output, + labels=onehot_labels, name='per_pixel_loss') if training_params.focal_loss_gamma > 0.0: # Probability per pixel of getting the correct label probs_correct_label = tf.reduce_max(tf.multiply(prediction_probs, onehot_labels)) diff --git a/dh_segment/network/model.py b/dh_segment/network/model.py index cc55bae..5eb874a 100644 --- a/dh_segment/network/model.py +++ b/dh_segment/network/model.py @@ -1,310 +1,134 @@ #!/usr/bin/env python import tensorflow as tf -from ..utils import ModelParams from tensorflow.contrib import layers # TODO migration to tf.layers ? -from tensorflow.contrib.slim.nets import resnet_v1 from tensorflow.contrib.slim import arg_scope -from .pretrained_models import vgg_16_fn, resnet_v1_50_fn -from collections import OrderedDict +from abc import ABC, abstractmethod +from typing import List, Union, Tuple, Optional, Dict -def inference_vgg16(images: tf.Tensor, params: ModelParams, num_classes: int, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - with tf.name_scope('vgg_augmented'): +class Encoder(ABC): + @abstractmethod + def __call__(self, images: tf.Tensor) -> List[tf.Tensor]: + """ + + :param images: [NxHxWx3] float32 [0..255] input images + :return: a list of the feature maps in decreasing spatial resolution (first element is most likely the input + image itself, then the output of the first pooling op, etc...) 
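+
+        Illustrative example (hypothetical shapes, not a contract of any specific
+        encoder): called on a [1x512x512x3] batch, a 3-level encoder could return
+        feature maps of shapes [1x512x512x3], [1x256x256x64] and [1x128x128x128].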
+ """ + pass + + def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict]]: + """ + + :return: The filename of the pretrained checkpoint and the corresponding variables (List of Dict mapping) + or `None` if no-pretraining is done + """ + return None, None + - if use_batch_norm: - if params.batch_renorm: +class Decoder(ABC): + @abstractmethod + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int) -> tf.Tensor: + """ + + :param feature_maps: list of feature maps, in decreasing spatial resolution, first one being at the original resolution + :return: [N,H,W,num_classes] float32 tensor of logit scores + """ + pass + + +class SimpleDecoder(Decoder): + def __init__(self, upsampling_dims: List[int], max_depth: int = None, train_batchnorm=False, weight_decay=0.): + self.upsampling_dims = upsampling_dims + self.max_depth = max_depth + self.weight_decay = weight_decay + if train_batchnorm: + # TODO + renorm = False + if renorm: renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 10} renorm_momentum = 0.98 else: renorm_clipping = None renorm_momentum = 0.99 - batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, name='batch_norm', - renorm=params.batch_renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum) + self.batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=train_batchnorm, + name='batch_norm', + renorm=renorm, + renorm_clipping=renorm_clipping, + renorm_momentum=renorm_momentum) else: - batch_norm_fn = None - - def upsample_conv(pooled_layer, previous_layer, layer_params, number): - with tf.name_scope('deconv{}'.format(number)): - if previous_layer.get_shape()[1].value and previous_layer.get_shape()[2].value: - target_shape = previous_layer.get_shape()[1:3] - else: - target_shape = tf.shape(previous_layer)[1:3] - upsampled_layer = tf.image.resize_images(pooled_layer, target_shape, - method=tf.image.ResizeMethod.BILINEAR) - input_tensor = tf.concat([upsampled_layer, previous_layer], 3) - - for i, (nb_filters, filter_size) in enumerate(layer_params): - input_tensor = layers.conv2d( - inputs=input_tensor, - num_outputs=nb_filters, - kernel_size=[filter_size, filter_size], - normalizer_fn=batch_norm_fn, - scope="conv{}_{}".format(number, i + 1) - ) - return input_tensor - - # Original VGG : - vgg_net, intermediate_levels = vgg_16_fn(images, blocks=5, weight_decay=weight_decay) - out_tensor = vgg_net - - # Intermediate convolution - if params.intermediate_conv is not None: - with tf.name_scope('intermediate_convs'): - for layer_params in params.intermediate_conv: - for k, (nb_filters, filter_size) in enumerate(layer_params): - out_tensor = layers.conv2d(inputs=out_tensor, - num_outputs=nb_filters, - kernel_size=[filter_size, filter_size], - normalizer_fn=batch_norm_fn, - scope='conv_{}'.format(k + 1)) - - # Upsampling : - with tf.name_scope('upsampling'): - selected_upscale_params = [l for i, l in enumerate(params.upscale_params) - if params.selected_levels_upscaling[i]] - - assert len(params.selected_levels_upscaling) == len(intermediate_levels), \ - 'Upscaling : {} is different from {}'.format(len(params.selected_levels_upscaling), - len(intermediate_levels)) - - selected_intermediate_levels = [l for i, l in enumerate(intermediate_levels) - if params.selected_levels_upscaling[i]] - - # Upsampling loop - n_layer = 1 - for i in reversed(range(len(selected_intermediate_levels))): - out_tensor = upsample_conv(out_tensor, selected_intermediate_levels[i], - 
selected_upscale_params[i], n_layer) - n_layer += 1 + self.batch_norm_fn = None + + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int): + + # Upsampling + with tf.variable_scope('SimpleDecoder'): + with arg_scope([layers.conv2d], + normalizer_fn=self.batch_norm_fn, + weights_regularizer=layers.l2_regularizer(self.weight_decay)): + + assert len(self.upsampling_dims) + 1 == len(feature_maps), \ + 'Upscaling : length of {} does not match {}'.format(len(self.upsampling_dims), + len(feature_maps)) + + # Force layers to not be too big to reduce memory usage + for i, l in enumerate(feature_maps): + if self.max_depth and l.get_shape()[-1] > self.max_depth: + feature_maps[i] = layers.conv2d( + inputs=l, + num_outputs=self.max_depth, + kernel_size=[1, 1], + scope="dimreduc_{}".format(i), + normalizer_fn=self.batch_norm_fn, + activation_fn=None + ) + + # Deconvolving loop + out_tensor = feature_maps[-1] + for i, f_map in reversed(list(enumerate(feature_maps[:-1]))): + out_tensor = _upsample_concat(out_tensor, f_map, scope_name='upsample_{}'.format(i)) + out_tensor = layers.conv2d(inputs=out_tensor, + num_outputs=self.upsampling_dims[i], + kernel_size=[3, 3], + scope="conv_{}".format(i)) logits = layers.conv2d(inputs=out_tensor, num_outputs=num_classes, activation_fn=None, kernel_size=[1, 1], - scope="conv{}-logits".format(n_layer)) - - return logits # [B,h,w,Classes] - - -def inference_resnet_v1_50(images, params, num_classes, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - if use_batch_norm: - if params.batch_renorm: - renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 1} - renorm_momentum = 0.98 - else: - renorm_clipping = None - renorm_momentum = 0.99 - batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, name='batch_norm', - renorm=params.batch_renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum) - else: - batch_norm_fn = None - - def upsample_conv(input_tensor, previous_intermediate_layer, layer_params, number) -> tf.Tensor: - """ - Deconvolution (upscaling) layers - - :param input_tensor: - :param previous_intermediate_layer: - :param layer_params: - :param number: - :return: - """ - with tf.variable_scope('deconv_{}'.format(number)): - if previous_intermediate_layer.get_shape()[1].value and \ - previous_intermediate_layer.get_shape()[2].value: - target_shape = previous_intermediate_layer.get_shape()[1:3] - else: - target_shape = tf.shape(previous_intermediate_layer)[1:3] - upsampled_layer = tf.image.resize_images(input_tensor, target_shape, - method=tf.image.ResizeMethod.BILINEAR) - net = tf.concat([upsampled_layer, previous_intermediate_layer], 3) - - filter_size, nb_bottlenecks = layer_params - if nb_bottlenecks > 0: - for i in range(nb_bottlenecks): - net = resnet_v1.bottleneck( - inputs=net, - depth=filter_size, - depth_bottleneck=filter_size // 4, - stride=1 - ) - else: - net = layers.conv2d( - inputs=net, - num_outputs=filter_size, - kernel_size=[3, 3], - scope="conv{}".format(number) - ) - - return net + scope="conv-logits") - # Original ResNet - blocks_needed = max([i for i, is_needed in enumerate(params.selected_levels_upscaling) if is_needed]) - resnet_net, intermediate_layers = resnet_v1_50_fn(images, is_training=False, blocks=blocks_needed, - weight_decay=weight_decay, renorm=False, - corrected_version=params.correct_resnet_version) + return logits - # Upsampling - with tf.variable_scope('upsampling'): - with arg_scope([layers.conv2d], - normalizer_fn=batch_norm_fn, - 
weights_regularizer=layers.l2_regularizer(weight_decay)): - selected_upscale_params = [l for i, l in enumerate(params.upscale_params) - if params.selected_levels_upscaling[i]] - assert len(selected_upscale_params) == len(intermediate_layers), \ - 'Upscaling : {} is different from {}'.format(len(selected_upscale_params), - len(intermediate_layers)) - - selected_intermediate_levels = [l for i, l in enumerate(intermediate_layers) - if params.selected_levels_upscaling[i]] - - # Rescaled image values to [0,1] - selected_intermediate_levels.insert(0, images/255.0) - - # Force layers to not be too big to reduce memory usage - for i, l in enumerate(selected_intermediate_levels): - if l.get_shape()[-1] > params.max_depth: - selected_intermediate_levels[i] = layers.conv2d( - inputs=l, - num_outputs=params.max_depth, - kernel_size=[1, 1], - scope="dimreduc_{}".format(i), - # normalizer_fn=batch_norm_fn, - activation_fn=None - ) - - # Deconvolving loop - out_tensor = selected_intermediate_levels[-1] - n_layer = 1 - for i in reversed(range(len(selected_intermediate_levels) - 1)): - out_tensor = upsample_conv(out_tensor, selected_intermediate_levels[i], - selected_upscale_params[i], n_layer) - - n_layer += 1 - - if images.get_shape()[1].value and images.get_shape()[2].value: - target_shape = images.get_shape()[1:3] - else: - target_shape = tf.shape(images)[1:3] - out_tensor = tf.image.resize_images(out_tensor, target_shape, - method=tf.image.ResizeMethod.BILINEAR) - - logits = layers.conv2d(inputs=out_tensor, - num_outputs=num_classes, - activation_fn=None, - kernel_size=[1, 1], - scope="conv{}-logits".format(n_layer)) - - return logits - - -def conv_bn_layer(input_tensor, kernel_size, output_channels, stride=1, bn=False, - is_training=True, relu=True): - # with tf.variable_scope(name) as scope: - conv_layer = layers.conv2d(inputs=input_tensor, - num_outputs=output_channels, - kernel_size=kernel_size, - stride=stride, - activation_fn=tf.identity, - padding='SAME') - if bn and relu: - # How to use Batch Norm: https://github.com/martin-gorner/tensorflow-mnist-tutorial/blob/master/README_BATCHNORM.md - - # Why scale is false when using ReLU as the next activation - # https://datascience.stackexchange.com/questions/22073/why-is-scale-parameter-on-batch-normalization-not-needed-on-relu/22127 - - # Using fuse operation: https://www.tensorflow.org/performance/performance_guide#common_fused_ops - conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=False, is_training=is_training, fused=True) - conv_layer = tf.nn.relu(conv_layer) - - if bn and not relu: - conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=True, is_training=is_training) - - # print('Conv layer {0} -> {1}'.format(input_tensor.get_shape().as_list(),conv_layer.get_shape().as_list())) - return conv_layer - - -def _get_image_shape_tensor(tensor: tf.Tensor): +def _get_image_shape_tensor(tensor: tf.Tensor) -> Union[Tuple[int, int], tf.Tensor]: + """ + Get the image shape of the tensor + :param tensor: Input image tensor [N,H,W,...] 
+ :return: a (int, int) tuple if shape is defined, otherwise the corresponding tf.Tensor value + """ if tensor.get_shape()[1].value and \ - tensor.get_shape()[2].value: + tensor.get_shape()[2].value: target_shape = tensor.get_shape()[1:3] else: target_shape = tf.shape(tensor)[1:3] return target_shape -def inference_u_net(images: tf.Tensor, params: ModelParams, num_classes: int, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - enc_layers = OrderedDict() - dec_layers = OrderedDict() - - with tf.variable_scope('U-Net'): - - with tf.variable_scope('Encoder'): - - conv_layer = layers.conv2d(images, num_outputs=64, kernel_size=(3, 3), padding='SAME', - activation_fn=tf.identity) - - enc_layers['conv_layer_enc_64'] = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=64, - bn=True, is_training=is_training, relu=True) - - conv_layer = layers.max_pool2d(inputs=enc_layers['conv_layer_enc_64'], kernel_size=(2, 2), stride=2) - - for n_feat in [128, 256, 512]: - enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=n_feat, - bn=True, - is_training=is_training, relu=True) - - enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer( - enc_layers['conv_layer_enc_' + str(n_feat)], kernel_size=(3, 3), - output_channels=n_feat, - bn=True, is_training=is_training, relu=True) - - conv_layer = layers.max_pool2d(inputs=enc_layers['conv_layer_enc_' + str(n_feat)], kernel_size=(2, 2), stride=2) - - conv_layer_enc_1024 = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=1024, - bn=True, is_training=is_training, relu=True) - - with tf.variable_scope('Decoder'): - dec_layers['conv_layer_dec_512'] = conv_bn_layer(conv_layer_enc_1024, kernel_size=(3, 3), - output_channels=512, - bn=True, is_training=is_training, relu=True) - - reduced_patchsize = _get_image_shape_tensor(enc_layers['conv_layer_enc_512']) - dec_layers['conv_layer_dec_512'] = tf.image.resize_images(dec_layers['conv_layer_dec_512'], size=reduced_patchsize, - method=tf.image.ResizeMethod.BILINEAR) - - for n_feat in [512, 256, 128, 64]: - - dec_layers['conv_layer_dec_' + str(n_feat * 2)] = tf.concat([dec_layers['conv_layer_dec_' + str(n_feat)], - enc_layers['conv_layer_enc_' + str(n_feat)]], - axis=3) - dec_layers['conv_layer_dec_' + str(n_feat)] = conv_bn_layer( - dec_layers['conv_layer_dec_' + str(n_feat * 2)], kernel_size=(3, 3), - output_channels=n_feat, - bn=True, is_training=is_training, relu=True) - if n_feat > 64: - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))] = conv_bn_layer( - dec_layers['conv_layer_dec_' + str(n_feat)], kernel_size=(3, 3), - output_channels=n_feat / 2, - bn=True, is_training=is_training, relu=True) - - reduced_patchsize = _get_image_shape_tensor(enc_layers['conv_layer_enc_' + str(int(n_feat / 2))]) - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))] = tf.image.resize_images( - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))], - size=reduced_patchsize, - method=tf.image.ResizeMethod.BILINEAR) - - return layers.conv2d(dec_layers['conv_layer_dec_64'], num_outputs=num_classes, kernel_size=(3, 3), - padding='SAME', activation_fn=tf.identity) +def _upsample_concat(pooled_layer: tf.Tensor, previous_layer: tf.Tensor, scope_name='UpsampleConcat'): + """ + + :param pooled_layer: [N,H,W,C] coarse layer + :param previous_layer: [N,H',W',C'] fine layer (H'>H, and W'>W) + :param scope_name: + :return: [N,H',W',C+C'] concatenation of upsampled-`pooled_layer` and `previous_layer` + """ + with tf.name_scope(scope_name): 
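+        # This implements a U-Net-style skip connection: the coarse map is
+        # bilinearly upsampled to the fine map's spatial size, then the two are
+        # concatenated along the channel axis (the two steps below).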
+ # Upsamples the coarse level + target_shape = _get_image_shape_tensor(previous_layer) + upsampled_layer = tf.image.resize_images(pooled_layer, target_shape, + method=tf.image.ResizeMethod.BILINEAR) + # Concatenate the upsampled-coarse and the other feature_map + input_tensor = tf.concat([upsampled_layer, previous_layer], 3) + return input_tensor diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models.py index 9f69af6..fc55feb 100644 --- a/dh_segment/network/pretrained_models.py +++ b/dh_segment/network/pretrained_models.py @@ -2,6 +2,10 @@ import tensorflow as tf from tensorflow.contrib.slim import nets import numpy as np +from .model import Encoder +import os +import tarfile +from ..utils.misc import get_data_folder, download_file _VGG_MEANS = [123.68, 116.78, 103.94] @@ -10,114 +14,162 @@ def mean_substraction(input_tensor, means=_VGG_MEANS): return tf.subtract(input_tensor, np.array(means)[None, None, None, :], name='MeanSubstraction') -def vgg_16_fn(input_tensor: tf.Tensor, scope='vgg_16', blocks=5, weight_decay=0.0005) \ - -> (tf.Tensor, list): # list of tf.Tensors (layers) - intermediate_levels = [] - # intermediate_levels.append(input_tensor) - with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=weight_decay)): - with tf.variable_scope(scope, 'vgg_16', [input_tensor]) as sc: - input_tensor = mean_substraction(input_tensor) - intermediate_levels.append(input_tensor) - end_points_collection = sc.original_name_scope + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d. - with slim.arg_scope( - [layers.conv2d, layers.fully_connected, layers.max_pool2d], - outputs_collections=end_points_collection): - net = layers.repeat( - input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool1') - if blocks >= 2: - net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool2') - if blocks >= 3: - net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool3') - if blocks >= 4: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool4') - if blocks >= 5: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') +class ResnetV1_50(Encoder): + def __init__(self, train_batchnorm=False, blocks=4, weight_decay=0.0001, + renorm=True, corrected_version=False): + self.train_batchnorm = train_batchnorm + self.blocks = blocks + self.weight_decay = weight_decay + self.renorm = renorm + self.corrected_version = corrected_version + self.pretrained_file = os.path.join(get_data_folder(), 'resnet_v1_50.ckpt') + if not os.path.exists(self.pretrained_file): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'resnet_v1_50.tar.gz') + download_file('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=get_data_folder()) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file) + print('Pre-trained weights downloaded!') + + def pretrained_information(self): + return self.pretrained_file, [v for v in tf.global_variables() + if 'resnet_v1_50' in v.name + and 'renorm' 
not in v.name] + + def __call__(self, images: tf.Tensor): + outputs = [] + + with slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=self.weight_decay, batch_norm_decay=0.999)), \ + slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=self.renorm): + mean_substracted_tensor = mean_substraction(images) + assert 0 < self.blocks <= 4 + + if self.corrected_version: + def corrected_resnet_v1_block(scope, base_depth, num_units, stride): + """Helper function for creating a resnet_v1 bottleneck block. + + Args: + scope: The scope of the block. + base_depth: The depth of the bottleneck layer for each unit. + num_units: The number of units in the block. + stride: The stride of the block, implemented as a stride in the last unit. + All other units have stride=1. + + Returns: + A resnet_v1 bottleneck block. + """ + return nets.resnet_utils.Block(scope, nets.resnet_v1.bottleneck, [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': stride + }] + [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': 1 + }] * (num_units - 1)) + + blocks_list = [ + corrected_resnet_v1_block('block1', base_depth=64, num_units=3, stride=1), + corrected_resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + corrected_resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), + corrected_resnet_v1_block('block4', base_depth=512, num_units=3, stride=2), + ] + desired_endpoints = [ + 'resnet_v1_50/conv1', + 'resnet_v1_50/block1/unit_3/bottleneck_v1', + 'resnet_v1_50/block2/unit_4/bottleneck_v1', + 'resnet_v1_50/block3/unit_6/bottleneck_v1', + 'resnet_v1_50/block4/unit_3/bottleneck_v1' + ] + else: + blocks_list = [ + nets.resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), + nets.resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + nets.resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), + nets.resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), + ] + desired_endpoints = [ + 'resnet_v1_50/conv1', + 'resnet_v1_50/block1/unit_2/bottleneck_v1', + 'resnet_v1_50/block2/unit_3/bottleneck_v1', + 'resnet_v1_50/block3/unit_5/bottleneck_v1', + 'resnet_v1_50/block4/unit_3/bottleneck_v1' + ] + + net, endpoints = nets.resnet_v1.resnet_v1(mean_substracted_tensor, + blocks=blocks_list[:self.blocks], + num_classes=None, + is_training=self.train_batchnorm, + global_pool=False, + output_stride=None, + include_root_block=True, + reuse=None, + scope='resnet_v1_50') + + # Add standardized original images + outputs.append(mean_substracted_tensor/127.0) + + for d in desired_endpoints[:self.blocks + 1]: + outputs.append(endpoints[d]) + + return outputs + + +class VGG16(Encoder): + def __init__(self, blocks=5, weight_decay=0.0005): + self.blocks = blocks + self.weight_decay = weight_decay + self.pretrained_file = os.path.join(get_data_folder(), 'vgg_16.ckpt') + if not os.path.exists(self.pretrained_file): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'vgg_16.tar.gz') + download_file('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=get_data_folder()) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file) + print('Pre-trained weights downloaded!') + + def pretrained_information(self): + return self.pretrained_file, [v for v in tf.global_variables() + if 'vgg_16' in 
v.name + and 'renorm' not in v.name] + + def __call__(self, images: tf.Tensor): + intermediate_levels = [] + + with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): + with tf.variable_scope(None, 'vgg_16', [images]) as sc: + input_tensor = mean_substraction(images) + intermediate_levels.append(input_tensor) + end_points_collection = sc.original_name_scope + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope( + [layers.conv2d, layers.fully_connected, layers.max_pool2d], + outputs_collections=end_points_collection): + net = layers.repeat( + input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool5') - - return net, intermediate_levels - - -def resnet_v1_50_fn(input_tensor: tf.Tensor, is_training=False, blocks=4, weight_decay=0.0001, - renorm=True, corrected_version=False) -> tf.Tensor: - with slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=weight_decay, batch_norm_decay=0.999)), \ - slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=renorm): - input_tensor = mean_substraction(input_tensor) - assert 0 < blocks <= 4 - - if corrected_version: - def corrected_resnet_v1_block(scope, base_depth, num_units, stride): - """Helper function for creating a resnet_v1 bottleneck block. - - Args: - scope: The scope of the block. - base_depth: The depth of the bottleneck layer for each unit. - num_units: The number of units in the block. - stride: The stride of the block, implemented as a stride in the last unit. - All other units have stride=1. - - Returns: - A resnet_v1 bottleneck block. - """ - return nets.resnet_utils.Block(scope, nets.resnet_v1.bottleneck,[{ - 'depth': base_depth * 4, - 'depth_bottleneck': base_depth, - 'stride': stride - }] + [{ - 'depth': base_depth * 4, - 'depth_bottleneck': base_depth, - 'stride': 1 - }] * (num_units - 1)) - - blocks_list = [ - corrected_resnet_v1_block('block1', base_depth=64, num_units=3, stride=1), - corrected_resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), - corrected_resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), - corrected_resnet_v1_block('block4', base_depth=512, num_units=3, stride=2), - ] - desired_endpoints = [ - 'resnet_v1_50/conv1', - 'resnet_v1_50/block1/unit_3/bottleneck_v1', - 'resnet_v1_50/block2/unit_4/bottleneck_v1', - 'resnet_v1_50/block3/unit_6/bottleneck_v1', - 'resnet_v1_50/block4/unit_3/bottleneck_v1' - ] - else: - blocks_list = [ - nets.resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), - nets.resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), - nets.resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), - nets.resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), - ] - desired_endpoints = [ - 'resnet_v1_50/conv1', - 'resnet_v1_50/block1/unit_2/bottleneck_v1', - 'resnet_v1_50/block2/unit_3/bottleneck_v1', - 'resnet_v1_50/block3/unit_5/bottleneck_v1', - 'resnet_v1_50/block4/unit_3/bottleneck_v1' - ] - - net, endpoints = nets.resnet_v1.resnet_v1(input_tensor, - blocks=blocks_list[:blocks], - num_classes=None, - is_training=is_training, - global_pool=False, - output_stride=None, - include_root_block=True, - reuse=None, - scope='resnet_v1_50') - - intermediate_layers = list() - for d in desired_endpoints[:blocks + 1]: - intermediate_layers.append(endpoints[d]) - - return net, intermediate_layers + net = layers.max_pool2d(net, [2, 2], 
scope='pool1') + if self.blocks >= 2: + net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') + intermediate_levels.append(net) + net = layers.max_pool2d(net, [2, 2], scope='pool2') + if self.blocks >= 3: + net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') + intermediate_levels.append(net) + net = layers.max_pool2d(net, [2, 2], scope='pool3') + if self.blocks >= 4: + net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') + intermediate_levels.append(net) + net = layers.max_pool2d(net, [2, 2], scope='pool4') + if self.blocks >= 5: + net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') + intermediate_levels.append(net) + net = layers.max_pool2d(net, [2, 2], scope='pool5') + + return intermediate_levels diff --git a/dh_segment/utils/misc.py b/dh_segment/utils/misc.py index 5a1b77a..a923313 100644 --- a/dh_segment/utils/misc.py +++ b/dh_segment/utils/misc.py @@ -5,6 +5,14 @@ import json import pickle from hashlib import sha1 +from typing import Any +import importlib +import os +import urllib.request +import tarfile +import os +from tqdm import tqdm +from random import shuffle def parse_json(filename): @@ -29,3 +37,55 @@ def dump_pickle(filename, obj): def hash_dict(params): return sha1(json.dumps(params, sort_keys=True).encode()).hexdigest() + + +def shuffled(l: list) -> list: + ll = l.copy() + shuffle(ll) + return ll + + +def get_class_from_name(full_class_name: str) -> Any: + """ + Tries to load the class from its naming, will import the corresponding module. + Raises an Error if it does not work. + :param full_class_name: full name of the class, for instance `foo.bar.Baz` + :return: the loaded class + """ + module_name, class_name = full_class_name.rsplit('.', maxsplit=1) + # load the module, will raise ImportError if module cannot be loaded + m = importlib.import_module(module_name) + # get the class, will raise AttributeError if class cannot be found + c = getattr(m, class_name) + return c + + +def get_data_folder() -> str: + folder = os.path.join(os.path.expanduser('~'), '.dh_segment') + os.makedirs(folder, exist_ok=True) + return folder + + +def download_file(url: str, output_file: str): + def progress_hook(t): + last_b = [0] + + def update_to(b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. 
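+
+            (A note on the mechanics: this follows the usual tqdm reporthook
+            pattern. urllib.request.urlretrieve calls the hook with a block count
+            and a block size, and the hook converts them into absolute progress
+            updates for the surrounding tqdm bar.)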
+ """ + if tsize is not None: + t.total = tsize + t.update((b - last_b[0]) * bsize) + last_b[0] = b + + return update_to + + with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, + desc="Downloading pre-trained weights") as t: + urllib.request.urlretrieve(url, output_file, reporthook=progress_hook(t)) diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py index 81010c2..80b1fe3 100644 --- a/dh_segment/utils/params_config.py +++ b/dh_segment/utils/params_config.py @@ -2,9 +2,9 @@ __author__ = "solivr" __license__ = "GPL" -import os -import warnings -from random import shuffle +from .misc import get_class_from_name +from ..network.model import Encoder, Decoder +from typing import Type class PredictionType: @@ -98,45 +98,29 @@ class UNetModelParams: class ModelParams(BaseParams): def __init__(self, **kwargs): - self.batch_norm = kwargs.get('batch_norm', True) # type: bool - self.batch_renorm = kwargs.get('batch_renorm', True) # type: bool - self.weight_decay = kwargs.get('weight_decay', 1e-6) # type: float + self.encoder_name = kwargs.get('encoder_name', 'dh_segment.network.pretrained_models.ResnetV1_50') # type: str + self.encoder_params = kwargs.get('encoder_params', dict()) # type: dict + self.decoder_name = kwargs.get('decoder_name', 'dh_segment.network.SimpleDecoder') # type: str + self.decoder_params = kwargs.get('decoder_params', { + 'upsampling_dims': [32, 64, 128, 256, 512] + }) # type: dict self.n_classes = kwargs.get('n_classes', None) # type: int - self.pretrained_model_name = kwargs.get('pretrained_model_name', None) # type: str - self.max_depth = kwargs.get('max_depth', 512) # type: int - - if self.pretrained_model_name == 'vgg16': - model_class = VGG16ModelParams - elif self.pretrained_model_name == 'resnet50': - model_class = ResNetModelParams - elif self.pretrained_model_name == 'unet': - model_class = UNetModelParams - else: - raise NotImplementedError - self.pretrained_model_file = kwargs.get('pretrained_model_file', model_class.PRETRAINED_MODEL_FILE) - self.intermediate_conv = kwargs.get('intermediate_conv', model_class.INTERMEDIATE_CONV) - self.upscale_params = kwargs.get('upscale_params', model_class.UPSCALE_PARAMS) - self.selected_levels_upscaling = kwargs.get('selected_levels_upscaling', model_class.SELECTED_LAYERS_UPSCALING) - self.correct_resnet_version = kwargs.get('correct_resnet_version', model_class.CORRECT_VERSION) self.check_params() + def get_encoder(self) -> Type[Encoder]: + encoder = get_class_from_name(self.encoder_name) + assert issubclass(encoder, Encoder), "{} is not an Encoder".format(encoder) + return encoder + + def get_decoder(self) -> Type[Decoder]: + decoder = get_class_from_name(self.decoder_name) + assert issubclass(decoder, Decoder), "{} is not a Decoder".format(decoder) + return decoder + def check_params(self): - # Pretrained model name check - # assert self.upscale_params is not None and self.selected_levels_upscaling is not None, \ - # 'Model parameters cannot be None' - if self.upscale_params is not None and self.selected_levels_upscaling is not None: - - assert len(self.upscale_params) == len(self.selected_levels_upscaling), \ - 'Upscaling levels and selection levels must have the same lengths (in model_params definition), ' \ - '{} != {}'.format(len(self.upscale_params), - len(self.selected_levels_upscaling)) - - # assert os.path.isfile(self.pretrained_model_file), \ - # 'Pretrained weights file {} not found'.format(self.pretrained_model_file) - if not os.path.isfile(self.pretrained_model_file): - 
warnings.warn('WARNING - Default pretrained weights file in {} was not found. ' - 'Have you changed the default pretrained file ?'.format(self.pretrained_model_file)) + self.get_encoder() + self.get_decoder() class TrainingParams(BaseParams): From ff1edd83fd4045d62665da2cd5cae930391451ab Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:02:44 +0200 Subject: [PATCH 03/57] Removing useless files --- .../download_resnet_pretrained_model.py | 39 ------ .../download_vgg_pretrained_model.py | 39 ------ train.py | 122 ------------------ 3 files changed, 200 deletions(-) delete mode 100644 pretrained_models/download_resnet_pretrained_model.py delete mode 100644 pretrained_models/download_vgg_pretrained_model.py delete mode 100644 train.py diff --git a/pretrained_models/download_resnet_pretrained_model.py b/pretrained_models/download_resnet_pretrained_model.py deleted file mode 100644 index 42943fe..0000000 --- a/pretrained_models/download_resnet_pretrained_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -import urllib.request -import tarfile -import os -from tqdm import tqdm - - -def progress_hook(t): - last_b = [0] - - def update_to(b=1, bsize=1, tsize=None): - """ - b : int, optional - Number of blocks transferred so far [default: 1]. - bsize : int, optional - Size of each block (in tqdm units) [default: 1]. - tsize : int, optional - Total size (in tqdm units). If [default: None] remains unchanged. - """ - if tsize is not None: - t.total = tsize - t.update((b - last_b[0]) * bsize) - last_b[0] = b - - return update_to - - -if __name__ == '__main__': - tar_filename = 'resnet_v1_50.tar.gz' - with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, - desc="Downloading pre-trained weights") as t: - urllib.request.urlretrieve('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz', tar_filename, - reporthook=progress_hook(t)) - tar = tarfile.open(tar_filename) - tar.extractall() - tar.close() - print('Resnet pre-trained weights downloaded!') - os.remove(tar_filename) diff --git a/pretrained_models/download_vgg_pretrained_model.py b/pretrained_models/download_vgg_pretrained_model.py deleted file mode 100644 index d38d89f..0000000 --- a/pretrained_models/download_vgg_pretrained_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -import urllib.request -import tarfile -import os -from tqdm import tqdm - - -def progress_hook(t): - last_b = [0] - - def update_to(b=1, bsize=1, tsize=None): - """ - b : int, optional - Number of blocks transferred so far [default: 1]. - bsize : int, optional - Size of each block (in tqdm units) [default: 1]. - tsize : int, optional - Total size (in tqdm units). If [default: None] remains unchanged. 
- """ - if tsize is not None: - t.total = tsize - t.update((b - last_b[0]) * bsize) - last_b[0] = b - - return update_to - - -if __name__ == '__main__': - tar_filename = 'vgg_16.tar.gz' - with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, - desc="Downloading pre-trained weights") as t: - urllib.request.urlretrieve('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename, - reporthook=progress_hook(t)) - tar = tarfile.open(tar_filename) - tar.extractall() - tar.close() - print('VGG-16 pre-trained weights downloaded!') - os.remove(tar_filename) diff --git a/train.py b/train.py deleted file mode 100644 index 9cf0b44..0000000 --- a/train.py +++ /dev/null @@ -1,122 +0,0 @@ -import os -import tensorflow as tf -# Tensorflow logging level -from logging import WARNING # import DEBUG, INFO, ERROR for more/less verbosity - -tf.logging.set_verbosity(WARNING) -from dh_segment import estimator_fn, input, utils -import json -from glob import glob -import numpy as np - -try: - import better_exceptions -except ImportError: - print('/!\ W -- Not able to import package better_exceptions') - pass -from tqdm import trange -from sacred import Experiment -import pandas as pd - -ex = Experiment('dhSegment_experiment') - - -@ex.config -def default_config(): - train_data = None # Directory with training data - eval_data = None # Directory with validation data - model_output_dir = None # Directory to output tf model - restore_model = False # Set to true to continue training - classes_file = None # txt file with classes values (unused for REGRESSION) - gpu = '' # GPU to be used for training - prediction_type = utils.PredictionType.CLASSIFICATION # One of CLASSIFICATION, REGRESSION or MULTILABEL - pretrained_model_name = 'resnet50' - model_params = utils.ModelParams(pretrained_model_name=pretrained_model_name).to_dict() # Model parameters - training_params = utils.TrainingParams().to_dict() # Training parameters - if prediction_type == utils.PredictionType.CLASSIFICATION: - assert classes_file is not None - model_params['n_classes'] = utils.get_n_classes_from_file(classes_file) - elif prediction_type == utils.PredictionType.REGRESSION: - model_params['n_classes'] = 1 - elif prediction_type == utils.PredictionType.MULTILABEL: - assert classes_file is not None - model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file) - - -@ex.automain -def run(train_data, eval_data, model_output_dir, gpu, training_params, _config): - # Create output directory - if not os.path.isdir(model_output_dir): - os.makedirs(model_output_dir) - else: - assert _config.get('restore_model'), \ - '{0} already exists, you cannot use it as output directory. 
' \ - 'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir) - # Save config - with open(os.path.join(model_output_dir, 'config.json'), 'w') as f: - json.dump(_config, f, indent=4, sort_keys=True) - - # Create export directory for saved models - saved_model_dir = os.path.join(model_output_dir, 'export') - if not os.path.isdir(saved_model_dir): - os.makedirs(saved_model_dir) - - training_params = utils.TrainingParams.from_dict(training_params) - - session_config = tf.ConfigProto() - session_config.gpu_options.visible_device_list = str(gpu) - session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 - estimator_config = tf.estimator.RunConfig().replace(session_config=session_config, - save_summary_steps=10, - keep_checkpoint_max=1) - estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir, - params=_config, config=estimator_config) - - def get_dirs_or_files(input_data): - if os.path.isdir(input_data): - train_input, train_labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') - # Check if training dir exists - if not os.path.isdir(train_input): - raise FileNotFoundError(train_input) - if not os.path.isdir(train_labels_input): - raise FileNotFoundError(train_labels_input) - elif os.path.isfile(train_data) and train_data.endswith('.csv'): - train_input = train_data - train_labels_input = None - else: - raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data)) - return train_input, train_labels_input - - train_input, train_labels_input = get_dirs_or_files(train_data) - if eval_data is not None: - eval_input, eval_labels_input = get_dirs_or_files(eval_data) - - # Configure exporter - serving_input_fn = input.serving_input_filename(training_params.input_resized_size) - exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) - - for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): - estimator.train(input.input_fn(train_input, - input_label_dir=train_labels_input, - num_epochs=training_params.evaluate_every_epoch, - batch_size=training_params.batch_size, - data_augmentation=training_params.data_augmentation, - make_patches=training_params.make_patches, - image_summaries=True, - params=_config, - num_threads=32)) - - if eval_data is not None: - eval_result = estimator.evaluate(input.input_fn(eval_input, - input_label_dir=eval_labels_input, - batch_size=1, - data_augmentation=False, - make_patches=False, - image_summaries=False, - params=_config, - num_threads=32)) - else: - eval_result = None - - exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result, - is_the_final_export=False) From a9e0ed7653ea87e3ae86c0a54f8c0fd81ff0ed19 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:03:11 +0200 Subject: [PATCH 04/57] dh_segment_train as a script --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 710e7ac..3d7e51d 100644 --- a/setup.py +++ b/setup.py @@ -11,4 +11,5 @@ 'Paper': 'https://arxiv.org/abs/1804.10371', 'Source Code': 'https://github.com/dhlab-epfl/dhSegment' }, + scripts=['dh_segment_train'], zip_safe=False) From e0d6c5d260829d125ee15f0e7b76fbff72c44911 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:06:58 +0200 Subject: [PATCH 05/57] Correcting the deletion of the main script, oops... 
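
For reference, the restored script is a sacred Experiment, so it is driven by
config overrides on the command line. A sketch of a typical invocation (the paths
below are placeholders, not part of this patch):

    python dh_segment_train.py with train_data=/path/to/train eval_data=/path/to/eval \
        classes_file=/path/to/classes.txt model_output_dir=/path/to/model gpu=0
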
--- dh_segment_train.py | 121 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 dh_segment_train.py diff --git a/dh_segment_train.py b/dh_segment_train.py new file mode 100644 index 0000000..259e8da --- /dev/null +++ b/dh_segment_train.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +import os +import tensorflow as tf +# Tensorflow logging level +from logging import WARNING # import DEBUG, INFO, ERROR for more/less verbosity + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # or any {'0', '1', '2'} +tf.logging.set_verbosity(WARNING) +from dh_segment import estimator_fn, io, utils +import json + +try: + import better_exceptions +except ImportError: + print('/!\ W -- Not able to import package better_exceptions') + pass +from tqdm import trange +from sacred import Experiment + +ex = Experiment('dhSegment_experiment') + + +@ex.config +def default_config(): + train_data = None # Directory with training data + eval_data = None # Directory with validation data + model_output_dir = None # Directory to output tf model + restore_model = False # Set to true to continue training + classes_file = None # txt file with classes values (unused for REGRESSION) + gpu = '' # GPU to be used for training + prediction_type = utils.PredictionType.CLASSIFICATION # One of CLASSIFICATION, REGRESSION or MULTILABEL + model_params = utils.ModelParams().to_dict() # Model parameters + training_params = utils.TrainingParams().to_dict() # Training parameters + if prediction_type == utils.PredictionType.CLASSIFICATION: + assert classes_file is not None + model_params['n_classes'] = utils.get_n_classes_from_file(classes_file) + elif prediction_type == utils.PredictionType.REGRESSION: + model_params['n_classes'] = 1 + elif prediction_type == utils.PredictionType.MULTILABEL: + assert classes_file is not None + model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file) + + +@ex.automain +def run(train_data, eval_data, model_output_dir, gpu, training_params, _config): + # Create output directory + if not os.path.isdir(model_output_dir): + os.makedirs(model_output_dir) + else: + assert _config.get('restore_model'), \ + '{0} already exists, you cannot use it as output directory. 
' \ + 'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir) + # Save config + with open(os.path.join(model_output_dir, 'config.json'), 'w') as f: + json.dump(_config, f, indent=4, sort_keys=True) + + # Create export directory for saved models + saved_model_dir = os.path.join(model_output_dir, 'export') + if not os.path.isdir(saved_model_dir): + os.makedirs(saved_model_dir) + + training_params = utils.TrainingParams.from_dict(training_params) + + session_config = tf.ConfigProto() + session_config.gpu_options.visible_device_list = str(gpu) + session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 + estimator_config = tf.estimator.RunConfig().replace(session_config=session_config, + save_summary_steps=10, + keep_checkpoint_max=1) + estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir, + params=_config, config=estimator_config) + + def get_dirs_or_files(input_data): + if os.path.isdir(input_data): + train_input, train_labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') + # Check if training dir exists + if not os.path.isdir(train_input): + raise FileNotFoundError(train_input) + if not os.path.isdir(train_labels_input): + raise FileNotFoundError(train_labels_input) + elif os.path.isfile(train_data) and train_data.endswith('.csv'): + train_input = train_data + train_labels_input = None + else: + raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data)) + return train_input, train_labels_input + + train_input, train_labels_input = get_dirs_or_files(train_data) + if eval_data is not None: + eval_input, eval_labels_input = get_dirs_or_files(eval_data) + + # Configure exporter + serving_input_fn = io.input.serving_input_filename(training_params.input_resized_size) + exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + + for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): + estimator.train(io.input.input_fn(train_input, + input_label_dir=train_labels_input, + num_epochs=training_params.evaluate_every_epoch, + batch_size=training_params.batch_size, + data_augmentation=training_params.data_augmentation, + make_patches=training_params.make_patches, + image_summaries=True, + params=_config, + num_threads=32)) + + if eval_data is not None: + eval_result = estimator.evaluate(io.input.input_fn(eval_input, + input_label_dir=eval_labels_input, + batch_size=1, + data_augmentation=False, + make_patches=False, + image_summaries=False, + params=_config, + num_threads=32)) + else: + eval_result = None + + exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result, + is_the_final_export=False) From cb1d8fc71df9c0039deb94db0049123809eb936f Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:15:12 +0200 Subject: [PATCH 06/57] Nicer labels for the progress bars --- dh_segment/io/input.py | 27 ++++++++++++++++----------- dh_segment_train.py | 6 ++++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dh_segment/io/input.py b/dh_segment/io/input.py index 453ca2d..edd6343 100644 --- a/dh_segment/io/input.py +++ b/dh_segment/io/input.py @@ -17,9 +17,9 @@ class InputCase(Enum): INPUT_CSV = 'INPUT_CSV' -def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: str=None, - data_augmentation: bool=False, batch_size: int=5, make_patches: bool=False, num_epochs: int=1, - num_threads: int=4, 
image_summaries: bool=False):
+def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: str = None,
+             data_augmentation: bool = False, batch_size: int = 5, make_patches: bool = False, num_epochs: int = 1,
+             num_threads: int = 4, image_summaries: bool = False, progressbar_description: str = 'Dataset'):
     """
     Input_fn for estimator
@@ -33,6 +33,7 @@ def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: s
     :param num_epochs: number of epochs to cycle through data (set it to None for infinite repeat)
     :param num_threads: number of threads to use in parallel when using tf.data.Dataset.map
     :param image_summaries: boolean, whether to produce tf.Summary images to watch on TensorBoard
+    :param progressbar_description: description that will appear in the progress bar showing the number of files read
     :return: fn
     """
     training_params = utils.TrainingParams.from_dict(params['training_params'])
@@ -96,8 +97,9 @@ def _scaling_and_patch_fn(input_image, label_image):
     # Data augmentation
     def _augment_data_fn(input_image, label_image): \
-        return data_augmentation_fn(input_image, label_image, training_params.data_augmentation_flip_lr,
-                                    training_params.data_augmentation_flip_ud, training_params.data_augmentation_color)
+            return data_augmentation_fn(input_image, label_image, training_params.data_augmentation_flip_lr,
+                                        training_params.data_augmentation_flip_ud,
+                                        training_params.data_augmentation_color)

     # Assign color to class id
     def _assign_color_to_class_id(input_image, label_image):
@@ -112,13 +114,14 @@ def _assign_color_to_class_id(input_image, label_image):
             output['weight_maps'] = local_entropy(tf.equal(label_image, 1), sigma=training_params.local_entropy_sigma)
         return output

+    # ---
     # Finding the list of images to be used
     if isinstance(input_data, list):
         input_case = InputCase.INPUT_LIST
         input_image_filenames = input_data
-        print('Found {} images'.format(len(input_image_filenames)))
+        #print('Found {} images'.format(len(input_image_filenames)))

     elif os.path.isdir(input_data):
         input_case = InputCase.INPUT_DIR
         input_image_filenames = glob(os.path.join(input_data, '**', '*.jpg'), recursive=True) + \
                                 glob(os.path.join(input_data, '**', '*.png'), recursive=True)
-        print('Found {} images'.format(len(input_image_filenames)))
+        #print('Found {} images'.format(len(input_image_filenames)))

     elif os.path.isfile(input_data) and \
             input_data.endswith('.csv'):
         input_case = InputCase.INPUT_CSV
     else:
-        raise NotImplementedError('Input data should be a directory, a csv file or a list of filenames but got {}'.format(input_data))
+        raise NotImplementedError(
+            'Input data should be a directory, a csv file or a list of filenames but got {}'.format(input_data))

     # Finding the list of labelled images if available
     has_labelled_data = False
@@ -169,15 +173,16 @@ def _assign_color_to_class_id(input_image, label_image):
     def fn():
         if not has_labelled_data:
             encoded_filenames = [f.encode() for f in input_image_filenames]
-            dataset = tf.data.Dataset.from_generator(lambda: tqdm(encoded_filenames, desc='Dataset'),
+            dataset = tf.data.Dataset.from_generator(lambda: tqdm(encoded_filenames, desc=progressbar_description),
                                                      tf.string, tf.TensorShape([]))
             dataset = dataset.repeat(count=num_epochs)
             dataset = dataset.map(lambda filename: {'images': load_and_resize_image(filename, 3, training_params.input_resized_size)})
         else:
             encoded_filenames = [(i.encode(), l.encode()) for i, l in zip(input_image_filenames, label_image_filenames)]
-            dataset = tf.data.Dataset.from_generator(lambda:
tqdm(utils.shuffled(encoded_filenames), desc='Dataset'), - (tf.string, tf.string), (tf.TensorShape([]), tf.TensorShape([]))) + dataset = tf.data.Dataset.from_generator(lambda: tqdm(utils.shuffled(encoded_filenames), + desc=progressbar_description), + (tf.string, tf.string), (tf.TensorShape([]), tf.TensorShape([]))) dataset = dataset.repeat(count=num_epochs) dataset = dataset.map(_load_image_fn, num_threads).flat_map(_scaling_and_patch_fn) diff --git a/dh_segment_train.py b/dh_segment_train.py index 259e8da..e02e947 100644 --- a/dh_segment_train.py +++ b/dh_segment_train.py @@ -103,7 +103,8 @@ def get_dirs_or_files(input_data): make_patches=training_params.make_patches, image_summaries=True, params=_config, - num_threads=32)) + num_threads=32, + progressbar_description="Training".format(i))) if eval_data is not None: eval_result = estimator.evaluate(io.input.input_fn(eval_input, @@ -113,7 +114,8 @@ def get_dirs_or_files(input_data): make_patches=False, image_summaries=False, params=_config, - num_threads=32)) + num_threads=32, + progressbar_description="Evaluation")) else: eval_result = None From da1258ae6860041f6e36a2bfce35955c90e7720a Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:18:54 +0200 Subject: [PATCH 07/57] Nicer handling of number of threads --- dh_segment_train.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dh_segment_train.py b/dh_segment_train.py index e02e947..d550d0e 100644 --- a/dh_segment_train.py +++ b/dh_segment_train.py @@ -94,6 +94,12 @@ def get_dirs_or_files(input_data): serving_input_fn = io.input.serving_input_filename(training_params.input_resized_size) exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + nb_cores = os.cpu_count() + if nb_cores: + num_threads = min(nb_cores//2, 16) + else: + num_threads = 4 + for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): estimator.train(io.input.input_fn(train_input, input_label_dir=train_labels_input, @@ -103,7 +109,7 @@ def get_dirs_or_files(input_data): make_patches=training_params.make_patches, image_summaries=True, params=_config, - num_threads=32, + num_threads=num_threads, progressbar_description="Training".format(i))) if eval_data is not None: @@ -114,7 +120,7 @@ def get_dirs_or_files(input_data): make_patches=False, image_summaries=False, params=_config, - num_threads=32, + num_threads=num_threads, progressbar_description="Evaluation")) else: eval_result = None From 4de57febd202e912a5667e49a02db0e6210a1337 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 12 Oct 2018 20:35:03 +0200 Subject: [PATCH 08/57] Removing code which has been made useless --- dh_segment/utils/params_config.py | 54 ------------------------------- 1 file changed, 54 deletions(-) diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py index 80b1fe3..d015cf4 100644 --- a/dh_segment/utils/params_config.py +++ b/dh_segment/utils/params_config.py @@ -42,60 +42,6 @@ def check_params(self): pass -class VGG16ModelParams: - PRETRAINED_MODEL_FILE = 'pretrained_models/vgg_16.ckpt' - INTERMEDIATE_CONV = [ - [(256, 3)] - ] - UPSCALE_PARAMS = [ - [(32, 3)], - [(64, 3)], - [(128, 3)], - [(256, 3)], - [(512, 3)], - [(512, 3)] - ] - SELECTED_LAYERS_UPSCALING = [ - True, - True, # Must have same length as vgg_upscale_params - True, - True, - False, - False - ] - CORRECTED_VERSION = None - - -class ResNetModelParams: - PRETRAINED_MODEL_FILE = 
'pretrained_models/resnet_v1_50.ckpt' - INTERMEDIATE_CONV = None - UPSCALE_PARAMS = [ - # (Filter size (depth bottleneck's output), number of bottleneck) - (32, 0), - (64, 0), - (128, 0), - (256, 0), - (512, 0) - ] - SELECTED_LAYERS_UPSCALING = [ - # Must have the same length as resnet_upscale_params - True, - True, - True, - True, - True - ] - CORRECT_VERSION = False - - -class UNetModelParams: - PRETRAINED_MODEL_FILE = None - INTERMEDIATE_CONV = None - UPSCALE_PARAMS = None - SELECTED_LAYERS_UPSCALING = None - CORRECT_VERSION = False - - class ModelParams(BaseParams): def __init__(self, **kwargs): self.encoder_name = kwargs.get('encoder_name', 'dh_segment.network.pretrained_models.ResnetV1_50') # type: str From 7e5ccb49ef2f6e7c55915ec5a2fef2500a2f0345 Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 22 Oct 2018 15:06:22 +0200 Subject: [PATCH 09/57] mainly docstring formatting --- dh_segment/network/__init__.py | 14 +++--- dh_segment/network/model.py | 20 ++++++-- dh_segment/network/pretrained_models.py | 65 +++++++++++++++---------- dh_segment/utils/__init__.py | 3 -- dh_segment/utils/misc.py | 7 +++ dh_segment/utils/params_config.py | 10 +++- 6 files changed, 79 insertions(+), 40 deletions(-) diff --git a/dh_segment/network/__init__.py b/dh_segment/network/__init__.py index 553184c..e5aa398 100644 --- a/dh_segment/network/__init__.py +++ b/dh_segment/network/__init__.py @@ -1,12 +1,14 @@ _MODEL = [ - 'inference_vgg16', - 'inference_resnet_v1_50', - 'inference_u_net', - 'vgg_16_fn', - 'resnet_v1_50_fn' + 'Encoder', + 'Decoder', + 'SimpleDecoder', ] -__all__ = _MODEL +_PRETRAINED = [ + 'ResnetV1_50', + 'VGG16' +] +__all__ = _MODEL + _PRETRAINED from .model import * from .pretrained_models import * diff --git a/dh_segment/network/model.py b/dh_segment/network/model.py index 5eb874a..8f1512d 100644 --- a/dh_segment/network/model.py +++ b/dh_segment/network/model.py @@ -13,7 +13,7 @@ def __call__(self, images: tf.Tensor) -> List[tf.Tensor]: """ :param images: [NxHxWx3] float32 [0..255] input images - :return: a list of the feature maps in decreasing spatial resolution (first element is most likely the input + :return: a list of the feature maps in decreasing spatial resolution (first element is most likely the input \ image itself, then the output of the first pooling op, etc...) 
""" pass @@ -21,7 +21,7 @@ def __call__(self, images: tf.Tensor) -> List[tf.Tensor]: def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict]]: """ - :return: The filename of the pretrained checkpoint and the corresponding variables (List of Dict mapping) + :return: The filename of the pretrained checkpoint and the corresponding variables (List of Dict mapping) \ or `None` if no-pretraining is done """ return None, None @@ -32,14 +32,23 @@ class Decoder(ABC): def __call__(self, feature_maps: List[tf.Tensor], num_classes: int) -> tf.Tensor: """ - :param feature_maps: list of feature maps, in decreasing spatial resolution, first one being at the original resolution + :param feature_maps: list of feature maps, in decreasing spatial resolution, first one being at the original \ + resolution :return: [N,H,W,num_classes] float32 tensor of logit scores """ pass class SimpleDecoder(Decoder): - def __init__(self, upsampling_dims: List[int], max_depth: int = None, train_batchnorm=False, weight_decay=0.): + """ + + :param upsampling_dims: + :param max_depth: + :param weight_decay: + :param self.batch_norm_fn: + """ + def __init__(self, upsampling_dims: List[int], max_depth: int = None, train_batchnorm: bool=False, + weight_decay: float=0.): self.upsampling_dims = upsampling_dims self.max_depth = max_depth self.weight_decay = weight_decay @@ -105,6 +114,7 @@ def __call__(self, feature_maps: List[tf.Tensor], num_classes: int): def _get_image_shape_tensor(tensor: tf.Tensor) -> Union[Tuple[int, int], tf.Tensor]: """ Get the image shape of the tensor + :param tensor: Input image tensor [N,H,W,...] :return: a (int, int) tuple if shape is defined, otherwise the corresponding tf.Tensor value """ @@ -116,7 +126,7 @@ def _get_image_shape_tensor(tensor: tf.Tensor) -> Union[Tuple[int, int], tf.Tens return target_shape -def _upsample_concat(pooled_layer: tf.Tensor, previous_layer: tf.Tensor, scope_name='UpsampleConcat'): +def _upsample_concat(pooled_layer: tf.Tensor, previous_layer: tf.Tensor, scope_name: str='UpsampleConcat'): """ :param pooled_layer: [N,H,W,C] coarse layer diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models.py index fc55feb..5bc5db0 100644 --- a/dh_segment/network/pretrained_models.py +++ b/dh_segment/network/pretrained_models.py @@ -15,12 +15,22 @@ def mean_substraction(input_tensor, means=_VGG_MEANS): class ResnetV1_50(Encoder): - def __init__(self, train_batchnorm=False, blocks=4, weight_decay=0.0001, - renorm=True, corrected_version=False): + """ResNet-50 implementation + + :param train_batchnorm: Option to use batch norm + :param blocks: number of blocks (resnet blocks) + :param weight_decay: value of weight decay + :param batch_renorm: Option to use batch renorm + :param corrected_version: option to use the original resnet implementation (True) but less efficient than + `slim`'s implementation + :param pretrained_file: path to the file (.ckpt) containing the pretrained weights + """ + def __init__(self, train_batchnorm: bool=False, blocks: int=4, weight_decay: float=0.0001, + batch_renorm: bool=True, corrected_version: bool=False): self.train_batchnorm = train_batchnorm self.blocks = blocks self.weight_decay = weight_decay - self.renorm = renorm + self.batch_renorm = batch_renorm self.corrected_version = corrected_version self.pretrained_file = os.path.join(get_data_folder(), 'resnet_v1_50.ckpt') if not os.path.exists(self.pretrained_file): @@ -43,23 +53,21 @@ def __call__(self, images: tf.Tensor): outputs = [] with 
slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=self.weight_decay, batch_norm_decay=0.999)), \ - slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=self.renorm): + slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=self.batch_renorm): mean_substracted_tensor = mean_substraction(images) assert 0 < self.blocks <= 4 if self.corrected_version: - def corrected_resnet_v1_block(scope, base_depth, num_units, stride): - """Helper function for creating a resnet_v1 bottleneck block. - - Args: - scope: The scope of the block. - base_depth: The depth of the bottleneck layer for each unit. - num_units: The number of units in the block. - stride: The stride of the block, implemented as a stride in the last unit. - All other units have stride=1. - - Returns: - A resnet_v1 bottleneck block. + def corrected_resnet_v1_block(scope: str, base_depth: int, num_units: int, stride: int) -> tf.Tensor: + """ + Helper function for creating a resnet_v1 bottleneck block. + + :param scope: The scope of the block. + :param base_depth: The depth of the bottleneck layer for each unit. + :param num_units: The number of units in the block. + :param stride: The stride of the block, implemented as a stride in the last unit. + All other units have stride=1. + :return: A resnet_v1 bottleneck block. """ return nets.resnet_utils.Block(scope, nets.resnet_v1.bottleneck, [{ 'depth': base_depth * 4, @@ -119,7 +127,13 @@ def corrected_resnet_v1_block(scope, base_depth, num_units, stride): class VGG16(Encoder): - def __init__(self, blocks=5, weight_decay=0.0005): + """VGG-16 implementation + + :param blocks: number of blocks (vgg blocks) + :param weight_decay: weight decay value + :param pretrained_file: path to the file (.ckpt) containing the pretrained weights + """ + def __init__(self, blocks: int=5, weight_decay: float=0.0005): self.blocks = blocks self.weight_decay = weight_decay self.pretrained_file = os.path.join(get_data_folder(), 'vgg_16.ckpt') @@ -140,12 +154,12 @@ def pretrained_information(self): and 'renorm' not in v.name] def __call__(self, images: tf.Tensor): - intermediate_levels = [] + outputs = [] with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): with tf.variable_scope(None, 'vgg_16', [images]) as sc: input_tensor = mean_substraction(images) - intermediate_levels.append(input_tensor) + outputs.append(input_tensor) end_points_collection = sc.original_name_scope + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. 
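+                # For intuition, with a 224x224 input the collected feature maps
+                # are [1,224,224,3] (the mean-subtracted input appended above),
+                # then one pre-pooling activation per block: [1,224,224,64],
+                # [1,112,112,128], [1,56,56,256], [1,28,28,512], [1,14,14,512].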
with slim.arg_scope( @@ -153,23 +167,24 @@ def __call__(self, images: tf.Tensor): outputs_collections=end_points_collection): net = layers.repeat( input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') - intermediate_levels.append(net) + outputs.append(net) net = layers.max_pool2d(net, [2, 2], scope='pool1') if self.blocks >= 2: net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') - intermediate_levels.append(net) + outputs.append(net) net = layers.max_pool2d(net, [2, 2], scope='pool2') if self.blocks >= 3: net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') - intermediate_levels.append(net) + outputs.append(net) net = layers.max_pool2d(net, [2, 2], scope='pool3') if self.blocks >= 4: net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') - intermediate_levels.append(net) + outputs.append(net) net = layers.max_pool2d(net, [2, 2], scope='pool4') if self.blocks >= 5: net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') - intermediate_levels.append(net) + outputs.append(net) net = layers.max_pool2d(net, [2, 2], scope='pool5') - return intermediate_levels + # TODO : the output of the last max pool is not returned, shouldn't it be ? + return outputs diff --git a/dh_segment/utils/__init__.py b/dh_segment/utils/__init__.py index e227053..389c561 100644 --- a/dh_segment/utils/__init__.py +++ b/dh_segment/utils/__init__.py @@ -1,8 +1,5 @@ _PARAMSCONFIG = [ 'PredictionType', - 'VGG16ModelParams', - 'ResNetModelParams', - 'UNetModelParams', 'ModelParams', 'TrainingParams' ] diff --git a/dh_segment/utils/misc.py b/dh_segment/utils/misc.py index a923313..102a21c 100644 --- a/dh_segment/utils/misc.py +++ b/dh_segment/utils/misc.py @@ -49,6 +49,7 @@ def get_class_from_name(full_class_name: str) -> Any: """ Tries to load the class from its naming, will import the corresponding module. Raises an Error if it does not work. 
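+
+    Usage example (illustrative; instantiating the encoder may trigger the
+    pretrained-weights download)::
+
+        encoder_class = get_class_from_name('dh_segment.network.pretrained_models.ResnetV1_50')
+        encoder = encoder_class(blocks=4, weight_decay=1e-4)
+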
+ :param full_class_name: full name of the class, for instance `foo.bar.Baz` :return: the loaded class """ @@ -67,6 +68,12 @@ def get_data_folder() -> str: def download_file(url: str, output_file: str): + """ + + :param url: + :param output_file: + :return: + """ def progress_hook(t): last_b = [0] diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py index d015cf4..083e8de 100644 --- a/dh_segment/utils/params_config.py +++ b/dh_segment/utils/params_config.py @@ -13,7 +13,7 @@ class PredictionType: MULTILABEL = 'MULTILABEL' @classmethod - def parse(cls, prediction_type): + def parse(cls, prediction_type) -> 'PredictionType': if prediction_type == 'CLASSIFICATION': return PredictionType.CLASSIFICATION elif prediction_type == 'REGRESSION': @@ -43,6 +43,14 @@ def check_params(self): class ModelParams(BaseParams): + """ + + :param encoder_name: + :param encoder_params: + :param decoder_name: + :param decoder_params: + :param n_classes: + """ def __init__(self, **kwargs): self.encoder_name = kwargs.get('encoder_name', 'dh_segment.network.pretrained_models.ResnetV1_50') # type: str self.encoder_params = kwargs.get('encoder_params', dict()) # type: dict From ce214c2b1a439e43a149bfd568a655f082f86e3a Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 22 Oct 2018 16:35:07 +0200 Subject: [PATCH 10/57] changed :param: by :ivar: --- dh_segment/network/model.py | 8 ++++---- dh_segment/network/pretrained_models.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dh_segment/network/model.py b/dh_segment/network/model.py index 8f1512d..6905726 100644 --- a/dh_segment/network/model.py +++ b/dh_segment/network/model.py @@ -42,10 +42,10 @@ def __call__(self, feature_maps: List[tf.Tensor], num_classes: int) -> tf.Tensor class SimpleDecoder(Decoder): """ - :param upsampling_dims: - :param max_depth: - :param weight_decay: - :param self.batch_norm_fn: + :ivar upsampling_dims: + :ivar max_depth: + :ivar weight_decay: + :ivar self.batch_norm_fn: """ def __init__(self, upsampling_dims: List[int], max_depth: int = None, train_batchnorm: bool=False, weight_decay: float=0.): diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models.py index 5bc5db0..f6a3e57 100644 --- a/dh_segment/network/pretrained_models.py +++ b/dh_segment/network/pretrained_models.py @@ -17,13 +17,13 @@ def mean_substraction(input_tensor, means=_VGG_MEANS): class ResnetV1_50(Encoder): """ResNet-50 implementation - :param train_batchnorm: Option to use batch norm - :param blocks: number of blocks (resnet blocks) - :param weight_decay: value of weight decay - :param batch_renorm: Option to use batch renorm - :param corrected_version: option to use the original resnet implementation (True) but less efficient than - `slim`'s implementation - :param pretrained_file: path to the file (.ckpt) containing the pretrained weights + :ivar train_batchnorm: Option to use batch norm + :ivar blocks: number of blocks (resnet blocks) + :ivar weight_decay: value of weight decay + :ivar batch_renorm: Option to use batch renorm + :ivar corrected_version: option to use the original resnet implementation (True) but less efficient than \ + `slim`'s implementation + :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights """ def __init__(self, train_batchnorm: bool=False, blocks: int=4, weight_decay: float=0.0001, batch_renorm: bool=True, corrected_version: bool=False): @@ -129,9 +129,9 @@ def corrected_resnet_v1_block(scope: str, base_depth: 
int, num_units: int, strid class VGG16(Encoder): """VGG-16 implementation - :param blocks: number of blocks (vgg blocks) - :param weight_decay: weight decay value - :param pretrained_file: path to the file (.ckpt) containing the pretrained weights + :ivar blocks: number of blocks (vgg blocks) + :ivar weight_decay: weight decay value + :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights """ def __init__(self, blocks: int=5, weight_decay: float=0.0005): self.blocks = blocks From 62ec71d27a7c3ee0897235194b5fbabc53af50fd Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 26 Oct 2018 14:07:41 +0200 Subject: [PATCH 11/57] Updating batchnorm training --- dh_segment/estimator_fn.py | 5 ++-- dh_segment/network/model.py | 40 ++++++++++--------------- dh_segment/network/pretrained_models.py | 6 ++-- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/dh_segment/estimator_fn.py b/dh_segment/estimator_fn.py index a4c1f47..863d9fe 100644 --- a/dh_segment/estimator_fn.py +++ b/dh_segment/estimator_fn.py @@ -22,8 +22,9 @@ def model_fn(mode, features, labels, params): decoder_class = model_params.get_decoder() decoder = decoder_class(**model_params.decoder_params) - feature_maps = encoder(input_images) - network_output = decoder(feature_maps, num_classes=model_params.n_classes) + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + feature_maps = encoder(input_images, is_training=is_training) + network_output = decoder(feature_maps, num_classes=model_params.n_classes, is_training=is_training) if mode == tf.estimator.ModeKeys.TRAIN: pretrained_file, pretrained_vars = encoder.pretrained_information() diff --git a/dh_segment/network/model.py b/dh_segment/network/model.py index 6905726..b20de04 100644 --- a/dh_segment/network/model.py +++ b/dh_segment/network/model.py @@ -9,7 +9,7 @@ class Encoder(ABC): @abstractmethod - def __call__(self, images: tf.Tensor) -> List[tf.Tensor]: + def __call__(self, images: tf.Tensor, is_training=False) -> List[tf.Tensor]: """ :param images: [NxHxWx3] float32 [0..255] input images @@ -29,7 +29,7 @@ def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict] class Decoder(ABC): @abstractmethod - def __call__(self, feature_maps: List[tf.Tensor], num_classes: int) -> tf.Tensor: + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int, is_training=False) -> tf.Tensor: """ :param feature_maps: list of feature maps, in decreasing spatial resolution, first one being at the original \ @@ -47,34 +47,26 @@ class SimpleDecoder(Decoder): :ivar weight_decay: :ivar self.batch_norm_fn: """ - def __init__(self, upsampling_dims: List[int], max_depth: int = None, train_batchnorm: bool=False, - weight_decay: float=0.): + def __init__(self, upsampling_dims: List[int], max_depth: int = None, weight_decay: float=0.): self.upsampling_dims = upsampling_dims self.max_depth = max_depth self.weight_decay = weight_decay - if train_batchnorm: - # TODO - renorm = False - if renorm: - renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 10} - renorm_momentum = 0.98 - else: - renorm_clipping = None - renorm_momentum = 0.99 - self.batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=train_batchnorm, - name='batch_norm', - renorm=renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum) - else: - self.batch_norm_fn = None - - def __call__(self, feature_maps: List[tf.Tensor], num_classes: int): + renorm = True + self.batch_norm_params = { + "renorm": renorm, + "renorm_clipping": {'rmax': 
100, 'rmin': 0.1, 'dmax': 10}, + "renorm_momentum": 0.98 + } + + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int, is_training=False): + + batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, + name='batch_norm', **self.batch_norm_params) # Upsampling with tf.variable_scope('SimpleDecoder'): with arg_scope([layers.conv2d], - normalizer_fn=self.batch_norm_fn, + normalizer_fn=batch_norm_fn, weights_regularizer=layers.l2_regularizer(self.weight_decay)): assert len(self.upsampling_dims) + 1 == len(feature_maps), \ @@ -89,7 +81,7 @@ def __call__(self, feature_maps: List[tf.Tensor], num_classes: int): num_outputs=self.max_depth, kernel_size=[1, 1], scope="dimreduc_{}".format(i), - normalizer_fn=self.batch_norm_fn, + normalizer_fn=batch_norm_fn, activation_fn=None ) diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models.py index f6a3e57..1be97dc 100644 --- a/dh_segment/network/pretrained_models.py +++ b/dh_segment/network/pretrained_models.py @@ -49,7 +49,7 @@ def pretrained_information(self): if 'resnet_v1_50' in v.name and 'renorm' not in v.name] - def __call__(self, images: tf.Tensor): + def __call__(self, images: tf.Tensor, is_training=False): outputs = [] with slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=self.weight_decay, batch_norm_decay=0.999)), \ @@ -110,7 +110,7 @@ def corrected_resnet_v1_block(scope: str, base_depth: int, num_units: int, strid net, endpoints = nets.resnet_v1.resnet_v1(mean_substracted_tensor, blocks=blocks_list[:self.blocks], num_classes=None, - is_training=self.train_batchnorm, + is_training=self.train_batchnorm and is_training, global_pool=False, output_stride=None, include_root_block=True, @@ -153,7 +153,7 @@ def pretrained_information(self): if 'vgg_16' in v.name and 'renorm' not in v.name] - def __call__(self, images: tf.Tensor): + def __call__(self, images: tf.Tensor, is_training=False): outputs = [] with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): From cace550beba579b96bd7b4a5a3eef77397d6d272 Mon Sep 17 00:00:00 2001 From: Benoit Seguin Date: Fri, 26 Oct 2018 15:23:40 +0200 Subject: [PATCH 12/57] Added MobileNetV2 --- .../network/pretrained_models/__init__.py | 3 + .../pretrained_models/mobilenet/__init__.py | 0 .../mobilenet/conv_blocks.py | 358 ++++++++++++++ .../pretrained_models/mobilenet/encoder.py | 53 ++ .../pretrained_models/mobilenet/mobilenet.py | 466 ++++++++++++++++++ .../mobilenet/mobilenet_v2.py | 219 ++++++++ .../resnet50.py} | 76 +-- dh_segment/network/pretrained_models/vgg16.py | 77 +++ 8 files changed, 1179 insertions(+), 73 deletions(-) create mode 100644 dh_segment/network/pretrained_models/__init__.py create mode 100644 dh_segment/network/pretrained_models/mobilenet/__init__.py create mode 100644 dh_segment/network/pretrained_models/mobilenet/conv_blocks.py create mode 100644 dh_segment/network/pretrained_models/mobilenet/encoder.py create mode 100644 dh_segment/network/pretrained_models/mobilenet/mobilenet.py create mode 100644 dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py rename dh_segment/network/{pretrained_models.py => pretrained_models/resnet50.py} (62%) create mode 100644 dh_segment/network/pretrained_models/vgg16.py diff --git a/dh_segment/network/pretrained_models/__init__.py b/dh_segment/network/pretrained_models/__init__.py new file mode 100644 index 0000000..c95406f --- /dev/null +++ b/dh_segment/network/pretrained_models/__init__.py @@ -0,0 +1,3 @@ +from .resnet50 import 
ResnetV1_50 +from .vgg16 import VGG16 +from .mobilenet.encoder import MobileNetV2 \ No newline at end of file diff --git a/dh_segment/network/pretrained_models/mobilenet/__init__.py b/dh_segment/network/pretrained_models/mobilenet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py b/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py new file mode 100644 index 0000000..498ce77 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py @@ -0,0 +1,358 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Convolution blocks for mobilenet.""" +import contextlib +import functools + +import tensorflow as tf + +slim = tf.contrib.slim + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _split_divisible(num, num_ways, divisible_by=8): + """Evenly splits num, num_ways so each piece is a multiple of divisible_by.""" + assert num % divisible_by == 0 + assert num / num_ways >= divisible_by + # Note: want to round down, we adjust each split to match the total. + base = num // num_ways // divisible_by * divisible_by + result = [] + accumulated = 0 + for i in range(num_ways): + r = base + while accumulated + r < num * (i + 1) / num_ways: + r += divisible_by + result.append(r) + accumulated += r + assert accumulated == num + return result + + +@contextlib.contextmanager +def _v1_compatible_scope_naming(scope): + if scope is None: # Create uniqified separable blocks. 
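+    # For intuition on the sizing helpers above: _make_divisible(24, 8) == 24
+    # since 24 is already a multiple of 8, while _make_divisible(10, 8) == 16
+    # because shrinking 10 down to 8 would lose more than 10%, so the result
+    # is bumped to the next multiple instead.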
+    with tf.variable_scope(None, default_name='separable') as s, \
+        tf.name_scope(s.original_name_scope):
+      yield ''
+  else:
+    # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts.
+    # which provide numbered scopes.
+    scope += '_'
+    yield scope
+
+
+@slim.add_arg_scope
+def split_separable_conv2d(input_tensor,
+                           num_outputs,
+                           scope=None,
+                           normalizer_fn=None,
+                           stride=1,
+                           rate=1,
+                           endpoints=None,
+                           use_explicit_padding=False):
+  """Separable mobilenet V1 style convolution.
+
+  Depthwise convolution, with default non-linearity,
+  followed by 1x1 pointwise convolution. This is similar to
+  slim.separable_conv2d, but differs in that it applies batch
+  normalization and non-linearity to depthwise. This matches
+  the basic building of Mobilenet Paper
+  (https://arxiv.org/abs/1704.04861)
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs
+    scope: optional name of the scope. Note if provided it will use
+      scope_depthwise for depthwise, and scope_pointwise for pointwise.
+    normalizer_fn: which normalizer function to use for depthwise/pointwise
+    stride: stride
+    rate: output rate (also known as dilation rate)
+    endpoints: optional, if provided, will export additional tensors to it.
+    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+      inputs so that the output dimensions are the same as if 'SAME' padding
+      were used.
+
+  Returns:
+    output tensor
+  """
+
+  with _v1_compatible_scope_naming(scope) as scope:
+    dw_scope = scope + 'depthwise'
+    endpoints = endpoints if endpoints is not None else {}
+    kernel_size = [3, 3]
+    padding = 'SAME'
+    if use_explicit_padding:
+      padding = 'VALID'
+      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
+    net = slim.separable_conv2d(
+        input_tensor,
+        None,
+        kernel_size,
+        depth_multiplier=1,
+        stride=stride,
+        rate=rate,
+        normalizer_fn=normalizer_fn,
+        padding=padding,
+        scope=dw_scope)
+
+    endpoints[dw_scope] = net
+
+    pw_scope = scope + 'pointwise'
+    net = slim.conv2d(
+        net,
+        num_outputs, [1, 1],
+        stride=1,
+        normalizer_fn=normalizer_fn,
+        scope=pw_scope)
+    endpoints[pw_scope] = net
+  return net
+
+
+def expand_input_by_factor(n, divisible_by=8):
+  return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
+
+
+@slim.add_arg_scope
+def expanded_conv(input_tensor,
+                  num_outputs,
+                  expansion_size=expand_input_by_factor(6),
+                  stride=1,
+                  rate=1,
+                  kernel_size=(3, 3),
+                  residual=True,
+                  normalizer_fn=None,
+                  project_activation_fn=tf.identity,
+                  split_projection=1,
+                  split_expansion=1,
+                  expansion_transform=None,
+                  depthwise_location='expansion',
+                  depthwise_channel_multiplier=1,
+                  endpoints=None,
+                  use_explicit_padding=False,
+                  padding='SAME',
+                  scope=None):
+  """Depthwise Convolution Block with expansion.
+
+  Builds a composite convolution that has the following structure
+  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs in the final layer.
+    expansion_size: the size of expansion, could be a constant or a callable.
+      If latter it will be provided 'num_inputs' as an input. For forward
+      compatibility it should accept arbitrary keyword arguments.
+      Default will expand the input by factor of 6.
+    stride: depthwise stride
+    rate: depthwise rate
+    kernel_size: depthwise kernel
+    residual: whether to include residual connection between input
+      and output.
+ normalizer_fn: batchnorm or otherwise + project_activation_fn: activation function for the project layer + split_projection: how many ways to split projection operator + (that is conv expansion->bottleneck) + split_expansion: how many ways to split expansion op + (that is conv bottleneck->expansion) ops will keep depth divisible + by this value. + expansion_transform: Optional function that takes expansion + as a single input and returns output. + depthwise_location: where to put depthwise covnvolutions supported + values None, 'input', 'output', 'expansion' + depthwise_channel_multiplier: depthwise channel multiplier: + each input will replicated (with different filters) + that many times. So if input had c channels, + output will have c x depthwise_channel_multpilier. + endpoints: An optional dictionary into which intermediate endpoints are + placed. The keys "expansion_output", "depthwise_output", + "projection_output" and "expansion_transform" are always populated, even + if the corresponding functions are not invoked. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + padding: Padding type to use if `use_explicit_padding` is not set. + scope: optional scope. + + Returns: + Tensor of depth num_outputs + + Raises: + TypeError: on inval + """ + with tf.variable_scope(scope, default_name='expanded_conv') as s, \ + tf.name_scope(s.original_name_scope): + prev_depth = input_tensor.get_shape().as_list()[3] + if depthwise_location not in [None, 'input', 'output', 'expansion']: + raise TypeError('%r is unknown value for depthwise_location' % + depthwise_location) + if use_explicit_padding: + if padding != 'SAME': + raise TypeError('`use_explicit_padding` should only be used with ' + '"SAME" padding.') + padding = 'VALID' + depthwise_func = functools.partial( + slim.separable_conv2d, + num_outputs=None, + kernel_size=kernel_size, + depth_multiplier=depthwise_channel_multiplier, + stride=stride, + rate=rate, + normalizer_fn=normalizer_fn, + padding=padding, + scope='depthwise') + # b1 -> b2 * r -> b2 + # i -> (o * r) (bottleneck) -> o + input_tensor = tf.identity(input_tensor, 'input') + net = input_tensor + + if depthwise_location == 'input': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net, activation_fn=None) + + if callable(expansion_size): + inner_size = expansion_size(num_inputs=prev_depth) + else: + inner_size = expansion_size + + if inner_size > net.shape[3]: + net = split_conv( + net, + inner_size, + num_ways=split_expansion, + scope='expand', + stride=1, + normalizer_fn=normalizer_fn) + net = tf.identity(net, 'expansion_output') + if endpoints is not None: + endpoints['expansion_output'] = net + + if depthwise_location == 'expansion': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net) + + net = tf.identity(net, name='depthwise_output') + if endpoints is not None: + endpoints['depthwise_output'] = net + if expansion_transform: + net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor) + # Note in contrast with expansion, we always have + # projection to produce the desired output size. 
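+    # For intuition, shapes through one stride-1 unit with the default x6
+    # expansion (illustrative depth of 24): input [N, H, W, 24] -> expand 1x1
+    # -> [N, H, W, 144] -> depthwise 3x3 -> [N, H, W, 144] -> this projection
+    # 1x1 -> [N, H, W, 24], after which the residual check below can add the
+    # input back in.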
+    net = split_conv(
+        net,
+        num_outputs,
+        num_ways=split_projection,
+        stride=1,
+        scope='project',
+        normalizer_fn=normalizer_fn,
+        activation_fn=project_activation_fn)
+    if endpoints is not None:
+      endpoints['projection_output'] = net
+    if depthwise_location == 'output':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net, activation_fn=None)
+
+    if callable(residual):  # custom residual
+      net = residual(input_tensor=input_tensor, output_tensor=net)
+    elif (residual and
+          # stride check enforces that we don't add residuals when spatial
+          # dimensions are None
+          stride == 1 and
+          # Depth matches
+          net.get_shape().as_list()[3] ==
+          input_tensor.get_shape().as_list()[3]):
+      net += input_tensor
+    return tf.identity(net, name='output')
+
+
+def split_conv(input_tensor,
+               num_outputs,
+               num_ways,
+               scope,
+               divisible_by=8,
+               **kwargs):
+  """Creates a split convolution.
+
+  Split convolution splits the input and output into
+  'num_blocks' blocks of approximately the same size each,
+  and only connects the $i$-th input to the $i$-th output.
+
+  Args:
+    input_tensor: input tensor
+    num_outputs: number of output filters
+    num_ways: num blocks to split by.
+    scope: scope for all the operators.
+    divisible_by: make sure that every part is divisible by this.
+    **kwargs: will be passed directly into conv2d operator
+  Returns:
+    tensor
+  """
+  b = input_tensor.get_shape().as_list()[3]
+
+  if num_ways == 1 or min(b // num_ways,
+                          num_outputs // num_ways) < divisible_by:
+    # Don't do any splitting if we end up with less than 8 filters
+    # on either side.
+    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
+
+  outs = []
+  input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
+  output_splits = _split_divisible(
+      num_outputs, num_ways, divisible_by=divisible_by)
+  inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
+  base = scope
+  for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
+    scope = base + '_part_%d' % (i,)
+    n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
+    n = tf.identity(n, scope + '_output')
+    outs.append(n)
+  return tf.concat(outs, 3, name=scope + '_concat')
diff --git a/dh_segment/network/pretrained_models/mobilenet/encoder.py b/dh_segment/network/pretrained_models/mobilenet/encoder.py
new file mode 100644
index 0000000..4516b98
--- /dev/null
+++ b/dh_segment/network/pretrained_models/mobilenet/encoder.py
@@ -0,0 +1,53 @@
+from ...model import Encoder
+import tensorflow as tf
+from .mobilenet_v2 import training_scope, mobilenet_base
+from typing import Tuple, Optional, Union, List, Dict
+from tensorflow.contrib import slim
+import os
+from ....utils.misc import get_data_folder, download_file
+import tarfile
+
+
+class MobileNetV2(Encoder):
+    def __init__(self, train_batchnorm: bool=False, weight_decay: float=0.00004, batch_renorm: bool=True):
+        self.train_batchnorm = train_batchnorm
+        self.weight_decay = weight_decay
+        self.batch_renorm = batch_renorm
+        pretrained_dir = os.path.join(get_data_folder(), 'mobilenet_v2')
+        self.pretrained_file = os.path.join(pretrained_dir, 'mobilenet_v2_1.0_224.ckpt')
+        if not os.path.exists(self.pretrained_file+'.index'):
+            print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file))
+            tar_filename = os.path.join(get_data_folder(), 'mobilenet_v2.tar.gz')
+            download_file('https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz', tar_filename)
+            tar =
tarfile.open(tar_filename) + tar.extractall(path=pretrained_dir) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file+'.index') + print('Pre-trained weights downloaded!') + + def __call__(self, images: tf.Tensor, is_training=False) -> List[tf.Tensor]: + outputs = [] + + with slim.arg_scope(training_scope(weight_decay=self.weight_decay, + is_training=is_training and self.train_batchnorm)): + normalized_images = (images / 127.5) - 1.0 + outputs.append(normalized_images) + + desired_endpoints = [ + 'layer_2', + 'layer_4', + 'layer_7', + 'layer_14', + 'layer_18' + ] + + _, endpoints = mobilenet_base(normalized_images) + for d in desired_endpoints: + outputs.append(endpoints[d]) + + return outputs + + def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict]]: + return self.pretrained_file, [v for v in tf.global_variables() + if 'MobilenetV2' in v.name and 'renorm' not in v.name] \ No newline at end of file diff --git a/dh_segment/network/pretrained_models/mobilenet/mobilenet.py b/dh_segment/network/pretrained_models/mobilenet/mobilenet.py new file mode 100644 index 0000000..8c47dd9 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/mobilenet.py @@ -0,0 +1,466 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Mobilenet Base Class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import collections +import contextlib +import copy +import os + +import tensorflow as tf + +slim = tf.contrib.slim + + +@slim.add_arg_scope +def apply_activation(x, name=None, activation_fn=None): + return activation_fn(x, name=name) if activation_fn else x + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). 
+ """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@contextlib.contextmanager +def _set_arg_scope_defaults(defaults): + """Sets arg scope defaults for all items present in defaults. + + Args: + defaults: dictionary/list of pairs, containing a mapping from + function to a dictionary of default args. + + Yields: + context manager where all defaults are set. + """ + if hasattr(defaults, 'items'): + items = list(defaults.items()) + else: + items = defaults + if not items: + yield + else: + func, default_arg = items[0] + with slim.arg_scope(func, **default_arg): + with _set_arg_scope_defaults(items[1:]): + yield + + +@slim.add_arg_scope +def depth_multiplier(output_params, + multiplier, + divisible_by=8, + min_depth=8, + **unused_kwargs): + if 'num_outputs' not in output_params: + return + d = output_params['num_outputs'] + output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by, + min_depth) + + +_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func']) + + +def op(opfunc, **params): + multiplier = params.pop('multiplier_transorm', depth_multiplier) + return _Op(opfunc, params=params, multiplier_func=multiplier) + + +class NoOpScope(object): + """No-op context manager.""" + + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return False + + +def safe_arg_scope(funcs, **kwargs): + """Returns `slim.arg_scope` with all None arguments removed. + + Arguments: + funcs: Functions to pass to `arg_scope`. + **kwargs: Arguments to pass to `arg_scope`. + + Returns: + arg_scope or No-op context manager. + + Note: can be useful if None value should be interpreted as "do not overwrite + this parameter value". + """ + filtered_args = {name: value for name, value in kwargs.items() + if value is not None} + if filtered_args: + return slim.arg_scope(funcs, **filtered_args) + else: + return NoOpScope() + + +@slim.add_arg_scope +def mobilenet_base( # pylint: disable=invalid-name + inputs, + conv_defs, + multiplier=1.0, + final_endpoint=None, + output_stride=None, + use_explicit_padding=False, + scope=None, + is_training=False): + """Mobilenet base network. + + Constructs a network from inputs to the given final endpoint. By default + the network is constructed in inference mode. To create network + in training mode use: + + with slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_base(...) + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + conv_defs: A list of op(...) layers specifying the net architecture. + multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. 
+ final_endpoint: The name of last layer, for early termination for + for V1-based networks: last layer is "layer_14", for V2: "layer_20" + output_stride: An integer that specifies the requested ratio of input to + output spatial resolution. If not None, then we invoke atrous convolution + if necessary to prevent the network from reducing the spatial resolution + of the activation maps. Allowed values are 1 or any even number, excluding + zero. Typical values are 8 (accurate fully convolutional mode), 16 + (fast fully convolutional mode), and 32 (classification mode). + + NOTE- output_stride relies on all consequent operators to support dilated + operators via "rate" parameter. This might require wrapping non-conv + operators to operate properly. + + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + scope: optional variable scope. + is_training: How to setup batch_norm and other ops. Note: most of the time + this does not need be set directly. Use mobilenet.training_scope() to set + up training instead. This parameter is here for backward compatibility + only. It is safe to set it to the value matching + training_scope(is_training=...). It is also safe to explicitly set + it to False, even if there is outer training_scope set to to training. + (The network will be built in inference mode). If this is set to None, + no arg_scope is added for slim.batch_norm's is_training parameter. + + Returns: + tensor_out: output tensor. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: depth_multiplier <= 0, or the target output_stride is not + allowed. + """ + if multiplier <= 0: + raise ValueError('multiplier is not greater than zero.') + + # Set conv defs defaults and overrides. + conv_defs_defaults = conv_defs.get('defaults', {}) + conv_defs_overrides = conv_defs.get('overrides', {}) + if use_explicit_padding: + conv_defs_overrides = copy.deepcopy(conv_defs_overrides) + conv_defs_overrides[ + (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'} + + if output_stride is not None: + if output_stride == 0 or (output_stride > 1 and output_stride % 2): + raise ValueError('Output stride must be None, 1 or a multiple of 2.') + + # a) Set the tensorflow scope + # b) set padding to default: note we might consider removing this + # since it is also set by mobilenet_scope + # c) set all defaults + # d) set all extra overrides. + with _scope_all(scope, default_scope='Mobilenet'), \ + safe_arg_scope([slim.batch_norm], is_training=is_training), \ + _set_arg_scope_defaults(conv_defs_defaults), \ + _set_arg_scope_defaults(conv_defs_overrides): + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = inputs + # Insert default parameters before the base scope which includes + # any custom overrides set in mobilenet. 
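+    # Worked example of the stride bookkeeping below: with output_stride=16,
+    # stride-2 ops run normally until current_stride reaches 16; the next
+    # stride-2 op is then forced to stride 1 and the atrous rate doubles, so
+    # deeper layers keep a 1/16-resolution grid without losing receptive field.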
+ end_points = {} + scopes = {} + for i, opdef in enumerate(conv_defs['spec']): + params = dict(opdef.params) + opdef.multiplier_func(params, multiplier) + stride = params.get('stride', 1) + if output_stride is not None and current_stride == output_stride: + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + layer_stride = 1 + layer_rate = rate + rate *= stride + else: + layer_stride = stride + layer_rate = 1 + current_stride *= stride + # Update params. + params['stride'] = layer_stride + # Only insert rate to params if rate > 1. + if layer_rate > 1: + params['rate'] = layer_rate + # Set padding + if use_explicit_padding: + if 'kernel_size' in params: + net = _fixed_padding(net, params['kernel_size'], layer_rate) + else: + params['use_explicit_padding'] = True + + end_point = 'layer_%d' % (i + 1) + try: + net = opdef.op(net, **params) + except Exception: + print('Failed to create op %i: %r params: %r' % (i, opdef, params)) + raise + end_points[end_point] = net + scope = os.path.dirname(net.name) + scopes[scope] = end_point + if final_endpoint is not None and end_point == final_endpoint: + break + + # Add all tensors that end with 'output' to + # endpoints + for t in net.graph.get_operations(): + scope = os.path.dirname(t.name) + bn = os.path.basename(t.name) + if scope in scopes and t.name.endswith('output'): + end_points[scopes[scope] + '/' + bn] = t.outputs[0] + return net, end_points + + +@contextlib.contextmanager +def _scope_all(scope, default_scope=None): + with tf.variable_scope(scope, default_name=default_scope) as s, \ + tf.name_scope(s.original_name_scope): + yield s + + +@slim.add_arg_scope +def mobilenet(inputs, + num_classes=1001, + prediction_fn=slim.softmax, + reuse=None, + scope='Mobilenet', + base_only=False, + **mobilenet_args): + """Mobilenet model for classification, supports both V1 and V2. + + Note: default mode is inference, use mobilenet.training_scope to create + training network. + + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + num_classes: number of predicted classes. If 0 or None, the logits layer + is omitted and the input features to the logits layer (before dropout) + are returned instead. + prediction_fn: a function to get predictions out of logits + (default softmax). + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + base_only: if True will only create the base of the network (no pooling + and no logits). + **mobilenet_args: passed to mobilenet_base verbatim. + - conv_defs: list of conv defs + - multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + - output_stride: will ensure that the last layer has at most total stride. + If the architecture calls for more stride than that provided + (e.g. output_stride=16, but the architecture has 5 stride=2 operators), + it will replace output_stride with fractional convolutions using Atrous + Convolutions. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation tensor. 
+ + Raises: + ValueError: Input rank is invalid. + """ + is_training = mobilenet_args.get('is_training', False) + input_shape = inputs.get_shape().as_list() + if len(input_shape) != 4: + raise ValueError('Expected rank 4 input, was: %d' % len(input_shape)) + + with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope: + inputs = tf.identity(inputs, 'input') + net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args) + if base_only: + return net, end_points + + net = tf.identity(net, name='embedding') + + with tf.variable_scope('Logits'): + net = global_pool(net) + end_points['global_pool'] = net + if not num_classes: + return net, end_points + net = slim.dropout(net, scope='Dropout', is_training=is_training) + # 1 x 1 x num_classes + # Note: legacy scope name. + logits = slim.conv2d( + net, + num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer(), + scope='Conv2d_1c_1x1') + + logits = tf.squeeze(logits, [1, 2]) + + logits = tf.identity(logits, name='output') + end_points['Logits'] = logits + if prediction_fn: + end_points['Predictions'] = prediction_fn(logits, 'Predictions') + return logits, end_points + + +def global_pool(input_tensor, pool_op=tf.nn.avg_pool): + """Applies avg pool to produce 1x1 output. + + NOTE: This function is funcitonally equivalenet to reduce_mean, but it has + baked in average pool which has better support across hardware. + + Args: + input_tensor: input tensor + pool_op: pooling op (avg pool is default) + Returns: + a tensor batch_size x 1 x 1 x depth. + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size = tf.convert_to_tensor( + [1, tf.shape(input_tensor)[1], + tf.shape(input_tensor)[2], 1]) + else: + kernel_size = [1, shape[1], shape[2], 1] + output = pool_op( + input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') + # Recover output shape, for unknown shape. + output.set_shape([None, 1, 1, None]) + return output + + +def training_scope(is_training=True, + weight_decay=0.00004, + stddev=0.09, + dropout_keep_prob=0.8, + bn_decay=0.997): + """Defines Mobilenet training scope. + + Usage: + with tf.contrib.slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + # the network created will be trainble with dropout/batch norm + # initialized appropriately. + Args: + is_training: if set to False this will ensure that all customizations are + set to non-training mode. This might be helpful for code that is reused + across both training/evaluation, but most of the time training_scope with + value False is not needed. If this is set to None, the parameters is not + added to the batch_norm arg_scope. + + weight_decay: The weight decay to use for regularizing the model. + stddev: Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob: dropout keep probability (not set if equals to None). + bn_decay: decay for the batch norm moving averages (not set if equals to + None). + + Returns: + An argument scope to use via arg_scope. + """ + # Note: do not introduce parameters that would change the inference + # model here (for example whether to use bias), modify conv_def instead. 
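+  # Summary of the scope stack assembled below: weight initialization and
+  # batch norm as default normalizer for conv/fc/separable layers, is_training
+  # threaded into batch norm and dropout, and L2 weight decay applied to
+  # regular convolutions only (depthwise convolutions are left unregularized).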
+ batch_norm_params = { + 'decay': bn_decay, + 'is_training': is_training + } + if stddev < 0: + weight_intitializer = slim.initializers.xavier_initializer() + else: + weight_intitializer = tf.truncated_normal_initializer(stddev=stddev) + + # Set weight_decay for weights in Conv and FC layers. + with slim.arg_scope( + [slim.conv2d, slim.fully_connected, slim.separable_conv2d], + weights_initializer=weight_intitializer, + normalizer_fn=slim.batch_norm), \ + slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training), \ + safe_arg_scope([slim.batch_norm], **batch_norm_params), \ + safe_arg_scope([slim.dropout], is_training=is_training, + keep_prob=dropout_keep_prob), \ + slim.arg_scope([slim.conv2d], \ + weights_regularizer=slim.l2_regularizer(weight_decay)), \ + slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s: + return s diff --git a/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py b/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py new file mode 100644 index 0000000..f2df180 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py @@ -0,0 +1,219 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of Mobilenet V2. + +Architecture: https://arxiv.org/abs/1801.04381 + +The base model gives 72.2% accuracy on ImageNet, with 300MMadds, +3.4 M parameters. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import functools + +import tensorflow as tf + +from . import conv_blocks as ops +from . import mobilenet as lib + +slim = tf.contrib.slim +op = lib.op + +expand_input = ops.expand_input_by_factor + +# pyformat: disable +# Architecture: https://arxiv.org/abs/1801.04381 +V2_DEF = dict( + defaults={ + # Note: these parameters of batch norm affect the architecture + # that's why they are here and not in training_scope. 
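+        # Each op(...) entry in 'spec' below becomes endpoint 'layer_<n>' in
+        # order; the dh_segment encoder taps layer_2/4/7/14/18, the last
+        # bottleneck unit at each spatial resolution from 1/2 down to 1/32.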
+ (slim.batch_norm,): {'center': True, 'scale': True}, + (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { + 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 + }, + (ops.expanded_conv,): { + 'expansion_size': expand_input(6), + 'split_expansion': 1, + 'normalizer_fn': slim.batch_norm, + 'residual': True + }, + (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} + }, + spec=[ + op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), + op(ops.expanded_conv, + expansion_size=expand_input(1, divisible_by=1), + num_outputs=16), + op(ops.expanded_conv, stride=2, num_outputs=24), + op(ops.expanded_conv, stride=1, num_outputs=24), + op(ops.expanded_conv, stride=2, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=2, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=2, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=320), + op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280) + ], +) + + +# pyformat: enable + + +@slim.add_arg_scope +def mobilenet(input_tensor, + num_classes=1001, + depth_multiplier=1.0, + scope='MobilenetV2', + conv_defs=None, + finegrain_classification_mode=False, + min_depth=None, + divisible_by=None, + activation_fn=None, + **kwargs): + """Creates mobilenet V2 network. + + Inference mode is created by default. To create training use training_scope + below. + + with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + Args: + input_tensor: The input tensor + num_classes: number of classes + depth_multiplier: The multiplier applied to scale number of + channels in each layer. Note: this is called depth multiplier in the + paper but the name is kept for consistency with slim's model builder. + scope: Scope of the operator + conv_defs: Allows to override default conv def. + finegrain_classification_mode: When set to True, the model + will keep the last layer large even for small multipliers. Following + https://arxiv.org/abs/1801.04381 + suggests that it improves performance for ImageNet-type of problems. + *Note* ignored if final_endpoint makes the builder exit earlier. + min_depth: If provided, will ensure that all layers will have that + many channels after application of depth multiplier. + divisible_by: If provided will ensure that all layers # channels + will be divisible by this number. + activation_fn: Activation function to use, defaults to tf.nn.relu6 if not + specified. + **kwargs: passed directly to mobilenet.mobilenet: + prediction_fn- what prediction function to use. + reuse-: whether to reuse variables (if reuse set to true, scope + must be given). 
+  Returns:
+    logits/endpoints pair
+
+  Raises:
+    ValueError: On invalid arguments
+  """
+  if conv_defs is None:
+    conv_defs = V2_DEF
+  if 'multiplier' in kwargs:
+    raise ValueError('mobilenetv2 doesn\'t support generic '
+                     'multiplier parameter; use "depth_multiplier" instead.')
+  if finegrain_classification_mode:
+    conv_defs = copy.deepcopy(conv_defs)
+    if depth_multiplier < 1:
+      conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
+  if activation_fn:
+    conv_defs = copy.deepcopy(conv_defs)
+    defaults = conv_defs['defaults']
+    conv_defaults = (
+        defaults[(slim.conv2d, slim.fully_connected, slim.separable_conv2d)])
+    conv_defaults['activation_fn'] = activation_fn
+
+  depth_args = {}
+  # NB: do not set depth_args unless they are provided to avoid overriding
+  # whatever default depth_multiplier might have thanks to arg_scope.
+  if min_depth is not None:
+    depth_args['min_depth'] = min_depth
+  if divisible_by is not None:
+    depth_args['divisible_by'] = divisible_by
+
+  with slim.arg_scope((lib.depth_multiplier,), **depth_args):
+    return lib.mobilenet(
+        input_tensor,
+        num_classes=num_classes,
+        conv_defs=conv_defs,
+        scope=scope,
+        multiplier=depth_multiplier,
+        **kwargs)
+
+
+mobilenet.default_image_size = 224
+
+
+def wrapped_partial(func, *args, **kwargs):
+  partial_func = functools.partial(func, *args, **kwargs)
+  functools.update_wrapper(partial_func, func)
+  return partial_func
+
+
+# Wrappers for mobilenet v2 with depth-multipliers. Note that
+# 'finegrain_classification_mode' is set to True, which means the embedding
+# layer will not be shrunk when given a depth-multiplier < 1.0.
+mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4)
+mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50,
+                                   finegrain_classification_mode=True)
+mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35,
+                                   finegrain_classification_mode=True)
+
+
+@slim.add_arg_scope
+def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
+  """Creates the base of the mobilenet (no pooling and no logits)."""
+  return mobilenet(input_tensor,
+                   depth_multiplier=depth_multiplier,
+                   base_only=True, **kwargs)
+
+
+def training_scope(**kwargs):
+  """Defines MobilenetV2 training scope.
+
+  Usage:
+     with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
+       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
+
+  Args:
+    **kwargs: Passed to mobilenet.training_scope. The following parameters
+      are supported:
+      weight_decay- The weight decay to use for regularizing the model.
+      stddev- Standard deviation for initialization, if negative uses xavier.
+      dropout_keep_prob- dropout keep probability
+      bn_decay- decay for the batch norm moving averages.
+
+  Returns:
+    An `arg_scope` to use for the mobilenet v2 model.
+ """ + return lib.training_scope(**kwargs) + + +__all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF'] diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models/resnet50.py similarity index 62% rename from dh_segment/network/pretrained_models.py rename to dh_segment/network/pretrained_models/resnet50.py index 1be97dc..f4de0bb 100644 --- a/dh_segment/network/pretrained_models.py +++ b/dh_segment/network/pretrained_models/resnet50.py @@ -1,17 +1,11 @@ from tensorflow.contrib import slim, layers import tensorflow as tf from tensorflow.contrib.slim import nets -import numpy as np -from .model import Encoder +from ..model import Encoder import os import tarfile -from ..utils.misc import get_data_folder, download_file - -_VGG_MEANS = [123.68, 116.78, 103.94] - - -def mean_substraction(input_tensor, means=_VGG_MEANS): - return tf.subtract(input_tensor, np.array(means)[None, None, None, :], name='MeanSubstraction') +from ...utils.misc import get_data_folder, download_file +from .vgg16 import mean_substraction class ResnetV1_50(Encoder): @@ -124,67 +118,3 @@ def corrected_resnet_v1_block(scope: str, base_depth: int, num_units: int, strid outputs.append(endpoints[d]) return outputs - - -class VGG16(Encoder): - """VGG-16 implementation - - :ivar blocks: number of blocks (vgg blocks) - :ivar weight_decay: weight decay value - :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights - """ - def __init__(self, blocks: int=5, weight_decay: float=0.0005): - self.blocks = blocks - self.weight_decay = weight_decay - self.pretrained_file = os.path.join(get_data_folder(), 'vgg_16.ckpt') - if not os.path.exists(self.pretrained_file): - print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) - tar_filename = os.path.join(get_data_folder(), 'vgg_16.tar.gz') - download_file('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename) - tar = tarfile.open(tar_filename) - tar.extractall(path=get_data_folder()) - tar.close() - os.remove(tar_filename) - assert os.path.exists(self.pretrained_file) - print('Pre-trained weights downloaded!') - - def pretrained_information(self): - return self.pretrained_file, [v for v in tf.global_variables() - if 'vgg_16' in v.name - and 'renorm' not in v.name] - - def __call__(self, images: tf.Tensor, is_training=False): - outputs = [] - - with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): - with tf.variable_scope(None, 'vgg_16', [images]) as sc: - input_tensor = mean_substraction(images) - outputs.append(input_tensor) - end_points_collection = sc.original_name_scope + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d. 
- with slim.arg_scope( - [layers.conv2d, layers.fully_connected, layers.max_pool2d], - outputs_collections=end_points_collection): - net = layers.repeat( - input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') - outputs.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool1') - if self.blocks >= 2: - net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') - outputs.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool2') - if self.blocks >= 3: - net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') - outputs.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool3') - if self.blocks >= 4: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') - outputs.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool4') - if self.blocks >= 5: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') - outputs.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool5') - - # TODO : the output of the last max pool is not returned, shouldn't it be ? - return outputs diff --git a/dh_segment/network/pretrained_models/vgg16.py b/dh_segment/network/pretrained_models/vgg16.py new file mode 100644 index 0000000..30d5954 --- /dev/null +++ b/dh_segment/network/pretrained_models/vgg16.py @@ -0,0 +1,77 @@ +from tensorflow.contrib import slim, layers +import tensorflow as tf +from tensorflow.contrib.slim import nets +import numpy as np +from ..model import Encoder +import os +import tarfile +from ...utils.misc import get_data_folder, download_file + +_VGG_MEANS = [123.68, 116.78, 103.94] + + +def mean_substraction(input_tensor, means=_VGG_MEANS): + return tf.subtract(input_tensor, np.array(means)[None, None, None, :], name='MeanSubstraction') + + +class VGG16(Encoder): + """VGG-16 implementation + + :ivar blocks: number of blocks (vgg blocks) + :ivar weight_decay: weight decay value + :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights + """ + def __init__(self, blocks: int=5, weight_decay: float=0.0005): + self.blocks = blocks + self.weight_decay = weight_decay + self.pretrained_file = os.path.join(get_data_folder(), 'vgg_16.ckpt') + if not os.path.exists(self.pretrained_file): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'vgg_16.tar.gz') + download_file('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=get_data_folder()) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file) + print('Pre-trained weights downloaded!') + + def pretrained_information(self): + return self.pretrained_file, [v for v in tf.global_variables() + if 'vgg_16' in v.name + and 'renorm' not in v.name] + + def __call__(self, images: tf.Tensor, is_training=False): + outputs = [] + + with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): + with tf.variable_scope(None, 'vgg_16', [images]) as sc: + input_tensor = mean_substraction(images) + outputs.append(input_tensor) + end_points_collection = sc.original_name_scope + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. 
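+                # NB: unlike the previous implementation, each block's feature
+                # map is appended to `outputs` *after* its max-pooling op, and
+                # the last pooled map is returned as well, so a 5-block VGG16
+                # yields six maps with spatial strides 1, 2, 4, 8, 16 and 32.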
+                with slim.arg_scope(
+                        [layers.conv2d, layers.fully_connected, layers.max_pool2d],
+                        outputs_collections=end_points_collection):
+                    net = layers.repeat(
+                        input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1')
+                    net = layers.max_pool2d(net, [2, 2], scope='pool1')
+                    outputs.append(net)
+                    if self.blocks >= 2:
+                        net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
+                        net = layers.max_pool2d(net, [2, 2], scope='pool2')
+                        outputs.append(net)
+                    if self.blocks >= 3:
+                        net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
+                        net = layers.max_pool2d(net, [2, 2], scope='pool3')
+                        outputs.append(net)
+                    if self.blocks >= 4:
+                        net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
+                        net = layers.max_pool2d(net, [2, 2], scope='pool4')
+                        outputs.append(net)
+                    if self.blocks >= 5:
+                        net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
+                        net = layers.max_pool2d(net, [2, 2], scope='pool5')
+                        outputs.append(net)
+
+        return outputs

From ea11126487745feeef81c92f03d8cc4e2448820a Mon Sep 17 00:00:00 2001
From: Benoit Seguin
Date: Mon, 29 Oct 2018 18:07:23 +0100
Subject: [PATCH 13/57] Documentation of exported model

---
 dh_segment/inference/loader.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/dh_segment/inference/loader.py b/dh_segment/inference/loader.py
index be64bc7..537ffe6 100644
--- a/dh_segment/inference/loader.py
+++ b/dh_segment/inference/loader.py
@@ -9,6 +9,16 @@


 class LoadedModel:
+    """
+    Loads an exported dhSegment model
+
+    :param model_base_dir: the model directory, i.e. the one containing `saved_model.{pb|pbtxt}`. Otherwise, it is \
+    assumed to be a TF exporter directory, and the latest export directory will be automatically selected.
+    :param predict_mode: defines the input/output format of the prediction output (see `.predict()`)
+    :param num_parallel_predictions: limits the number of concurrent calls of `predict` to avoid Out-Of-Memory \
+    issues if predicting on GPU
+    """
+
     def __init__(self, model_base_dir, predict_mode='filename', num_parallel_predictions=2):
         if os.path.exists(os.path.join(model_base_dir, 'saved_model.pbtxt')) or \
                 os.path.exists(os.path.join(model_base_dir, 'saved_model.pb')):
@@ -52,6 +62,28 @@ def __init__(self, model_base_dir, predict_mode='filename', num_parallel_predict
         self.sema = Semaphore(num_parallel_predictions)

     def predict(self, input_tensor, prediction_key=None):
+        """
+        Performs the prediction from the loaded model according to the prediction mode.
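+
+        Minimal usage sketch (paths are illustrative)::
+
+            model = LoadedModel('page_model/export', predict_mode='filename')
+            output = model.predict('images/scan_0001.jpg')
+            probs, original_shape = output['probs'][0], output['original_shape']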
+
+        Prediction modes:
+
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+| `prediction_mode`         | `input_tensor`                                 | Output prediction dictionary         | Comment                                                                         |
++===========================+================================================+======================================+=================================================================================+
+| `filename`                | Single filename string                         | `labels`, `probs`, `original_shape`  | Loads the image, resizes it, and predicts                                       |
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+| `filename_original_shape` | Single filename string                         | `labels`, `probs`                    | Like `filename`, but rescales the output to the original resolution of the file |
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+| `image`                   | Single input image [1,H,W,3] float32 (0..255)  | `labels`, `probs`, `original_shape`  | Resizes the image, and predicts                                                 |
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+| `image_original_shape`    | Single input image [1,H,W,3] float32 (0..255)  | `labels`, `probs`                    | Like `image`, but rescales the output to the original resolution of the input   |
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+| `image_resized`           | Single input image [1,H,W,3] float32 (0..255)  | `labels`, `probs`                    | Predicts from the input image directly                                          |
++---------------------------+------------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------+
+
+        :param input_tensor: a single input whose format should match the prediction mode
+        :param prediction_key: if not `None`, will return the value of the corresponding key of the output dictionary \
+        instead of the full dictionary
+        :return: the prediction output
+        """
         with self.sema:
             if prediction_key:
                 desired_output = self._output_dict[prediction_key]

From 82a5f220385429ccfdc0f54267f8ee83ffabc35f Mon Sep 17 00:00:00 2001
From: Raphael Barman
Date: Tue, 30 Oct 2018 15:43:26 +0100
Subject: [PATCH 14/57] Fixed refactoring

---
 dh_segment/utils/misc.py | 6 ++++++
 train.py                 | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/dh_segment/utils/misc.py b/dh_segment/utils/misc.py
index 5a1b77a..e387190 100644
--- a/dh_segment/utils/misc.py
+++ b/dh_segment/utils/misc.py
@@ -5,6 +5,7 @@
 import json
 import pickle
 from hashlib import sha1
+from random import shuffle


 def parse_json(filename):
@@ -29,3 +30,8 @@ def dump_pickle(filename, obj):

 def hash_dict(params):
     return sha1(json.dumps(params, sort_keys=True).encode()).hexdigest()
+
+def shuffled(l: list) -> list:
+    ll = l.copy()
+    shuffle(ll)
+    return ll
diff --git a/train.py
b/train.py index 9cf0b44..c04c2de 100644 --- a/train.py +++ b/train.py @@ -4,7 +4,8 @@ from logging import WARNING # import DEBUG, INFO, ERROR for more/less verbosity tf.logging.set_verbosity(WARNING) -from dh_segment import estimator_fn, input, utils +from dh_segment import estimator_fn, utils +from dh_segment.io import input import json from glob import glob import numpy as np From 9889c7dd58333538980158afa844439e338ee8e0 Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 1 Nov 2018 10:52:14 +0100 Subject: [PATCH 15/57] updated demo --- demo.py | 4 +- demo/interactive_demo.ipynb | 347 ++++++++++++++++++++++++++++++++++++ 2 files changed, 349 insertions(+), 2 deletions(-) create mode 100644 demo/interactive_demo.ipynb diff --git a/demo.py b/demo.py index a383823..d5df073 100644 --- a/demo.py +++ b/demo.py @@ -10,7 +10,7 @@ from tqdm import tqdm from dh_segment.io import PAGE -from dh_segment.network import LoadedModel +from dh_segment.inference import LoadedModel from dh_segment.post_processing import boxes_detection, binarization # To output results in PAGE XML format (http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2013-07-15/) @@ -96,7 +96,7 @@ def format_quad_to_string(quad): # Create page region and XML file page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :])) - page_xml = PAGE.Page(filename, image_width=original_shape[1], image_height=original_shape[0], + page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0], page_border=page_border) xml_filename = os.path.join(output_pagexml_dir, '{}.xml'.format(basename)) page_xml.write_to_file(xml_filename, creator_name='PageExtractor') diff --git a/demo/interactive_demo.ipynb b/demo/interactive_demo.ipynb new file mode 100644 index 0000000..a7c6df5 --- /dev/null +++ b/demo/interactive_demo.ipynb @@ -0,0 +1,347 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interactive demo to load a trained model for page extraction and apply it to a randomly selected file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Get the annotated sample dataset, which already contains the folders images and labels. Unzip it into `demo/pages_sample`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! wget https://github.com/dhlab-epfl/dhSegment/releases/download/untagged-b55f9aa4fff5efd4b1b8/pages_sample.zip\n", + "! unzip pages_sample.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Download the provided model (download and unzip it in `demo/model`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! wget https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/model.zip\n", + "! unzip model.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. 
Run the code step by step" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cv2\n", + "from glob import glob\n", + "import numpy as np\n", + "import random\n", + "import tensorflow as tf\n", + "from imageio import imread, imsave" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dh_segment.io import PAGE\n", + "from dh_segment.inference import LoadedModel\n", + "from dh_segment.post_processing import boxes_detection, binarization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def page_make_binary_mask(probs: np.ndarray, threshold: float=-1) -> np.ndarray:\n", + " \"\"\"\n", + " Computes the binary mask of the detected Page from the probabilities outputed by network\n", + " :param probs: array with values in range [0, 1]\n", + " :param threshold: threshold between [0 and 1], if negative Otsu's adaptive threshold will be used\n", + " :return: binary mask\n", + " \"\"\"\n", + "\n", + " mask = binarization.thresholding(probs, threshold)\n", + " mask = binarization.cleaning_binary(mask, kernel_size=5)\n", + " return mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define input and output directories / files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_dir = 'page_model/export'\n", + "if not os.path.exists(model_dir):\n", + " model_dir = 'model/'\n", + "assert(os.path.exists(model_dir))\n", + "\n", + "input_files = glob(os.path.join('pages_sample', 'images/*'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output_dir = './processed_images'\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "# PAGE XML format output\n", + "output_pagexml_dir = os.path.join(output_dir, 'page_xml')\n", + "os.makedirs(output_pagexml_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start a tensorflow session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session = tf.InteractiveSession()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a random image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_to_process = random.sample(input_files, 1)[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = LoadedModel(model_dir, predict_mode='filename')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Predict each pixel's label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For each image, predict each pixel's label\n", + "prediction_outputs = m.predict(file_to_process)\n", + "probs = prediction_outputs['probs'][0]\n", + "original_shape = prediction_outputs['original_shape']\n", + 
"\n", + "probs = probs[:, :, 1] # Take only class '1' (class 0 is the background, class 1 is the page)\n", + "probs = probs / np.max(probs) # Normalize to be in [0, 1]\n", + "\n", + "# Binarize the predictions\n", + "page_bin = page_make_binary_mask(probs)\n", + "\n", + "# Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)\n", + "bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),\n", + " tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the probability map and binarized mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(probs, cmap='gray')\n", + "plt.axis('off')\n", + "plt.title('Probability map')\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(page_bin, cmap='gray')\n", + "plt.axis('off')\n", + "plt.title('Binary mask')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find quadrilateral enclosing the page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype(np.uint8, copy=False),\n", + " mode='min_rectangle', n_max_boxes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Draw page box on original image and export it. Add also box coordinates to the txt file\n", + "original_img = imread(file_to_process, pilmode='RGB')\n", + "if pred_page_coords is not None:\n", + " cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)\n", + "else:\n", + " print('No box found in {}'.format(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "plt.imshow(original_img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Export image and create page region and XML file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basename = os.path.basename(file_to_process).split('.')[0]\n", + "imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)), original_img)\n", + "\n", + "page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))\n", + "page_xml = PAGE.Page(image_filename=file_to_process, image_width=original_shape[1], image_height=original_shape[0], page_border=page_border)\n", + "xml_filename = os.path.join(output_pagexml_dir, '{}.xml'.format(basename))\n", + "page_xml.write_to_file(xml_filename, creator_name='PageExtractor')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. 
Have a look at the results in ``demo/processed_images``" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:dhsegment]", + "language": "python", + "name": "conda-env-dhsegment-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 4e00913d16644e8b9b429ea2d137b1d1cea61210 Mon Sep 17 00:00:00 2001 From: soliveir Date: Fri, 2 Nov 2018 08:21:58 +0100 Subject: [PATCH 16/57] pip install --- doc/start/install.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/start/install.rst b/doc/start/install.rst index 734a3de..a3bc1c4 100644 --- a/doc/start/install.rst +++ b/doc/start/install.rst @@ -1,6 +1,14 @@ Installation ------------ +Using ``pip`` +^^^^^^^^^^^^^ + +Pip install using git repository : :: + + pip install git+https://github.com/dhlab-epfl/dhSegment + + Using Anaconda ^^^^^^^^^^^^^^ @@ -19,6 +27,3 @@ Using Anaconda python setup.py install - -Using ``pip`` -^^^^^^^^^^^^^ \ No newline at end of file From 4f177b188c14e365e26db532d332fb4a8e90376d Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 14 Nov 2018 16:07:42 +0100 Subject: [PATCH 17/57] typo in attribute --- dh_segment/io/PAGE.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index d9d5a11..a271d8c 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -403,7 +403,7 @@ def from_dict(cls, dictionary: dict) -> 'TableRegion': return cls(**super().from_dict(dictionary), rows=dictionary.get('rows'), columns=dictionary.get('columns'), - embeded_text=dictionary.get('embeded_text')) + embedded_text=dictionary.get('embedded_text')) class SeparatorRegion(Region): From 932fa3c296bbdb35dede01f9c915ed12d8dd4a87 Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 14 Nov 2018 16:58:13 +0100 Subject: [PATCH 18/57] corrected non exported segment_ids field --- dh_segment/io/PAGE.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index a271d8c..54bdaa7 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -547,7 +547,8 @@ def __init__(self, id: str = None, coords: List[Point] = None, segment_ids: List @classmethod def from_dict(cls, dictionary: dict) -> 'GroupSegment': - return cls(**super().from_dict(dictionary)) + return cls(**super().from_dict(dictionary), + segment_ids=dictionary.get('segment_ids')) class Page(BaseElement): From c5a19652f62e10f14ad89217caba27c9eb5c5362 Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 15 Nov 2018 15:36:01 +0100 Subject: [PATCH 19/57] sorting of TextLines in a TextRegion --- dh_segment/io/PAGE.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index 54bdaa7..f6c17ad 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -329,6 +329,16 @@ def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[Text self.text_equiv = text_equiv if text_equiv is not None else '' self.text_lines = text_lines if text_lines is not None else [] + def sort_text_lines(self, top_to_bottom: bool=True) -> None: + """ + Sorts ``TextLine``s from top to bottom according to their mean y coordinate (centroid) + :param top_to_bottom: order lines from top to bottom of image, default=True + 
""" + if top_to_bottom: + self.text_lines.sort(key=lambda line: np.mean([c.y for c in line.coords])) + else: + raise NotImplementedError + @classmethod def from_xml(cls, e: ET.Element) -> 'TextRegion': cls.check_tag(e.tag) From 346e2fb000e60d042c18eafc2946a201419aa8ba Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 20 Nov 2018 09:57:59 +0100 Subject: [PATCH 20/57] force type to be int (for JSON export compatibility) --- dh_segment/io/PAGE.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index f6c17ad..ee9a169 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -19,6 +19,8 @@ def _try_to_int(d: Optional[Union[str, int]])-> Optional[int]: if isinstance(d, str): return int(d) + elif not isinstance(d, int): + int(d) else: return d @@ -584,7 +586,7 @@ class Page(BaseElement): def __init__(self, **kwargs): self.image_filename = kwargs.get('image_filename') - self.image_width = _try_to_int(kwargs.get('image_width')) + self.image_width = _try_to_int(kwargs.get('image_width')) # Needs to be int type (not np.int32/64) self.image_height = _try_to_int(kwargs.get('image_height')) self.text_regions = kwargs.get('text_regions', []) self.graphic_regions = kwargs.get('graphic_regions', []) From 7c25b56dc6b10d5ed1587ec6952e6d3f9697fe7e Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 20 Nov 2018 10:34:43 +0100 Subject: [PATCH 21/57] specific to int32 and int64 type --- dh_segment/io/PAGE.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index ee9a169..2d62c98 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -17,10 +17,8 @@ def _try_to_int(d: Optional[Union[str, int]])-> Optional[int]: - if isinstance(d, str): + if isinstance(d, (str, np.int32, np.int64)): return int(d) - elif not isinstance(d, int): - int(d) else: return d From 3eefba85791e3879326a8f4b3af8cad99d36ab71 Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 4 Dec 2018 14:39:33 +0100 Subject: [PATCH 22/57] input csv file --- doc/start/training.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/start/training.rst b/doc/start/training.rst index 1e21039..0638ce1 100644 --- a/doc/start/training.rst +++ b/doc/start/training.rst @@ -4,6 +4,8 @@ Training .. note:: A good nvidia GPU (6GB RAM at least) is most likely necessary to train your own models. We assume CUDA and cuDNN are installed. +**Input data** + You need to have your training data in a folder containing ``images`` folder and ``labels`` folder. The pairs (images, labels) need to have the same name (it is not mandatory to have the same extension file, however we recommend having the label images as ``.png`` files). @@ -14,6 +16,16 @@ a specific color. .. note:: It is now also possible to use a `csv` file containing the pairs ``original_image_filename``, ``label_image_filename`` as input data. +To input a ``csv`` file instead of the two folders ``images`` and ``labels``, +the content should be formatted in the following way: :: + + mypath/myfolder/original_image_filename1,mypath/myfolder/label_image_filename1 + mypath/myfolder/original_image_filename2,mypath/myfolder/label_image_filename2 + + + +**The class.txt file** + The file containing the classes has the format shown below, where each row corresponds to one class (including 'negative' or 'background' class) and each row has 3 values for the 3 RGB values. Of course each class needs to have a different code. 
:: @@ -25,6 +37,8 @@ Of course each class needs to have a different code. :: ... +**Config file with ``sacred``** + `sacred`_ package is used to deal with experiments and trainings. Have a look at the documentation to use it properly. In order to train a model, you should run ``python train.py with `` From 455a8e916f5d74407b4638c2a4d63059cfd1e31c Mon Sep 17 00:00:00 2001 From: maudehrmann Date: Tue, 11 Dec 2018 12:24:41 +0100 Subject: [PATCH 23/57] via annotation processing --- dh_segment/utils/via.py | 352 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 dh_segment/utils/via.py diff --git a/dh_segment/utils/via.py b/dh_segment/utils/via.py new file mode 100644 index 0000000..bf4bda5 --- /dev/null +++ b/dh_segment/utils/via.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Script with CLI to process annotation data produced with VGG Image Annotation (VIA) tool: +- scale down original images +- parse VIA annotation (json) +- create images with masks +(cf. http://www.robots.ox.ac.uk/~vgg/software/via/; done on VIA 2.0.0) + + +Usage: + create_masks.py --task= --collection= --config-file= + +Options: + --collection document collection to work with + --task= task to do: 'original' to downscale original images or 'masks' to create masks. + --config-file= configuration file + +""" + + +import docopt +import json +import sys +import os +from tqdm import tqdm +import numpy as np +from skimage import transform +from collections import namedtuple +from imageio import imsave, imread +import logging +import requests +from requests.auth import HTTPBasicAuth + +from dask.diagnostics import ProgressBar +import dask.bag as db + + +__author__ = "maudehrmann" + +iiif_password = os.environ["IIIF_PWD"] + +logger = logging.getLogger(__name__) + +WorkingItem = namedtuple( # TODO: + "WorkingItem", [ + 'collection', + 'image_name', + 'original_x', + 'original_y', + 'reduced_x', + 'reduced_y', + 'iiif', + 'annotations' + ] +) + + +def init_logger(logger, log_level, log_file): + """Initialise the logger.""" + logger.setLevel(log_level) + + formatter = logging.Formatter( + '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + ) + + if log_file is not None: + fh = logging.FileHandler(filename=log_file, mode='w') + fh.setFormatter(formatter) + logger.addHandler(fh) + + ch = logging.StreamHandler(sys.stdout) + ch.setFormatter(formatter) + logger.addHandler(ch) + + logger.info("Logger successfully initialised") + + return logger + + +def get_annotations(annotations_dict, iiif_url): + """ + From VIA json file, get annotations relative to the given `iiif_url`. + :param annotations_dict: VIA annotation output (originally json) + :param iiif_url: the file to look for + :return: dict + """ + k = iiif_url + "-1" + if k in annotations_dict['_via_img_metadata']: + myannotation = annotations_dict['_via_img_metadata'][k] + if iiif_url == myannotation['filename']: + return myannotation['regions'] + else: + return None + + +def compute_reduced_dimensions(x, y): + """ + Compute new dimensions with height set to 2000. + :param x: height + :param y: width + :return: tuple + """ + ratio = y / x + target_h = 2000 + target_w = int(target_h*ratio) + return target_h, target_w + + +def collect_working_items(image_url_file, annotation_file, collection): + """ + Given VIA annotation input, collect all info on `WorkingItem` object. 
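+
+    Each resulting `WorkingItem` bundles the collection name, the image name, the original
+    and reduced dimensions, the IIIF URL, and the image's annotated regions.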
+ :param image_url_file: file listing IIIF URLs files + :param annotation_file: VIA json file, output of manual annotation + :param collection: target collection to consider + :return: list of `WorkingItem` + """ + logger.info(f"Collecting working items for {collection}") + working_items = [] + session = requests.Session() + + # load manual annotation json data + with open(annotation_file, 'r') as a: + annotations = json.load(a) + + # iterate over image IIIF URLs and build working items + with open(image_url_file) as current_file: + lines = current_file.readlines() + for line in tqdm(lines, desc='URL2WorkingItem'): + x = None + y = None + target_h = None + target_w = None + + # line is e.g. 'https://myserver.ch/iiif_project/image-name/full/full/0/default.jpg' + basename = "https://myserver.ch/iiif_project/" # todo: update, or even pass as param + iiif = line.strip("\n") + + # get image-name + image_name = line.split(basename)[1].split("/full/full/0/default.jpg")[0] + + # get image dimensions + iiif_json = iiif.replace("default.jpg", "info.json") + resp_image = session.get(iiif, auth=('epfl-team', iiif_password)) # need to request image first + resp_json = session.get(iiif_json, auth=('epfl-team', iiif_password)) + if resp_json.status_code == requests.codes.ok: + x = resp_json.json()['height'] + y = resp_json.json()['width'] + target_h, target_w = compute_reduced_dimensions(x, y) + else: + resp_json.raise_for_status() + + regions = get_annotations(annotations, iiif) + + wk_item = WorkingItem( + collection, + image_name, + x, + y, + target_h, + target_w, + iiif, + regions + ) + working_items.append(wk_item) + + logger.info(f"Collected {len(working_items)} items.") + return working_items + + +def scale_down_original(working_item, img_out_dir): + """ + Copy and reduce original image files + :param img_out_dir: where to put the downscaled images. 
+ :param working_items: dict of `WorkingItems` + :return: None + """ + image_set_dir = os.path.join(img_out_dir, working_item.collection, "images") + if not os.path.exists(image_set_dir): + try: + os.makedirs(image_set_dir) + except OSError as e: + if e.errno != os.errno.EEXIST: + raise + pass + + outfile = os.path.join(image_set_dir, working_item.image_name + "_ds.png") + if not os.path.isfile(outfile): + img = getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) + img_resized = transform.resize( + img, + [working_item.reduced_x, working_item.reduced_y], + anti_aliasing=False, + preserve_range=True + ) + imsave(outfile, img_resized.astype(np.uint8)) + + +def getimage_from_iiif(url, user, pwd): + img = requests.get(url, auth=(user, pwd)) + return imread(img.content) + + +def write_mask(mask, masks_dir, collection, image_name, label): + """ Save a mask with filename containing 'label' """ + outdir = os.path.join(masks_dir, collection, image_name) + if not os.path.exists(outdir): + os.makedirs(outdir) + label = label.strip(' \n').replace(" ", "_").lower() if label is not None else 'nolabel' + outfile = os.path.join(outdir, image_name + "-mask-" + label + ".png") + #if not os.path.isfile(outfile): + imsave(outfile, mask.astype(np.uint8)) + + +def get_labels(annotation_file): + """ + Get labels from annotation tool (VIA) settings + :param annotation_file: manual annotation json file + :return: dict (k=label, v=RGB code) + """ + with open(annotation_file, 'r') as a: + annotations = json.load(a) + + label_list = list(annotations['_via_attributes']['region']['Label']['options']) + + # todo: keep a list of colors and assign randomly to label (to avoid hard coding labels/color correspondance) + # todo: or take it from a config file + label_color = dict() + for label in label_list: + if label == "MYLABEL1": + label_color[label] = (255, 255, 255) # white + elif label == "MYLABEL2": + label_color[label] = (255, 255, 0) # yellow + # etc. + return label_color + + +def create_masks(masks_dir, working_items, annotation_file, collection): + """ + For each annotation, create a corresponding binary mask and resize it (h = 2000). + Several annotations of the same class on the same image produce one image with several masks. 
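+
+    Images without any annotation produce a single all-black mask (saved with the 'nolabel' suffix).
+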
+    :param masks_dir: where to output the masks
+    :param working_items: the `WorkingItem`s to process
+    :param annotation_file: manual annotation json file (VIA output)
+    :return: dict mapping each image name to its list of labels (also written to a summary file)
+    """
+    logger.info(f"Creating masks in {masks_dir}...")
+
+    annotation_summary = dict()
+
+    for wi in tqdm(working_items, desc="workingItem2mask"):
+        labels = []
+        label_list = get_labels(annotation_file)
+        # the image has no annotation, writing a black mask:
+        if not wi.annotations:
+            mask = np.zeros([wi.original_x, wi.original_y], np.uint8)
+            mask_resized = transform.resize(mask, [wi.reduced_x, wi.reduced_y], anti_aliasing=False,
+                                            preserve_range=True, order=0)
+            write_mask(mask_resized, masks_dir, collection, wi.image_name, None)
+            labels.append("nolabel")
+        # check all possible labels for the image and create mask:
+        else:
+            for label in label_list.keys():
+                # get annotations corresponding to the current label
+                selected_regions = list(filter(lambda r: r['region_attributes']['Label'] == label, wi.annotations))
+                if selected_regions:
+                    # create a 0 matrix (black background)
+                    mask = np.zeros([wi.original_x, wi.original_y], np.uint8)
+                    # add one or several masks for the current label
+                    # nb: if 2 annotations with the same label are on the same page, they belong to the same mask
+                    for sr in selected_regions:
+                        x = sr['shape_attributes']['x']
+                        y = sr['shape_attributes']['y']
+                        w = sr['shape_attributes']['width']
+                        h = sr['shape_attributes']['height']
+                        # project region(s) on the mask (binary b/w)
+                        mask[y:y + h, x:x + w] = 255
+
+                    # resize
+                    mask_resized = transform.resize(mask, [wi.reduced_x, wi.reduced_y], anti_aliasing=False,
+                                                    preserve_range=True, order=0)
+                    # write
+                    write_mask(mask_resized, masks_dir, collection, wi.image_name, label)
+                    # add to existing labels
+                    labels.append(label.strip(' \n').replace(" ", "_").lower())
+
+        # write summary: list of existing labels per image
+        annotation_summary[wi.image_name] = labels
+
+    outfile = os.path.join(masks_dir, collection, collection + "-classes.txt")
+    fh = open(outfile, 'w')
+    for a in annotation_summary:
+        fh.write(a + "\t" + str(annotation_summary[a]) + "\n")
+    fh.close()
+
+    logger.info("Done.")
+    return annotation_summary
+
+
+def main(args):
+
+    # logger
+    global logger
+    init_logger(logger, logging.INFO, log_file=None)
+
+    # read config
+    config_file = args["--config-file"]
+    task = args["--task"]
+    collection = args["--collection"]
+
+    if config_file and os.path.isfile(config_file):
+        logger.info(f"Found config file: {os.path.realpath(config_file)}")
+        with open(config_file, 'r') as f:
+            config = json.load(f)
+    else:
+        logger.error("Provide a config file")
+        sys.exit(1)
+
+    annotation_file = config.get("annotation_file")  # manual annotation json file
+    image_url_file = config.get("image_url_file")  # url image list
+    experiments_dir = config.get("experiments_dir")  # experiments output
+    masks_dir = config.get("masks_dir")  # output annotation_objects
+    img_out_dir = config.get("img_out_dir")  # re-scaled images
+
+    logger.info(f"\nGot the following paths:\n"
+                f"annotation_file: {annotation_file}\n"
+                f"image_url_file: {image_url_file}\n"
+                f"experiments_dir: {experiments_dir}\n"
+                f"masks_dir: {masks_dir}\n"
+                f"img_out_dir: {img_out_dir}\n"
+                )
+
+    # to test working items loading
+    if task == "test-collect":
+        collect_working_items(image_url_file, annotation_file, collection)
+
+    # scale down and write original images
+    elif task == "original":
+        working_items = collect_working_items(image_url_file, annotation_file, collection)
+        wi_bag = db.from_sequence(working_items, partition_size=100)
+        wi_bag2 = wi_bag.map(scale_down_original, 
img_out_dir=img_out_dir) + with ProgressBar(): + wi_bag2.compute() + + # create masks + elif task == "masks": + working_items = collect_working_items(image_url_file, annotation_file, collection) + create_masks(masks_dir, working_items, annotation_file, collection) + + +if __name__ == "__main__": + arguments = docopt.docopt(__doc__) + main(arguments) \ No newline at end of file From 811af9c37cd501038e41f79056428b8036616ee9 Mon Sep 17 00:00:00 2001 From: maudehrmann Date: Tue, 11 Dec 2018 12:27:54 +0100 Subject: [PATCH 24/57] via annotation processing - typo --- dh_segment/utils/via.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dh_segment/utils/via.py b/dh_segment/utils/via.py index bf4bda5..a8f8e2e 100644 --- a/dh_segment/utils/via.py +++ b/dh_segment/utils/via.py @@ -10,7 +10,7 @@ Usage: - create_masks.py --task= --collection= --config-file= + via.py --task= --collection= --config-file= Options: --collection document collection to work with From 48efe8797a947777dce25775690cd14f1a18cbff Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 11 Dec 2018 16:15:43 +0100 Subject: [PATCH 25/57] type correction --- dh_segment/post_processing/binarization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dh_segment/post_processing/binarization.py b/dh_segment/post_processing/binarization.py index 6f4df98..0345eb5 100644 --- a/dh_segment/post_processing/binarization.py +++ b/dh_segment/post_processing/binarization.py @@ -38,7 +38,7 @@ def cleaning_binary(mask: np.ndarray, kernel_size: int=5) -> np.ndarray: ksize_close = (kernel_size, kernel_size) mask = cv2.morphologyEx((mask.astype(np.uint8, copy=False) * 255), cv2.MORPH_OPEN, kernel=np.ones(ksize_open)) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones(ksize_close)) - return mask / 255 + return np.uint8(mask / 255) def hysteresis_thresholding(probs: np.array, low_threshold: float, high_threshold: float, From f736aaa7fb22c65e35ef86e94a911fc946d35984 Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 12 Dec 2018 10:31:15 +0100 Subject: [PATCH 26/57] added doc --- dh_segment/io/__init__.py | 17 ++++++++ dh_segment/{utils => io}/via.py | 70 ++++++++++++++++++++------------- doc/reference/io.rst | 5 +++ 3 files changed, 65 insertions(+), 27 deletions(-) rename dh_segment/{utils => io}/via.py (85%) diff --git a/dh_segment/io/__init__.py b/dh_segment/io/__init__.py index ca0fabb..e326568 100644 --- a/dh_segment/io/__init__.py +++ b/dh_segment/io/__init__.py @@ -63,6 +63,21 @@ PAGE.json_serialize ---- + +VGG Image Annotator helpers +--------------------------- + +.. autosummary:: + via.get_annotations + via.get_labels + via.collect_working_items + via.create_masks + via.compute_reduced_dimensions + via.scale_down_original + via.getimage_from_iiif + +---- + """ @@ -103,3 +118,5 @@ from .input import * from .input_utils import * from . import PAGE +from . 
import via + diff --git a/dh_segment/utils/via.py b/dh_segment/io/via.py similarity index 85% rename from dh_segment/utils/via.py rename to dh_segment/io/via.py index a8f8e2e..8a195d0 100644 --- a/dh_segment/utils/via.py +++ b/dh_segment/io/via.py @@ -31,7 +31,7 @@ from imageio import imsave, imread import logging import requests -from requests.auth import HTTPBasicAuth +from typing import List, Tuple from dask.diagnostics import ProgressBar import dask.bag as db @@ -79,9 +79,10 @@ def init_logger(logger, log_level, log_file): return logger -def get_annotations(annotations_dict, iiif_url): +def get_annotations(annotations_dict: dict, iiif_url: str) -> dict: """ From VIA json file, get annotations relative to the given `iiif_url`. + :param annotations_dict: VIA annotation output (originally json) :param iiif_url: the file to look for :return: dict @@ -95,28 +96,30 @@ def get_annotations(annotations_dict, iiif_url): return None -def compute_reduced_dimensions(x, y): +def compute_reduced_dimensions(x: int, y: int, target_h: int=2000) -> Tuple[int, int]: """ - Compute new dimensions with height set to 2000. + Compute new dimensions with height set to `target_h`. + :param x: height :param y: width + :param target_h: target height :return: tuple """ ratio = y / x - target_h = 2000 target_w = int(target_h*ratio) return target_h, target_w -def collect_working_items(image_url_file, annotation_file, collection): +def collect_working_items(image_url_file: List[str], annotation_file: str, collection: str) -> List[WorkingItem]: """ Given VIA annotation input, collect all info on `WorkingItem` object. + :param image_url_file: file listing IIIF URLs files :param annotation_file: VIA json file, output of manual annotation :param collection: target collection to consider :return: list of `WorkingItem` """ - logger.info(f"Collecting working items for {collection}") + logger.info("Collecting working items for {}".format(collection)) working_items = [] session = requests.Session() @@ -165,15 +168,16 @@ def collect_working_items(image_url_file, annotation_file, collection): ) working_items.append(wk_item) - logger.info(f"Collected {len(working_items)} items.") + logger.info("Collected {} items.".format(len(working_items))) return working_items -def scale_down_original(working_item, img_out_dir): +def scale_down_original(working_item, img_out_dir: str) -> None: """ - Copy and reduce original image files - :param img_out_dir: where to put the downscaled images. - :param working_items: dict of `WorkingItems` + Copy and reduce original image files. + + :param img_out_dir: where to put the downscaled images + :param working_item: dict of `WorkingItems` :return: None """ image_set_dir = os.path.join(img_out_dir, working_item.collection, "images") @@ -202,20 +206,30 @@ def getimage_from_iiif(url, user, pwd): return imread(img.content) -def write_mask(mask, masks_dir, collection, image_name, label): - """ Save a mask with filename containing 'label' """ +def write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: + """ + Save a mask with filename containing 'label'. 
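+    The output path is `<masks_dir>/<collection>/<image_name>/<image_name>-mask-<label>.png`.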
+ + :param mask: + :param masks_dir: + :param collection: + :param image_name: + :param label: + :return: + """ outdir = os.path.join(masks_dir, collection, image_name) if not os.path.exists(outdir): os.makedirs(outdir) label = label.strip(' \n').replace(" ", "_").lower() if label is not None else 'nolabel' outfile = os.path.join(outdir, image_name + "-mask-" + label + ".png") - #if not os.path.isfile(outfile): + # if not os.path.isfile(outfile): imsave(outfile, mask.astype(np.uint8)) -def get_labels(annotation_file): +def get_labels(annotation_file: str) -> dict: """ - Get labels from annotation tool (VIA) settings + Get labels from annotation tool (VIA) settings. + :param annotation_file: manual annotation json file :return: dict (k=label, v=RGB code) """ @@ -236,16 +250,18 @@ def get_labels(annotation_file): return label_color -def create_masks(masks_dir, working_items, annotation_file, collection): +def create_masks(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, collection: str) -> None: """ For each annotation, create a corresponding binary mask and resize it (h = 2000). Several annotations of the same class on the same image produce one image with several masks. + :param masks_dir: where to output the masks :param working_items: infos to work with :param annotation_file: + :param collection: :return: None """ - logger.info(f"Creating masks in {masks_dir}...") + logger.info("Creating masks in {}...".format(masks_dir)) annotation_summary = dict() @@ -309,7 +325,7 @@ def main(args): collection = args["--collection"] if config_file and os.path.isfile(config_file): - logger.info(f"Found config file: {os.path.realpath(config_file)}") + logger.info("Found config file: {}".format(os.path.realpath(config_file))) with open(config_file, 'r') as f: config = json.load(f) else: @@ -321,12 +337,12 @@ def main(args): masks_dir = config.get("masks_dir") # output annotation_objects img_out_dir = config.get("img_out_dir") # re-scaled images - logger.info(f"\nGot the following paths:\n" - f"annotation_file: {annotation_file}\n" - f"image_url_file: {image_url_file}\n" - f"experiments_dir: {experiments_dir}\n" - f"masks_dir: {masks_dir}\n" - f"img_out_dir: {img_out_dir}\n" + logger.info("\nGot the following paths:\n" + "annotation_file: {}\n" + "image_url_file: {}\n" + "experiments_dir: {}\n" + "masks_dir: {}\n" + "img_out_dir: {}\n".format(annotation_file, image_url_file, experiments_dir, masks_dir, img_out_dir) ) # to test working items loading @@ -349,4 +365,4 @@ def main(args): if __name__ == "__main__": arguments = docopt.docopt(__doc__) - main(arguments) \ No newline at end of file + main(arguments) diff --git a/doc/reference/io.rst b/doc/reference/io.rst index 48756f1..f3159a8 100644 --- a/doc/reference/io.rst +++ b/doc/reference/io.rst @@ -76,3 +76,8 @@ Input / Output .. automodule:: dh_segment.io.PAGE :members: :undoc-members: + +.. 
automodule:: dh_segment.io.via + :members: + :undoc-members: + :exclude-members: main, init_logger \ No newline at end of file From 7f65ad4935596c0c03770391485fe404bf1af48e Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 17 Jan 2019 15:33:58 +0100 Subject: [PATCH 27/57] updated doc --- dh_segment/utils/labels.py | 18 +++++++++++++++++- doc/start/demo.rst | 3 +++ doc/start/install.rst | 2 +- doc/start/training.rst | 35 +++++++++++++++++++++++++++++++++-- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/dh_segment/utils/labels.py b/dh_segment/utils/labels.py index 2f35ae6..4bb4277 100644 --- a/dh_segment/utils/labels.py +++ b/dh_segment/utils/labels.py @@ -4,6 +4,7 @@ import tensorflow as tf import numpy as np import os +from typing import Tuple def label_image_to_class(label_image: tf.Tensor, classes_file: str) -> tf.Tensor: @@ -29,6 +30,13 @@ def class_to_label_image(class_label: tf.Tensor, classes_file: str) -> tf.Tensor def multilabel_image_to_class(label_image: tf.Tensor, classes_file: str) -> tf.Tensor: + """ + Combines image annotations with classes info of the txt file to create the input label for the training. + + :param label_image: annotated image [H,W,Ch] or [B,H,W,Ch] (Ch = color channels) + :param classes_file: the filename of the txt file containing the class info + :return: [H,W,Cl] or [B,H,W,Cl] (Cl = number of classes) + """ classes_color_values, colors_labels = get_classes_color_from_file_multilabel(classes_file) # Convert label_image [H,W,3] to the classes [H,W,C],int32 according to the classes [C,3] with tf.name_scope('LabelAssign'): @@ -71,7 +79,15 @@ def get_n_classes_from_file(classes_file: str) -> int: return get_classes_color_from_file(classes_file).shape[0] -def get_classes_color_from_file_multilabel(classes_file: str) -> np.ndarray: +def get_classes_color_from_file_multilabel(classes_file: str) -> Tuple[np.ndarray, np.array]: + """ + Get classes and code labels from txt file. + This function deals with the case of elements with multiple labels. + + :param classes_file: file containing the classes (usually named *classes.txt*) + :return: for each class the RGB color (array size [N, 3]); and the label's code (array size [N, C]), + with N the number of combinations and C the number of classes + """ if not os.path.exists(classes_file): raise FileNotFoundError(classes_file) result = np.loadtxt(classes_file).astype(np.float32) diff --git a/doc/start/demo.rst b/doc/start/demo.rst index d8b4d74..67d4a93 100644 --- a/doc/start/demo.rst +++ b/doc/start/demo.rst @@ -10,6 +10,9 @@ In order to limit memory usage, the images in the dataset we provide have been d **How to** +0. If you have not yet done so, clone the repository : :: + + git clone https://github.com/dhlab-epfl/dhSegment.git 1. Get the annotated dataset `here`_, which already contains the folders ``images`` and ``labels`` for training, validation and testing set. Unzip it into ``model/pages``. 
:: diff --git a/doc/start/install.rst b/doc/start/install.rst index a3bc1c4..61e5b8d 100644 --- a/doc/start/install.rst +++ b/doc/start/install.rst @@ -12,7 +12,7 @@ Pip install using git repository : :: Using Anaconda ^^^^^^^^^^^^^^ -- Install Anaconda or Miniconda +- Install Anaconda or Miniconda (`installation procedure `_) - Create a virtual environment with all the packages ``conda env create -f environment.yml`` diff --git a/doc/start/training.rst b/doc/start/training.rst index 0638ce1..f033aec 100644 --- a/doc/start/training.rst +++ b/doc/start/training.rst @@ -30,7 +30,7 @@ The file containing the classes has the format shown below, where each row corre (including 'negative' or 'background' class) and each row has 3 values for the 3 RGB values. Of course each class needs to have a different code. :: - class.txt + classes.txt 0 0 0 0 255 0 @@ -43,4 +43,35 @@ Of course each class needs to have a different code. :: In order to train a model, you should run ``python train.py with `` -.. _sacred: https://sacred.readthedocs.io/en/latest/quickstart.html \ No newline at end of file +.. _sacred: https://sacred.readthedocs.io/en/latest/quickstart.html + + +Multilabel classification training +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In case you want to be able to assign multiple labels to elements, the ``classes.txt`` file must be changed. +Besides the color code, you need to add an *attribution* code to each color. The attribution code has length `n_classes` +and indicates which classes are assigned to the color. + +Take for example 3 classes {A, B, C} and the following possible labelling combinations: + +- A (color code ``(0 255 0)``) with attribution code ``1 0 0`` +- B (color code ``(255 0 0)``) with attribution code ``0 1 0`` +- C (color code ``(0 0 255)``) with attribution code ``0 0 1`` +- AB (color code ``(128 128 128)``) with attribution code ``1 1 0`` +- BC (color code ``(0 255 255)``) with attribution code ``0 1 1`` + +The attributions code has value ``1`` when the label is assigned and ``0`` when it's not. +(The attribution code ``1 0 1`` would mean that the color annotates elements that belong to classes A and C) + +In our example the ``classes.txt`` file would then look like : :: + + + classes.txt + + 0 0 0 0 0 0 + 0 255 0 1 0 0 + 255 0 0 0 1 0 + 0 0 255 0 0 1 + 128 128 128 1 1 0 + 0 255 255 0 1 1 From 4509bc55447c5fbc45126a36866fb389b8118c65 Mon Sep 17 00:00:00 2001 From: soliveir Date: Fri, 18 Jan 2019 09:45:45 +0100 Subject: [PATCH 28/57] updated installation doc --- doc/start/demo.rst | 1 + doc/start/install.rst | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/start/demo.rst b/doc/start/demo.rst index 67d4a93..e342bb4 100644 --- a/doc/start/demo.rst +++ b/doc/start/demo.rst @@ -10,6 +10,7 @@ In order to limit memory usage, the images in the dataset we provide have been d **How to** + 0. If you have not yet done so, clone the repository : :: git clone https://github.com/dhlab-epfl/dhSegment.git diff --git a/doc/start/install.rst b/doc/start/install.rst index 61e5b8d..e11ffa2 100644 --- a/doc/start/install.rst +++ b/doc/start/install.rst @@ -4,26 +4,34 @@ Installation Using ``pip`` ^^^^^^^^^^^^^ -Pip install using git repository : :: +1. Clone the repository using ``git clone https://github.com/dhlab-epfl/dhSegment.git`` - pip install git+https://github.com/dhlab-epfl/dhSegment +2. Create and activate a virtualenv :: + virtualenv myvirtualenvs/dh_segment + source myvirtualenvs/dh_segment/bin/activate + +3. 
Install the dependencies using ``pip`` (this will look for the ``setup.py`` file) :: + + pip install git+https://github.com/dhlab-epfl/dhSegment Using Anaconda ^^^^^^^^^^^^^^ -- Install Anaconda or Miniconda (`installation procedure `_) +1. Install Anaconda or Miniconda (`installation procedure `_) + +2. Clone the repository: ``git clone https://github.com/dhlab-epfl/dhSegment.git`` -- Create a virtual environment with all the packages ``conda env create -f environment.yml`` +3. Create a virtual environment with all the packages: ``conda env create -f environment.yml`` -- Then activate the environment with ``source activate dh_segment`` +4. Then activate the environment with ``source activate dh_segment`` -- It might be possible that the following needs to be added to your ``~/.bashrc`` :: +5. It might be possible that the following needs to be added to your ``~/.bashrc`` :: export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" export CUDA_HOME=/usr/local/cuda -- To be able to import the package (i.e ``import dh_segment``) in your code, you have to run : :: +6. To be able to import the package (i.e ``import dh_segment``) in your code, you have to run: :: python setup.py install From e61079ff9325d4c22ff810c73bc97c4de87cf828 Mon Sep 17 00:00:00 2001 From: soliveir Date: Fri, 18 Jan 2019 09:46:16 +0100 Subject: [PATCH 29/57] packages versions --- environment.yml | 1 + setup.py | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/environment.yml b/environment.yml index 05f572f..bf22905 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,7 @@ channels: dependencies: - imageio=2.3.0 - opencv=3.4.1 + - numpy=1.14.5 - pandas=0.23.0 - pillow=5.1.0 - python=3.6 diff --git a/setup.py b/setup.py index aca9532..d6da753 100644 --- a/setup.py +++ b/setup.py @@ -12,22 +12,24 @@ 'Source Code': 'https://github.com/dhlab-epfl/dhSegment' }, install_requires=[ - 'tensorflow', - 'numpy', - 'imageio', - 'pandas', - 'scipy', - 'shapely', - 'scikit-learn', - 'opencv-python', - 'tqdm', + 'tensorflow-gpu==1.11', + 'numpy==1.14.5', + 'imageio==2.3.0', + 'pandas==0.23.0', + 'scipy==1.1.0', + 'shapely==1.6.4', + 'scikit-learn==0.19.1', + 'scikit-image==0.13.1', + 'opencv-python==3.4.1.15', + 'tqdm==4.23.3', + 'sacred==0.7.3' ], extras_require={ 'doc': [ - 'sphinx', - 'sphinx-autodoc-typehints', - 'sphinx-rtd-theme', - 'sphinxcontrib-bibtex', + 'sphinx==1.8.1', + 'sphinx-autodoc-typehints==1.3.0', + 'sphinx-rtd-theme==0.4.1', + 'sphinxcontrib-bibtex==0.4.0', 'sphinxcontrib-websupport' ], }, From db46c3514698ae28a6489543ee9764c156a25357 Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 21 Jan 2019 16:10:24 +0100 Subject: [PATCH 30/57] detected contour should have at least 3 points --- dh_segment/post_processing/polygon_detection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dh_segment/post_processing/polygon_detection.py b/dh_segment/post_processing/polygon_detection.py index 0614612..a970bec 100644 --- a/dh_segment/post_processing/polygon_detection.py +++ b/dh_segment/post_processing/polygon_detection.py @@ -25,6 +25,8 @@ def find_polygonal_regions(image_mask: np.ndarray, min_area: float=0.1, n_max_po found_polygons = list() for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue polygon = geometry.Polygon([point[0] for point in c]) # Check that polygon has area greater than minimal area if polygon.area >= min_area*np.prod(image_mask.shape[:2]): From 
7c53e27f654c9dbdf3fe96df5019f0f11d5b60de Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 24 Jan 2019 15:57:12 +0100 Subject: [PATCH 31/57] LatestExporter if no eval data is provided --- train.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index c04c2de..69bae23 100644 --- a/train.py +++ b/train.py @@ -94,7 +94,11 @@ def get_dirs_or_files(input_data): # Configure exporter serving_input_fn = input.serving_input_filename(training_params.input_resized_size) - exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + if eval_data is not None: + exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + else: + exporter = tf.estimator.LatestExporter(name='SimpleExporter', serving_input_receiver_fn=serving_input_fn, + exports_to_keep=5) for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): estimator.train(input.input_fn(train_input, From e07f996131f55a9e7e8434492ca942e08626538f Mon Sep 17 00:00:00 2001 From: soliveir Date: Fri, 14 Dec 2018 16:40:05 +0100 Subject: [PATCH 32/57] update --- dh_segment/io/__init__.py | 11 +- dh_segment/io/via.py | 468 +++++++++++++++++++++++++++----------- 2 files changed, 345 insertions(+), 134 deletions(-) diff --git a/dh_segment/io/__init__.py b/dh_segment/io/__init__.py index e326568..2121ce4 100644 --- a/dh_segment/io/__init__.py +++ b/dh_segment/io/__init__.py @@ -68,13 +68,14 @@ --------------------------- .. autosummary:: + via.load_annotation_data + via.export_annotation_dict via.get_annotations - via.get_labels + via.get_via_attributes_regions + via.get_labels_per_attribute via.collect_working_items - via.create_masks - via.compute_reduced_dimensions - via.scale_down_original - via.getimage_from_iiif + via.create_masks_v1 + via.create_masks_v2 ---- diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 8a195d0..18cf525 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -1,47 +1,24 @@ #!/usr/bin/env python # coding: utf-8 -""" -Script with CLI to process annotation data produced with VGG Image Annotation (VIA) tool: -- scale down original images -- parse VIA annotation (json) -- create images with masks -(cf. http://www.robots.ox.ac.uk/~vgg/software/via/; done on VIA 2.0.0) - - -Usage: - via.py --task= --collection= --config-file= - -Options: - --collection document collection to work with - --task= task to do: 'original' to downscale original images or 'masks' to create masks. 
- --config-file= configuration file - -""" - - -import docopt import json -import sys import os from tqdm import tqdm import numpy as np from skimage import transform from collections import namedtuple from imageio import imsave, imread -import logging import requests +from itertools import filterfalse from typing import List, Tuple - -from dask.diagnostics import ProgressBar -import dask.bag as db +from enum import Enum +import cv2 __author__ = "maudehrmann" -iiif_password = os.environ["IIIF_PWD"] - -logger = logging.getLogger(__name__) +# iiif_password = os.environ["IIIF_PWD"] +iiif_password = '' WorkingItem = namedtuple( # TODO: "WorkingItem", [ @@ -57,40 +34,43 @@ ) -def init_logger(logger, log_level, log_file): - """Initialise the logger.""" - logger.setLevel(log_level) - - formatter = logging.Formatter( - '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' - ) - - if log_file is not None: - fh = logging.FileHandler(filename=log_file, mode='w') - fh.setFormatter(formatter) - logger.addHandler(fh) +class Color(Enum): + WHITE = (255, 255, 255) + BLACK = (0, 0, 0) + GREY = (128, 128, 128) + RED = (255, 0, 0) + GREEN = (0, 255, 0) + BLUE = (0, 0, 255) + YELLOW = (255, 255, 0) - ch = logging.StreamHandler(sys.stdout) - ch.setFormatter(formatter) - logger.addHandler(ch) - logger.info("Logger successfully initialised") - - return logger - - -def get_annotations(annotations_dict: dict, iiif_url: str) -> dict: +def get_annotations(via_dict: dict, name_file: str) -> dict: """ - From VIA json file, get annotations relative to the given `iiif_url`. + From VIA json file, get annotations relative to the given `name_file`. - :param annotations_dict: VIA annotation output (originally json) - :param iiif_url: the file to look for + :param via_dict: VIA annotation output (originally json) + :param name_file: the file to look for (it can be a iiif path or a file path) :return: dict """ - k = iiif_url + "-1" - if k in annotations_dict['_via_img_metadata']: - myannotation = annotations_dict['_via_img_metadata'][k] - if iiif_url == myannotation['filename']: + + # Check that the annotation_dict is a "via_project" file (project export), + # or a "via_region" file (annotation export) + if '_via_img_metadata' in via_dict.keys(): + annotation_dict = via_dict['_via_img_metadata'] + else: + annotation_dict = via_dict + + # If it looks like a iiif path add "-1" + if 'http' in name_file: + key = name_file + "-1" + else: # find the key that contains the name_file + list_keys = list(filterfalse(lambda x: name_file not in x, list(annotation_dict.keys()))) + assert len(list_keys) == 1, "There is more than one key for the file '{} : \n{}'".format(name_file, list_keys) + key = list_keys[0] + + if key in annotation_dict.keys(): + myannotation = annotation_dict[key] + if name_file == myannotation['filename']: return myannotation['regions'] else: return None @@ -119,7 +99,7 @@ def collect_working_items(image_url_file: List[str], annotation_file: str, colle :param collection: target collection to consider :return: list of `WorkingItem` """ - logger.info("Collecting working items for {}".format(collection)) + print("Collecting working items for {}".format(collection)) working_items = [] session = requests.Session() @@ -168,7 +148,7 @@ def collect_working_items(image_url_file: List[str], annotation_file: str, colle ) working_items.append(wk_item) - logger.info("Collected {} items.".format(len(working_items))) + print("Collected {} items.".format(len(working_items))) return working_items @@ -194,7 +174,7 @@ def 
scale_down_original(working_item, img_out_dir: str) -> None: img = getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) img_resized = transform.resize( img, - [working_item.reduced_x, working_item.reduced_y], + [working_item.reduced_y, working_item.reduced_x], anti_aliasing=False, preserve_range=True ) @@ -206,6 +186,31 @@ def getimage_from_iiif(url, user, pwd): return imread(img.content) +def load_annotation_data(via_data_filename: str) -> dict: + """ + Load the content of via annotation files. + + :param via_data_filename: via annotations json file + :return: the content of json file containing the region annotated + """ + with open(via_data_filename, 'r') as f: + content = json.load(f) + + return content + + +def export_annotation_dict(annotation_dict: dict, filename: str) -> None: + """ + Export the annotations to json file. + + :param annotation_dict: VIA annotations + :param filename: filename to export the data (json file) + :return: + """ + with open(filename, 'w') as f: + json.dump(annotation_dict, f) + + def write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: """ Save a mask with filename containing 'label'. @@ -228,7 +233,7 @@ def write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: st def get_labels(annotation_file: str) -> dict: """ - Get labels from annotation tool (VIA) settings. + Get labels from annotation tool (VIA) settings. Only compatible with VIA v2.0. :param annotation_file: manual annotation json file :return: dict (k=label, v=RGB code) @@ -236,6 +241,8 @@ def get_labels(annotation_file: str) -> dict: with open(annotation_file, 'r') as a: annotations = json.load(a) + # todo : this is not generic + # list(annotations['_via_attributes']['region'][{name_attributes}][{args}] label_list = list(annotations['_via_attributes']['region']['Label']['options']) # todo: keep a list of colors and assign randomly to label (to avoid hard coding labels/color correspondance) @@ -243,16 +250,110 @@ def get_labels(annotation_file: str) -> dict: label_color = dict() for label in label_list: if label == "MYLABEL1": - label_color[label] = (255, 255, 255) # white + label_color[label] = Color.WHITE # white elif label == "MYLABEL2": - label_color[label] = (255, 255, 0) # yellow + label_color[label] = Color.YELLOW # yellow # etc. return label_color -def create_masks(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, collection: str) -> None: +def get_via_attributes_regions(annotation_dict: dict, via_version: int=2) -> List[str]: """ - For each annotation, create a corresponding binary mask and resize it (h = 2000). + Gets the attributes of the annotataed data. 
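
A quick doctest-style sketch of what ``get_labels`` above is meant to return once the
placeholder names are replaced (label names and file name are invented; only the shape
of the mapping is the point)::

    >>> get_labels('via_annotations.json')
    {'MYLABEL1': <Color.WHITE: (255, 255, 255)>, 'MYLABEL2': <Color.YELLOW: (255, 255, 0)>}
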
+ + :param annotation_dict: + :param via_version: either 1 or 2 (for VIA v 1.0 or VIA v 2.0) + :return: A list containing the attributes names§ + """ + if via_version == 1: + list_categories = list() + + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions.values(): + list_categories += list(region['region_attributes'].keys()) + + return list(np.unique(list_categories)) + elif via_version == 2: + # If project_export is given + if '_via_attributes' in annotation_dict.keys(): + return list(annotation_dict['_via_attributes']['region'].keys()) + # else if annotation_export is given + else: + list_categories = list() + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions: + list_categories += list(region['region_attributes'].keys()) + + return list(np.unique(list_categories)) + else: + raise NotImplementedError + + +def get_labels_per_attribute(annotation_dict: dict, attribute_regions: List[str], via_version: int=2) -> Tuple: + """ + For each attribute, get all the possible label variants. + + :param annotation_dict: + :param attribute_regions: + :param via_version: + :return: (unique_labels, dict_labels) : `dict_labels` is a dictionary containing all the labels per attribute + + Usage + ----- + + >>> annotations_dict = load_annotation_data('via_annotations.json') + >>> list_attributes = get_via_attributes_regions(annotations_dict) + >>> list_attributes + >>> ['object', 'text'] + >>> + >>> unique_labels, dict_labels = get_labels_per_attribute(annotations_dict, list_attributes) + >>> unique_labels + >>> [['animal', 'car', ''], ['handwritten', 'typed', '']] + >>> + >>> dict_labels + >>> {'object': ['animal', 'animal', 'car', 'animal', ''], 'text': ['handwritten', '', 'typed', '', 'handwritten']} + """ + + if via_version == 1: + dict_labels = {ar: list() for ar in attribute_regions} + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions.values(): + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) + + unique_labels = list() + for ar in attribute_regions: + unique_labels.append(list(np.unique(dict_labels[ar]))) + + return unique_labels, dict_labels + elif via_version == 2: + # If project_export is given + if '_via_attributes' in annotation_dict.keys(): + raise NotImplementedError + # else if annotation_export is given + else: + dict_labels = {ar: list() for ar in attribute_regions} + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions: + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) + + unique_labels = list() + for ar in attribute_regions: + unique_labels.append(list(np.unique(dict_labels[ar]))) + + return unique_labels, dict_labels + else: + raise NotImplementedError + + +def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, collection: str) -> None: + """ + For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 2.0. Several annotations of the same class on the same image produce one image with several masks. 
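
For orientation, the ``<collection>-classes.txt`` summary written at the end of this
function (see the ``fh.write`` call below) is a tab-separated list of the labels found
per image; a hypothetical sketch with invented image names::

    page_0001    ['page', 'annotation']
    page_0002    ['nolabel']
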
:param masks_dir: where to output the masks @@ -261,108 +362,217 @@ def create_masks(masks_dir: str, working_items: List[WorkingItem], annotation_fi :param collection: :return: None """ - logger.info("Creating masks in {}...".format(masks_dir)) + print("Creating masks in {}...".format(masks_dir)) annotation_summary = dict() + def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): + """ + Resize only if needed (if working_item.reduced != working_item.original) + :param mask_image: + :param working_item: + :param label_item: + :return: + """ + if not working_item.reduced_y and not working_item.reduced_x: + write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: + mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, preserve_range=True, order=0) + write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + else: + write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + for wi in tqdm(working_items, desc="workingItem2mask"): labels = [] label_list = get_labels(annotation_file) # the image has no annotation, writing a black mask: if not wi.annotations: - mask = np.zeros([wi.original_x, wi.original_y], np.uint8) - mask_resized = transform.resize(mask, [wi.reduced_x, wi.reduced_y], anti_aliasing=False, - preserve_range=True, order=0) - write_mask(mask_resized, masks_dir, collection, wi.image_name, None) + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + resize_and_write_mask(mask, wi, None) labels.append("nolabel") # check all possible labels for the image and create mask: else: for label in label_list.keys(): # get annotation corresponding to current label + # todo : the 'Label' key is not generic selected_regions = list(filter(lambda r: r['region_attributes']['Label'] == label, wi.annotations)) if selected_regions: # create a 0 matrix (black background) - mask = np.zeros([wi.original_x, wi.original_y], np.uint8) + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) # add one or several mask for current label # nb: if 2 labels are on the same page, they belongs to the same mask for sr in selected_regions: + if sr['shape_attributes']['name'] == 'rect': + x = sr['shape_attributes']['x'] + y = sr['shape_attributes']['y'] + w = sr['shape_attributes']['width'] + h = sr['shape_attributes']['height'] + # project region(s) on the mask (binary b/w) + mask[y:y + h, x:x + w] = 255 + elif sr['shape_attributes']['name'] == 'polygon': + points_polygon = np.stack([sr['shape_attributes']['all_points_x'], + sr['shape_attributes']['all_points_y']], axis=1)[:, None, :] + + mask = cv2.fillPoly(mask, [points_polygon], 255) + else: + raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented ' + 'yet'.format(sr['shape_attributes']['name'])) + + # resize + resize_and_write_mask(mask, wi, label) + # add to existing labels + labels.append(label.strip(' \n').replace(" ", "_").lower()) + + # write summary: list of existing labels per image + annotation_summary[wi.image_name] = labels + outfile = os.path.join(masks_dir, collection, collection + "-classes.txt") + fh = open(outfile, 'w') + for a in annotation_summary: + fh.write(a + "\t" + str(annotation_summary[a]) + "\n") + fh.close() + + print("Done.") + return annotation_summary + + +def create_masks_v1(masks_dir: str, 
working_items: List[WorkingItem], collection: str, label_name: str) -> None: + """ + For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 1.0. + Several annotations of the same class on the same image produce one image with several masks. + + :param masks_dir: where to output the masks + :param working_items: infos to work with + :param collection: + :param label_name: name of the label to create mask + :return: None + """ + + annotation_summary = dict() + + def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): + """ + Resize only if needed (if working_item.reduced != working_item.original) + :param mask_image: + :param working_item: + :param label_item: + :return: + """ + if not working_item.reduced_y and not working_item.reduced_x: + write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: + mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, preserve_range=True, order=0) + write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + else: + write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + + for wi in tqdm(working_items, desc="workingItem2mask"): + labels = [] + # the image has no annotation, writing a black mask: + if not wi.annotations: + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + resize_and_write_mask(mask, wi, None) + labels.append("nolabel") + # check all possible labels for the image and create mask: + else: + # get annotation corresponding to current label + selected_regions = wi.annotations + if selected_regions: + # create a 0 matrix (black background) + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + # add one or several mask for current label + # nb: if 2 labels are on the same page, they belongs to the same mask + elem_to_iterate = selected_regions.values() if isinstance(selected_regions, dict) else selected_regions + for sr in elem_to_iterate: + if sr['shape_attributes']['name'] == 'rect': x = sr['shape_attributes']['x'] y = sr['shape_attributes']['y'] w = sr['shape_attributes']['width'] h = sr['shape_attributes']['height'] # project region(s) on the mask (binary b/w) mask[y:y + h, x:x + w] = 255 + elif sr['shape_attributes']['name'] == 'polygon': + points_polygon = np.stack([sr['shape_attributes']['all_points_x'], + sr['shape_attributes']['all_points_y']], axis=1)[:, None, :] - # resize - mask_resized = transform.resize(mask, [wi.reduced_x, wi.reduced_y], anti_aliasing=False, - preserve_range=True, order=0) - # write - write_mask(mask_resized, masks_dir, collection, wi.image_name, label) - # add to existing labels - labels.append(label.strip(' \n').replace(" ", "_").lower()) + mask = cv2.fillPoly(mask, [points_polygon], 255) + else: + raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented yet' + .format(sr['shape_attributes']['name'])) + + # resize + resize_and_write_mask(mask, wi, label_name) + # add to existing labels + labels.append(label_name.strip(' \n').replace(" ", "_").lower()) # write summary: list of existing labels per image annotation_summary[wi.image_name] = labels outfile = os.path.join(masks_dir, collection, collection + "-classes.txt") - fh = open(outfile, 'w') + fh = open(outfile, 'a') for a in annotation_summary: fh.write(a + "\t" + 
str(annotation_summary[a]) + "\n") fh.close() - logger.info("Done.") return annotation_summary -def main(args): +# def main(args): +# +# # read config +# config_file = args["--config-file"] +# task = args["--task"] +# collection = args["--collection"] +# +# if config_file and os.path.isfile(config_file): +# print("Found config file: {}".format(os.path.realpath(config_file))) +# with open(config_file, 'r') as f: +# config = json.load(f) +# else: +# print("Provide a config file") +# +# annotation_file = config.get("annotation_file") # manual annotation json file +# image_url_file = config.get("image_url_file") # url image list +# experiments_dir = config.get("experiments_dir") # output expe +# masks_dir = config.get("masks_dir") # output annotation_objects +# img_out_dir = config.get("img_out_dir") # re-scaled images +# +# print("\nGot the following paths:\n" +# "annotation_file: {}\n" +# "image_url_file: {}\n" +# "experiments_dir: {}\n" +# "masks_dir: {}\n" +# "img_out_dir: {}\n".format(annotation_file, image_url_file, experiments_dir, masks_dir, img_out_dir) +# ) +# +# # to test working items loading +# if task == "test-collect": +# collect_working_items(image_url_file, annotation_file, collection) +# +# # scale down and write original images +# elif task == "original": +# working_items = collect_working_items(image_url_file, annotation_file, collection) +# wi_bag = db.from_sequence(working_items, partition_size=100) +# wi_bag2 = wi_bag.map(scale_down_original, img_out_dir=img_out_dir) +# with ProgressBar(): +# wi_bag2.compute() +# +# # create masks +# elif task == "masks": +# working_items = collect_working_items(image_url_file, annotation_file, collection) +# create_masks_v2(masks_dir, working_items, annotation_file, collection) +# - # logger - global logger - init_logger(logger, logging.INFO, log_file=None) +""" +Example of usage - # read config - config_file = args["--config-file"] - task = args["--task"] - collection = args["--collection"] - if config_file and os.path.isfile(config_file): - logger.info("Found config file: {}".format(os.path.realpath(config_file))) - with open(config_file, 'r') as f: - config = json.load(f) - else: - logging.info("Provide a config file") - - annotation_file = config.get("annotation_file") # manual annotation json file - image_url_file = config.get("image_url_file") # url image list - experiments_dir = config.get("experiments_dir") # output expe - masks_dir = config.get("masks_dir") # output annotation_objects - img_out_dir = config.get("img_out_dir") # re-scaled images - - logger.info("\nGot the following paths:\n" - "annotation_file: {}\n" - "image_url_file: {}\n" - "experiments_dir: {}\n" - "masks_dir: {}\n" - "img_out_dir: {}\n".format(annotation_file, image_url_file, experiments_dir, masks_dir, img_out_dir) - ) - - # to test working items loading - if task == "test-collect": - collect_working_items(image_url_file, annotation_file, collection) - - # scale down and write original images - elif task == "original": - working_items = collect_working_items(image_url_file, annotation_file, collection) - wi_bag = db.from_sequence(working_items, partition_size=100) - wi_bag2 = wi_bag.map(scale_down_original, img_out_dir=img_out_dir) - with ProgressBar(): - wi_bag2.compute() - - # create masks - elif task == "masks": - working_items = collect_working_items(image_url_file, annotation_file, collection) - create_masks(masks_dir, working_items, annotation_file, collection) - - -if __name__ == "__main__": - arguments = docopt.docopt(__doc__) - main(arguments) 
+collection = 'mycollection' +annotation_file = 'via_regions_annotated.json' +image_url_file = 'list_files_image_url.txt' +masks_dir = '/home/project/generated_masks' + +working_items = collect_working_items(image_url_file, annotation_file, collection) +create_masks_v2(masks_dir, working_items, annotation_file, collection) +""" + From b090906bfc3d922bce341128bde38bbd06ab5333 Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 24 Jan 2019 15:44:10 +0100 Subject: [PATCH 33/57] contour option in mask creation --- dh_segment/io/via.py | 52 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 18cf525..8d64890 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -351,7 +351,8 @@ def get_labels_per_attribute(annotation_dict: dict, attribute_regions: List[str] raise NotImplementedError -def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, collection: str) -> None: +def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, + collection: str, contours_only: bool=False) -> None: """ For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 2.0. Several annotations of the same class on the same image produce one image with several masks. @@ -360,6 +361,7 @@ def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation :param working_items: infos to work with :param annotation_file: :param collection: + :param contours_only: creates the binary masks only for the contours of the object (thickness of contours : 20 px) :return: None """ print("Creating masks in {}...".format(masks_dir)) @@ -402,23 +404,35 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab mask = np.zeros([wi.original_y, wi.original_x], np.uint8) # add one or several mask for current label # nb: if 2 labels are on the same page, they belongs to the same mask + + contours_points = list() for sr in selected_regions: + if sr['shape_attributes']['name'] == 'rect': x = sr['shape_attributes']['x'] y = sr['shape_attributes']['y'] w = sr['shape_attributes']['width'] h = sr['shape_attributes']['height'] - # project region(s) on the mask (binary b/w) - mask[y:y + h, x:x + w] = 255 + + contours_points.append(np.array([[x, y], + [x + w, y], + [x + w, y + h], + [x, y + h] + ]).reshape((-1, 1, 2))) + elif sr['shape_attributes']['name'] == 'polygon': - points_polygon = np.stack([sr['shape_attributes']['all_points_x'], - sr['shape_attributes']['all_points_y']], axis=1)[:, None, :] + contours_points.append(np.stack([sr['shape_attributes']['all_points_x'], + sr['shape_attributes']['all_points_y']], axis=1)[:, None, :]) - mask = cv2.fillPoly(mask, [points_polygon], 255) else: raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented ' 'yet'.format(sr['shape_attributes']['name'])) + if contours_only: + mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) + else: + mask = cv2.fillPoly(mask, contours_points, 255) + # resize resize_and_write_mask(mask, wi, label) # add to existing labels @@ -436,7 +450,8 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab return annotation_summary -def create_masks_v1(masks_dir: str, working_items: List[WorkingItem], collection: str, label_name: str) -> None: +def create_masks_v1(masks_dir: str, working_items: List[WorkingItem], collection: str, + label_name: str, 
contours_only: bool=False) -> None: """ For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 1.0. Several annotations of the same class on the same image produce one image with several masks. @@ -445,6 +460,7 @@ def create_masks_v1(masks_dir: str, working_items: List[WorkingItem], collection :param working_items: infos to work with :param collection: :param label_name: name of the label to create mask + :param contours_only: creates the binary masks only for the contours of the object (thickness of contours : 20 px) :return: None """ @@ -484,23 +500,35 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab # add one or several mask for current label # nb: if 2 labels are on the same page, they belongs to the same mask elem_to_iterate = selected_regions.values() if isinstance(selected_regions, dict) else selected_regions + + # Todo : use only cv2.fillPoly method -> format the data for 'rect' case to fit fillPoly args + contours_points = list() for sr in elem_to_iterate: if sr['shape_attributes']['name'] == 'rect': x = sr['shape_attributes']['x'] y = sr['shape_attributes']['y'] w = sr['shape_attributes']['width'] h = sr['shape_attributes']['height'] - # project region(s) on the mask (binary b/w) - mask[y:y + h, x:x + w] = 255 + + contours_points.append(np.array([[x, y], + [x + w, y], + [x + w, y + h], + [x, y + h] + ]).reshape((-1, 1, 2))) + elif sr['shape_attributes']['name'] == 'polygon': - points_polygon = np.stack([sr['shape_attributes']['all_points_x'], - sr['shape_attributes']['all_points_y']], axis=1)[:, None, :] + contours_points.append(np.stack([sr['shape_attributes']['all_points_x'], + sr['shape_attributes']['all_points_y']], axis=1)[:, None, :]) - mask = cv2.fillPoly(mask, [points_polygon], 255) else: raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented yet' .format(sr['shape_attributes']['name'])) + if contours_only: + mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) + else: + mask = cv2.fillPoly(mask, contours_points, 255) + # resize resize_and_write_mask(mask, wi, label_name) # add to existing labels From ba92f50b5c7697e60820032169c131b27931d655 Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 30 Jan 2019 23:33:56 +0100 Subject: [PATCH 34/57] export regions coordinates to VIA compatible format --- dh_segment/io/via.py | 128 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 20 deletions(-) diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 8d64890..44a7b77 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -10,7 +10,7 @@ from imageio import imsave, imread import requests from itertools import filterfalse -from typing import List, Tuple +from typing import List, Tuple, Dict from enum import Enum import cv2 @@ -44,7 +44,7 @@ class Color(Enum): YELLOW = (255, 255, 0) -def get_annotations(via_dict: dict, name_file: str) -> dict: +def _get_annotations(via_dict: dict, name_file: str) -> dict: """ From VIA json file, get annotations relative to the given `name_file`. @@ -76,7 +76,7 @@ def get_annotations(via_dict: dict, name_file: str) -> dict: return None -def compute_reduced_dimensions(x: int, y: int, target_h: int=2000) -> Tuple[int, int]: +def _compute_reduced_dimensions(x: int, y: int, target_h: int=2000) -> Tuple[int, int]: """ Compute new dimensions with height set to `target_h`. 
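
To make the arithmetic of ``_compute_reduced_dimensions`` concrete (keeping this module's
convention that ``x`` is the dimension mapped onto ``target_h``): with ``x=3000`` and
``y=2400`` the ratio ``y / x`` is ``0.8``, so the function returns ``(2000, 1600)``.
A doctest-style sketch::

    >>> _compute_reduced_dimensions(3000, 2400)
    (2000, 1600)
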
@@ -130,11 +130,11 @@ def collect_working_items(image_url_file: List[str], annotation_file: str, colle if resp_json.status_code == requests.codes.ok: x = resp_json.json()['height'] y = resp_json.json()['width'] - target_h, target_w = compute_reduced_dimensions(x, y) + target_h, target_w = _compute_reduced_dimensions(x, y) else: resp_json.raise_for_status() - regions = get_annotations(annotations, iiif) + regions = _get_annotations(annotations, iiif) wk_item = WorkingItem( collection, @@ -171,7 +171,7 @@ def scale_down_original(working_item, img_out_dir: str) -> None: outfile = os.path.join(image_set_dir, working_item.image_name + "_ds.png") if not os.path.isfile(outfile): - img = getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) + img = _getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) img_resized = transform.resize( img, [working_item.reduced_y, working_item.reduced_x], @@ -181,7 +181,7 @@ def scale_down_original(working_item, img_out_dir: str) -> None: imsave(outfile, img_resized.astype(np.uint8)) -def getimage_from_iiif(url, user, pwd): +def _getimage_from_iiif(url, user, pwd): img = requests.get(url, auth=(user, pwd)) return imread(img.content) @@ -211,7 +211,7 @@ def export_annotation_dict(annotation_dict: dict, filename: str) -> None: json.dump(annotation_dict, f) -def write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: +def _write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: """ Save a mask with filename containing 'label'. @@ -377,13 +377,13 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab :return: """ if not working_item.reduced_y and not working_item.reduced_x: - write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], anti_aliasing=False, preserve_range=True, order=0) - write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) else: - write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) for wi in tqdm(working_items, desc="workingItem2mask"): labels = [] @@ -475,13 +475,13 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab :return: """ if not working_item.reduced_y and not working_item.reduced_x: - write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], anti_aliasing=False, preserve_range=True, order=0) - write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) else: - write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + _write_mask(mask_image, masks_dir, collection, working_item.image_name, 
label_item) for wi in tqdm(working_items, desc="workingItem2mask"): labels = [] @@ -501,7 +501,6 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab # nb: if 2 labels are on the same page, they belongs to the same mask elem_to_iterate = selected_regions.values() if isinstance(selected_regions, dict) else selected_regions - # Todo : use only cv2.fillPoly method -> format the data for 'rect' case to fit fillPoly args contours_points = list() for sr in elem_to_iterate: if sr['shape_attributes']['name'] == 'rect': @@ -516,19 +515,32 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab [x, y + h] ]).reshape((-1, 1, 2))) + if contours_only: + mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) + else: + mask = cv2.fillPoly(mask, contours_points, 255) + elif sr['shape_attributes']['name'] == 'polygon': contours_points.append(np.stack([sr['shape_attributes']['all_points_x'], sr['shape_attributes']['all_points_y']], axis=1)[:, None, :]) + if contours_only: + mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) + else: + mask = cv2.fillPoly(mask, contours_points, 255) + + elif sr['shape_attributes']['name'] == 'circle': + center_point = (sr['shape_attributes']['cx'], sr['shape_attributes']['cy']) + radius = sr['shape_attributes']['r'] + + if contours_only: + mask = cv2.circle(mask, center_point, radius, 255, thickness=15) + else: + mask = cv2.circle(mask, center_point, radius, 255, thickness=-1) else: raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented yet' .format(sr['shape_attributes']['name'])) - if contours_only: - mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) - else: - mask = cv2.fillPoly(mask, contours_points, 255) - # resize resize_and_write_mask(mask, wi, label_name) # add to existing labels @@ -591,6 +603,82 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab # create_masks_v2(masks_dir, working_items, annotation_file, collection) # + +def _get_xywh_from_coordinates(coordinates: np.array) -> Tuple[int, int, int, int]: + """ + From cooridnates points get x,y, width height + :param coordinates: (N,2) coordinates (x,y) + :return: x, y, w, h + """ + + x = np.min(coordinates[:, 0]) + y = np.min(coordinates[:, 1]) + w = np.max(coordinates[:, 0]) - x + h = np.max(coordinates[:, 1]) - y + + return x, y, w, h + + +def create_via_region_from_coordinates(coordinates: np.array, region_attributes: dict, type_region: str) -> dict: + """ + + :param coordinates: (N, 2) coordinates (x, y) + :param region_attributes: dictionary with keys : name of labels, values : values of labels + :param type_region: via region annotation type ('rect', 'polygon') + :return: a region in VIA style (dict/json) + """ + assert type_region in ['rect', 'polygon', 'circle'] + + if type_region == 'rect': + x, y, w, h = _get_xywh_from_coordinates(coordinates) + shape_atributes = { + 'name': 'rect', + 'height': int(h), + 'width': int(w), + 'x': int(x), + 'y': int(y) + } + elif type_region == 'polygon': + points_x = list(coordinates[:, 0]) + points_y = list(coordinates[:, 1]) + + shape_atributes = { + 'name': 'polygon', + 'all_points_x': [int(p) for p in points_x], + 'all_points_y': [int(p) for p in points_y], + } + elif type_region == 'circle': + raise NotImplementedError('The type {} is not supported for the export.'.format(type)) + + return {'region_attributes': region_attributes, + 'shape_attributes': shape_atributes} + + +def 
create_via_annotation_single_image(img_filename: str, via_regions: List[dict], + file_attributes: dict=None) -> Dict[str, dict]: + """ + + :param img_filename: + :param via_regions: + :param file_attributes: + :return: + """ + + basename = os.path.basename(img_filename) + file_size = os.path.getsize(img_filename) + + via_key = '{}{}'.format(basename, file_size) + + via_annotation = { + 'file_attributes': file_attributes if file_attributes is not None else dict(), + 'filename': basename, + 'size': file_size, + 'regions': via_regions + } + + return {via_key: via_annotation} + + """ Example of usage From fbb9350ae7b0f9831c113b95a2bb282e39b6f5a1 Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 5 Feb 2019 11:06:35 +0100 Subject: [PATCH 35/57] doc and typos --- dh_segment/io/via.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 44a7b77..06706da 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -245,7 +245,7 @@ def get_labels(annotation_file: str) -> dict: # list(annotations['_via_attributes']['region'][{name_attributes}][{args}] label_list = list(annotations['_via_attributes']['region']['Label']['options']) - # todo: keep a list of colors and assign randomly to label (to avoid hard coding labels/color correspondance) + # todo: keep a list of colors and assign randomly to label (to avoid hard coding labels/color correspondence) # todo: or take it from a config file label_color = dict() for label in label_list: @@ -259,7 +259,7 @@ def get_labels(annotation_file: str) -> dict: def get_via_attributes_regions(annotation_dict: dict, via_version: int=2) -> List[str]: """ - Gets the attributes of the annotataed data. + Gets the attributes of the annotated data. :param annotation_dict: :param via_version: either 1 or 2 (for VIA v 1.0 or VIA v 2.0) @@ -371,6 +371,7 @@ def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): """ Resize only if needed (if working_item.reduced != working_item.original) + :param mask_image: :param working_item: :param label_item: @@ -469,6 +470,7 @@ def create_masks_v1(masks_dir: str, working_items: List[WorkingItem], collection def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): """ Resize only if needed (if working_item.reduced != working_item.original) + :param mask_image: :param working_item: :param label_item: @@ -603,10 +605,13 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab # create_masks_v2(masks_dir, working_items, annotation_file, collection) # +# EXPORT +# ------ def _get_xywh_from_coordinates(coordinates: np.array) -> Tuple[int, int, int, int]: """ - From cooridnates points get x,y, width height + From coordinates points get x,y, width, height + :param coordinates: (N,2) coordinates (x,y) :return: x, y, w, h """ @@ -621,6 +626,7 @@ def _get_xywh_from_coordinates(coordinates: np.array) -> Tuple[int, int, int, in def create_via_region_from_coordinates(coordinates: np.array, region_attributes: dict, type_region: str) -> dict: """ + Formats coordinates to a VIA region (dict). 
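
A hypothetical end-to-end sketch of the export helpers documented in this patch,
assuming ``page_0001.png`` exists on disk and using an invented ``Label`` attribute::

    import numpy as np

    coords = np.array([[10, 20], [110, 20], [110, 80], [10, 80]])
    region = create_via_region_from_coordinates(coords, {'Label': 'figure'}, 'rect')
    annotations = create_via_annotation_single_image('page_0001.png', [region])
    export_annotation_dict(annotations, 'via_regions.json')
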
     :param coordinates: (N, 2) coordinates (x, y)
     :param region_attributes: dictionary with keys : name of labels, values : values of labels
     :param type_region: via region annotation type ('rect', 'polygon')
@@ -657,11 +663,12 @@ def create_via_region_from_coordinates(coordinates: np.array, region_attributes:
 def create_via_annotation_single_image(img_filename: str, via_regions: List[dict],
                                        file_attributes: dict=None) -> Dict[str, dict]:
     """
+    Returns a dictionary item {key: annotation} in VIA format to further export in .json file
 
-    :param img_filename:
-    :param via_regions:
-    :param file_attributes:
-    :return:
+    :param img_filename: path to the image
+    :param via_regions: regions in VIA format (output from ``create_via_region_from_coordinates``)
+    :param file_attributes: file attributes (usually None)
+    :return: dictionary item with key and annotations in VIA format
     """
 
     basename = os.path.basename(img_filename)

From 600acaab5f68b86f7dbf3a44e90a532d5cf0679c Mon Sep 17 00:00:00 2001
From: soliveir
Date: Mon, 11 Feb 2019 16:08:10 +0100
Subject: [PATCH 36/57] simplified via.py and updated doc

---
 dh_segment/io/__init__.py |  28 +-
 dh_segment/io/via.py      | 974 ++++++++++++++++++++++----------
 doc/start/annotating.rst  |  23 +
 doc/start/index.rst       |   1 +
 environment.yml           |   1 +
 setup.py                  |   4 +
 6 files changed, 621 insertions(+), 410 deletions(-)
 create mode 100644 doc/start/annotating.rst

diff --git a/dh_segment/io/__init__.py b/dh_segment/io/__init__.py
index 2121ce4..531ba66 100644
--- a/dh_segment/io/__init__.py
+++ b/dh_segment/io/__init__.py
@@ -64,18 +64,36 @@
 
 ----
 
+.. _ref_via:
+
 VGG Image Annotator helpers
 ---------------------------
+
+**VIA objects**
+
+.. autosummary::
+    via.WorkingItem
+    via.VIAttribute
+
+
+**Creating masks with VIA annotations**
+
 .. autosummary::
     via.load_annotation_data
     via.export_annotation_dict
-    via.get_annotations
-    via.get_via_attributes_regions
-    via.get_labels_per_attribute
+    via.get_annotations_per_file
+    via.parse_via_attributes
+    via.get_via_attributes
     via.collect_working_items
-    via.create_masks_v1
-    via.create_masks_v2
+    via.create_masks
+
+
+**Formatting in VIA JSON format**
+
+.. autosummary::
+    via.create_via_region_from_coordinates
+    via.create_via_annotation_single_image
 
 ----
 
diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py
index 06706da..8d34fe4 100644
--- a/dh_segment/io/via.py
+++ b/dh_segment/io/via.py
@@ -1,26 +1,30 @@
 #!/usr/bin/env python
 # coding: utf-8
 
+__author__ = "maudehrmann, solivr"
+__license__ = "GPL"
+
 import json
 import os
+import re
 from tqdm import tqdm
 import numpy as np
 from skimage import transform
 from collections import namedtuple
 from imageio import imsave, imread
 import requests
-from itertools import filterfalse
+from itertools import filterfalse, chain
 from typing import List, Tuple, Dict
-from enum import Enum
 import cv2
+from taputapu.io.image import get_image_shape_without_loading
 
-__author__ = "maudehrmann"
-
+# To define before using the corresponding functions
 # iiif_password = os.environ["IIIF_PWD"]
 iiif_password = ''
 
-WorkingItem = namedtuple(  # TODO:
+
+WorkingItem = namedtuple(
     "WorkingItem", [
         'collection',
         'image_name',
@@ -32,23 +36,68 @@
         'annotations'
     ]
 )
+WorkingItem.__doc__ = """
+A container for annotated images.
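
For reference, a ``WorkingItem`` for a local (non-IIIF) image would be built along these
lines (all values invented)::

    wi = WorkingItem(collection='mycollection', image_name='page_0001',
                     original_x=2400, original_y=3600,
                     reduced_x=None, reduced_y=None,
                     iiif=None, annotations=[])
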
+ +:param str collection: name of the collection +:param str image_name: name of the image +:param int original_x: original image x size (width) +:param int original_y: original image y size (height) +:param int reduced_x: resized x size +:param int reduced_y: resized y size +:param str iiif: iiif url +:param dict annotations: VIA 'region_attributes' +""" -class Color(Enum): - WHITE = (255, 255, 255) - BLACK = (0, 0, 0) - GREY = (128, 128, 128) - RED = (255, 0, 0) - GREEN = (0, 255, 0) - BLUE = (0, 0, 255) - YELLOW = (255, 255, 0) +VIAttribute = namedtuple( + "VIAttribute", [ + 'name', + 'type', + 'options' + ] +) +VIAttribute.__doc__ = """ +A container for VIA attributes. +:param str name: The name of attribute +:param str type: The type of the annotation (dropdown, markbox, ...) +:param list options: The options / labels possible for this attribute. +""" -def _get_annotations(via_dict: dict, name_file: str) -> dict: + +def parse_via_attributes(via_attributes: dict) -> List[VIAttribute]: + """ + Parses the VIA attribute dictionary and returns a list of VIAttribute instances + + :param via_attributes: attributes from VIA annotation ('_via_attributes' field) + :return: list of ``VIAttribute`` """ - From VIA json file, get annotations relative to the given `name_file`. - :param via_dict: VIA annotation output (originally json) + if {'file', 'region'}.issubset(set(via_attributes.keys())): + via_attributes = via_attributes['region'] + + list_attributes = list() + for k, v in via_attributes.items(): + if v['type'] == 'text': + print('WARNING : Please do not use text type for attributes because it is more prone to errors/typos which ' + 'can make the parsing fail. Use instead "checkbox", "dropdown" or "radio" with defined options.') + options = None + else: + options = list(v['options'].keys()) + + list_attributes.append(VIAttribute(k, + v['type'], + options)) + + return list_attributes + + +def get_annotations_per_file(via_dict: dict, name_file: str) -> dict: + """ + From VIA json content, get annotations relative to the given `name_file`. + + :param via_dict: VIA annotations content (originally json) :param name_file: the file to look for (it can be a iiif path or a file path) :return: dict """ @@ -63,7 +112,8 @@ def _get_annotations(via_dict: dict, name_file: str) -> dict: # If it looks like a iiif path add "-1" if 'http' in name_file: key = name_file + "-1" - else: # find the key that contains the name_file + else: + # find the key that contains the name_file list_keys = list(filterfalse(lambda x: name_file not in x, list(annotation_dict.keys()))) assert len(list_keys) == 1, "There is more than one key for the file '{} : \n{}'".format(name_file, list_keys) key = list_keys[0] @@ -86,73 +136,133 @@ def _compute_reduced_dimensions(x: int, y: int, target_h: int=2000) -> Tuple[int :return: tuple """ ratio = y / x - target_w = int(target_h*ratio) + target_w = int(target_h * ratio) return target_h, target_w -def collect_working_items(image_url_file: List[str], annotation_file: str, collection: str) -> List[WorkingItem]: +def _collect_working_items_from_local_images(via_annotations: dict, images_dir: str, collection_name: str) \ + -> List[WorkingItem]: """ - Given VIA annotation input, collect all info on `WorkingItem` object. 
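
Looking back at ``parse_via_attributes`` above, a minimal sketch of its behaviour on a
VIA 2 attribute block (attribute and option names invented)::

    via_attributes = {'region': {'Label': {'type': 'dropdown',
                                           'options': {'text': '', 'figure': ''}}},
                      'file': {}}
    parse_via_attributes(via_attributes)
    # -> [VIAttribute(name='Label', type='dropdown', options=['text', 'figure'])]
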
+
+    :param via_annotations: via annotations ('regions' field)
+    :param images_dir: directory where to find the images
+    :param collection_name: name of the collection
+    :return:
+    """
+
+    def _formatting(name_id: str) -> str:
+        name_id = re.sub('.jpg\d*', '.jpg', name_id)
+        name_id = re.sub('.png\d*', '.png', name_id)
+        return name_id
+
+    working_items = list()
+
+    for key, v in tqdm(via_annotations.items()):
+        filename = _formatting(key)
+
+        absolute_filename = os.path.join(images_dir, filename)
+        shape_image = get_image_shape_without_loading(absolute_filename)
+
+        regions = v['regions']
+
+        if regions:
+            wk_item = WorkingItem(collection=collection_name,
+                                  image_name=filename.split('.')[0],
+                                  original_x=shape_image[0],
+                                  original_y=shape_image[1],
+                                  reduced_x=None,
+                                  reduced_y=None,
+                                  iiif=None,
+                                  annotations=regions)
+
+            working_items.append(wk_item)
+
+    return working_items
+
+
+def _collect_working_items_from_iiif(via_annotations: dict, collection_name: str, iiif_user='my-team') -> dict:
+    """
+    Given VIA annotation input, collect all info on `WorkingItem` object, when the images come from IIIF urls
+
+    :param via_annotations: via annotations ('regions' field)
+    :param collection_name: name of the collection
+    :param iiif_user: user param for requests.Session().get()
+    :return:
+    """
-    print("Collecting working items for {}".format(collection))
-    working_items = []
+
+    working_items = list()
     session = requests.Session()
 
-    # load manual annotation json data
-    with open(annotation_file, 'r') as a:
-        annotations = json.load(a)
-
-    # iterate over image IIIF URLs and build working items
-    with open(image_url_file) as current_file:
-        lines = current_file.readlines()
-        for line in tqdm(lines, desc='URL2WorkingItem'):
-            x = None
-            y = None
-            target_h = None
-            target_w = None
-
-            # line is e.g. 
'https://myserver.ch/iiif_project/image-name/full/full/0/default.jpg' - basename = "https://myserver.ch/iiif_project/" # todo: update, or even pass as param - iiif = line.strip("\n") - - # get image-name - image_name = line.split(basename)[1].split("/full/full/0/default.jpg")[0] - - # get image dimensions - iiif_json = iiif.replace("default.jpg", "info.json") - resp_image = session.get(iiif, auth=('epfl-team', iiif_password)) # need to request image first - resp_json = session.get(iiif_json, auth=('epfl-team', iiif_password)) - if resp_json.status_code == requests.codes.ok: - x = resp_json.json()['height'] - y = resp_json.json()['width'] - target_h, target_w = _compute_reduced_dimensions(x, y) - else: - resp_json.raise_for_status() - - regions = _get_annotations(annotations, iiif) - - wk_item = WorkingItem( - collection, - image_name, - x, - y, - target_h, - target_w, - iiif, - regions - ) + for key, v in tqdm(via_annotations.items()): + iiif_url = v['filename'] + + image_name = os.path.basename(iiif_url.split('/full/full/')[0]) + + # get image dimensions + iiif_json = iiif_url.replace("default.jpg", "info.json") + resp_json = session.get(iiif_json, auth=(iiif_user, iiif_password)) + if resp_json.status_code == requests.codes.ok: + y = resp_json.json()['height'] + x = resp_json.json()['width'] + # target_h, target_w = _compute_reduced_dimensions(x, y) + target_h, target_w = None, None + else: + x, y, target_w, target_h = None, None, None, None + resp_json.raise_for_status() + + regions = v['regions'] + + if regions: + wk_item = WorkingItem(collection=collection_name, + image_name=image_name.split('.')[0], + original_x=x, + original_y=y, + reduced_x=target_w, + reduced_y=target_h, + iiif=iiif_url, + annotations=regions) + working_items.append(wk_item) - print("Collected {} items.".format(len(working_items))) return working_items -def scale_down_original(working_item, img_out_dir: str) -> None: +def collect_working_items(via_annotations: dict, collection_name: str, images_dir: str=None, + via_version: int=2) -> List[WorkingItem]: + """ + Given VIA annotation input, collect all info on `WorkingItem` object. + This function will take care of separating images from local files and images from IIIF urls. + + :param via_annotations: via annotations ('regions' field) + :param images_dir: directory where to find the images + :param collection_name: name of the collection + :param via_version: version of the VIA tool used to produce the annotations (1 or 2) + :return: list of `WorkingItem` + """ + + via_annotations_v2 = via_annotations.copy() + if via_version == 1: + for key, value in via_annotations_v2.items(): + list_regions = list() + for v_region in value['regions'].values(): + list_regions.append(v_region) + via_annotations_v2[key]['regions'] = list_regions + + local_annotations = {k: v for k, v in via_annotations_v2.items() if 'http' not in k} + url_annotations = {k: v for k, v in via_annotations_v2.items() if 'http' in k} + + working_items = list() + if local_annotations: + assert images_dir is not None + working_items += _collect_working_items_from_local_images(local_annotations, images_dir, collection_name) + if url_annotations: + working_items += _collect_working_items_from_iiif(url_annotations, collection_name) + + return working_items + + +def _scale_down_original(working_item, img_out_dir: str) -> None: """ Copy and reduce original image files. 
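
The VIA 1 compatibility shim in ``collect_working_items`` above turns each per-image
``regions`` dict into the list form VIA 2 uses; schematically, with a hypothetical
minimal annotation::

    v1_regions = {'0': {'shape_attributes': {'name': 'rect', 'x': 0, 'y': 0,
                                             'width': 10, 'height': 10},
                        'region_attributes': {'Label': 'text'}}}
    v2_regions = list(v1_regions.values())  # what the via_version=1 branch produces
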
@@ -160,6 +270,11 @@ def scale_down_original(working_item, img_out_dir: str) -> None: :param working_item: dict of `WorkingItems` :return: None """ + + def _getimage_from_iiif(url, user, pwd): + img = requests.get(url, auth=(user, pwd)) + return imread(img.content) + image_set_dir = os.path.join(img_out_dir, working_item.collection, "images") if not os.path.exists(image_set_dir): try: @@ -173,30 +288,32 @@ def scale_down_original(working_item, img_out_dir: str) -> None: if not os.path.isfile(outfile): img = _getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) img_resized = transform.resize( - img, - [working_item.reduced_y, working_item.reduced_x], - anti_aliasing=False, - preserve_range=True + img, + [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, + preserve_range=True ) imsave(outfile, img_resized.astype(np.uint8)) -def _getimage_from_iiif(url, user, pwd): - img = requests.get(url, auth=(user, pwd)) - return imread(img.content) - - -def load_annotation_data(via_data_filename: str) -> dict: +def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False) -> dict: """ Load the content of via annotation files. :param via_data_filename: via annotations json file + :param only_img_annotations: load only the images annotations ('_via_img_metadata' field) :return: the content of json file containing the region annotated """ + with open(via_data_filename, 'r') as f: content = json.load(f) - return content + assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export." + + if only_img_annotations: + return content['_via_img_metadata'] + else: + return content def export_annotation_dict(annotation_dict: dict, filename: str) -> None: @@ -211,400 +328,238 @@ def export_annotation_dict(annotation_dict: dict, filename: str) -> None: json.dump(annotation_dict, f) -def _write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: +def get_via_attributes(annotation_dict: dict, via_version: int=2) -> List[VIAttribute]: """ - Save a mask with filename containing 'label'. + Gets the attributes of the annotated data and returns a list of `VIAttribute`. - :param mask: - :param masks_dir: - :param collection: - :param image_name: - :param label: - :return: + :param annotation_dict: json content of the VIA exported file + :param via_version: either 1 or 2 (for VIA v 1.0 or VIA v 2.0) + :return: A list containing VIAttributes """ - outdir = os.path.join(masks_dir, collection, image_name) - if not os.path.exists(outdir): - os.makedirs(outdir) - label = label.strip(' \n').replace(" ", "_").lower() if label is not None else 'nolabel' - outfile = os.path.join(outdir, image_name + "-mask-" + label + ".png") - # if not os.path.isfile(outfile): - imsave(outfile, mask.astype(np.uint8)) + if via_version == 1: -def get_labels(annotation_file: str) -> dict: - """ - Get labels from annotation tool (VIA) settings. Only compatible with VIA v2.0. 
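
Put together, the typical entry point would look like this (file and directory names
invented; the json file must be a VIA 2 project export)::

    img_metadata = load_annotation_data('via_project_export.json', only_img_annotations=True)
    working_items = collect_working_items(img_metadata, 'mycollection', images_dir='/data/images')
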
+ list_attributes = [list(region['region_attributes'].keys()) + for value in annotation_dict.values() + for region in value['regions'].values()] - :param annotation_file: manual annotation json file - :return: dict (k=label, v=RGB code) - """ - with open(annotation_file, 'r') as a: - annotations = json.load(a) + # Find options + unique_attributes = list(np.unique(list(chain.from_iterable(list_attributes)))) - # todo : this is not generic - # list(annotations['_via_attributes']['region'][{name_attributes}][{args}] - label_list = list(annotations['_via_attributes']['region']['Label']['options']) + dict_labels = {rgn_att: list() for rgn_att in unique_attributes} + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions.values(): + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) - # todo: keep a list of colors and assign randomly to label (to avoid hard coding labels/color correspondence) - # todo: or take it from a config file - label_color = dict() - for label in label_list: - if label == "MYLABEL1": - label_color[label] = Color.WHITE # white - elif label == "MYLABEL2": - label_color[label] = Color.YELLOW # yellow - # etc. - return label_color + elif via_version == 2: + if '_via_attributes' in annotation_dict.keys(): # If project_export is given + return parse_via_attributes(annotation_dict['_via_attributes']) -def get_via_attributes_regions(annotation_dict: dict, via_version: int=2) -> List[str]: - """ - Gets the attributes of the annotated data. + else: # else if annotation_export is given - :param annotation_dict: - :param via_version: either 1 or 2 (for VIA v 1.0 or VIA v 2.0) - :return: A list containing the attributes names§ - """ - if via_version == 1: - list_categories = list() + list_attributes = [list(region['region_attributes'].keys()) + for value in annotation_dict.values() + for region in value['regions']] - for value in annotation_dict.values(): - regions = value['regions'] - for region in regions.values(): - list_categories += list(region['region_attributes'].keys()) + # Find options + unique_attributes = list(np.unique(list(chain.from_iterable(list_attributes)))) - return list(np.unique(list_categories)) - elif via_version == 2: - # If project_export is given - if '_via_attributes' in annotation_dict.keys(): - return list(annotation_dict['_via_attributes']['region'].keys()) - # else if annotation_export is given - else: - list_categories = list() + dict_labels = {rgn_att: list() for rgn_att in unique_attributes} for value in annotation_dict.values(): regions = value['regions'] for region in regions: - list_categories += list(region['region_attributes'].keys()) + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) - return list(np.unique(list_categories)) else: raise NotImplementedError + # Instantiate VIAttribute objects + viattribute_list = list() + for attribute, options in dict_labels.items(): -def get_labels_per_attribute(annotation_dict: dict, attribute_regions: List[str], via_version: int=2) -> Tuple: - """ - For each attribute, get all the possible label variants. 
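
The type check just below distinguishes the two shapes an option value can take: dropdown/radio attributes store one plain string per region, while checkbox attributes store a dict whose keys are the ticked options (see the ``_via_img_metadata`` example at the end of this module). A small illustration with hypothetical option names::

    # dropdown/radio attribute: one string per region
    options = ['op1', 'op2', 'op1']          # -> unique options ['op1', 'op2']

    # checkbox attribute: one dict of ticked options per region
    options = [{'op1': True}, {'op1': True, 'op2': True}]
    # chain.from_iterable iterates over the dict keys:
    # ['op1', 'op1', 'op2']                  # -> unique options ['op1', 'op2']
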
+ if all(isinstance(opt, str) for opt in options): + viattribute_list.append(VIAttribute(name=attribute, + type=None, + options=list(np.unique(options)))) - :param annotation_dict: - :param attribute_regions: - :param via_version: - :return: (unique_labels, dict_labels) : `dict_labels` is a dictionary containing all the labels per attribute + elif all(isinstance(opt, dict) for opt in options): + viattribute_list.append(VIAttribute(name=attribute, + type=None, + options=list(np.unique(list(chain.from_iterable(options)))))) + + else: + raise NotImplementedError + return viattribute_list - Usage - ----- - >>> annotations_dict = load_annotation_data('via_annotations.json') - >>> list_attributes = get_via_attributes_regions(annotations_dict) - >>> list_attributes - >>> ['object', 'text'] - >>> - >>> unique_labels, dict_labels = get_labels_per_attribute(annotations_dict, list_attributes) - >>> unique_labels - >>> [['animal', 'car', ''], ['handwritten', 'typed', '']] - >>> - >>> dict_labels - >>> {'object': ['animal', 'animal', 'car', 'animal', ''], 'text': ['handwritten', '', 'typed', '', 'handwritten']} +def _draw_mask(via_region: dict, mask: np.array, contours_only: bool=False) -> np.array: """ - if via_version == 1: - dict_labels = {ar: list() for ar in attribute_regions} - for value in annotation_dict.values(): - regions = value['regions'] - for region in regions.values(): - for k, v in region['region_attributes'].items(): - dict_labels[k].append(v) + :param via_region: region to draw (in VIA format) + :param mask: image mask to draw on + :param contours_only: if `True`, draws only the contours of the region, if `False`, fills the region + :return: the drawn mask + """ - unique_labels = list() - for ar in attribute_regions: - unique_labels.append(list(np.unique(dict_labels[ar]))) + shape_attributes_dict = via_region['shape_attributes'] - return unique_labels, dict_labels - elif via_version == 2: - # If project_export is given - if '_via_attributes' in annotation_dict.keys(): - raise NotImplementedError - # else if annotation_export is given - else: - dict_labels = {ar: list() for ar in attribute_regions} - for value in annotation_dict.values(): - regions = value['regions'] - for region in regions: - for k, v in region['region_attributes'].items(): - dict_labels[k].append(v) + if shape_attributes_dict['name'] == 'rect': + x = shape_attributes_dict['x'] + y = shape_attributes_dict['y'] + w = shape_attributes_dict['width'] + h = shape_attributes_dict['height'] - unique_labels = list() - for ar in attribute_regions: - unique_labels.append(list(np.unique(dict_labels[ar]))) + contours = np.array([[x, y], + [x + w, y], + [x + w, y + h], + [x, y + h] + ]).reshape((-1, 1, 2)) - return unique_labels, dict_labels - else: - raise NotImplementedError + mask = cv2.polylines(mask, [contours], True, 255, thickness=15) if contours_only \ + else cv2.fillPoly(mask, [contours], 255) + elif shape_attributes_dict['name'] == 'polygon': + contours = np.stack([shape_attributes_dict['all_points_x'], + shape_attributes_dict['all_points_y']], axis=1)[:, None, :] -def create_masks_v2(masks_dir: str, working_items: List[WorkingItem], annotation_file: str, - collection: str, contours_only: bool=False) -> None: - """ - For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 2.0. - Several annotations of the same class on the same image produce one image with several masks. 
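
For reference, a hedged usage sketch of ``_draw_mask``; the region below is invented but follows the VIA ``shape_attributes`` format illustrated at the end of this module, and the mask shape is arbitrary::

    import numpy as np

    via_region = {'shape_attributes': {'name': 'rect', 'x': 225, 'y': 458,
                                       'width': 1541, 'height': 2277},
                  'region_attributes': {}}
    mask = np.zeros((3000, 2000), np.uint8)      # (H, W) black background
    mask = _draw_mask(via_region, mask)          # filled rectangle
    outline = _draw_mask(via_region, np.zeros((3000, 2000), np.uint8),
                         contours_only=True)     # 15 px-thick contour only
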
+ mask = cv2.polylines(mask, [contours], True, 255, thickness=15) if contours_only \ + else cv2.fillPoly(mask, [contours], 255) - :param masks_dir: where to output the masks - :param working_items: infos to work with - :param annotation_file: - :param collection: - :param contours_only: creates the binary masks only for the contours of the object (thickness of contours : 20 px) - :return: None - """ - print("Creating masks in {}...".format(masks_dir)) + elif shape_attributes_dict['name'] == 'circle': + center_point = (shape_attributes_dict['cx'], shape_attributes_dict['cy']) + radius = shape_attributes_dict['r'] - annotation_summary = dict() + mask = cv2.circle(mask, center_point, radius, 255, thickness=15) if contours_only \ + else cv2.circle(mask, center_point, radius, 255, thickness=-1) - def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): - """ - Resize only if needed (if working_item.reduced != working_item.original) + elif shape_attributes_dict['name'] == 'polyline': + contours = np.stack([shape_attributes_dict['all_points_x'], + shape_attributes_dict['all_points_y']], axis=1)[:, None, :] - :param mask_image: - :param working_item: - :param label_item: - :return: - """ - if not working_item.reduced_y and not working_item.reduced_x: - _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) - elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: - mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], - anti_aliasing=False, preserve_range=True, order=0) - _write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) - else: - _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + mask = cv2.polylines(mask, [contours], False, 255, thickness=15) - for wi in tqdm(working_items, desc="workingItem2mask"): - labels = [] - label_list = get_labels(annotation_file) - # the image has no annotation, writing a black mask: - if not wi.annotations: - mask = np.zeros([wi.original_y, wi.original_x], np.uint8) - resize_and_write_mask(mask, wi, None) - labels.append("nolabel") - # check all possible labels for the image and create mask: - else: - for label in label_list.keys(): - # get annotation corresponding to current label - # todo : the 'Label' key is not generic - selected_regions = list(filter(lambda r: r['region_attributes']['Label'] == label, wi.annotations)) - if selected_regions: - # create a 0 matrix (black background) - mask = np.zeros([wi.original_y, wi.original_x], np.uint8) - # add one or several mask for current label - # nb: if 2 labels are on the same page, they belongs to the same mask - - contours_points = list() - for sr in selected_regions: - - if sr['shape_attributes']['name'] == 'rect': - x = sr['shape_attributes']['x'] - y = sr['shape_attributes']['y'] - w = sr['shape_attributes']['width'] - h = sr['shape_attributes']['height'] - - contours_points.append(np.array([[x, y], - [x + w, y], - [x + w, y + h], - [x, y + h] - ]).reshape((-1, 1, 2))) - - elif sr['shape_attributes']['name'] == 'polygon': - contours_points.append(np.stack([sr['shape_attributes']['all_points_x'], - sr['shape_attributes']['all_points_y']], axis=1)[:, None, :]) - - else: - raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented ' - 'yet'.format(sr['shape_attributes']['name'])) - - if contours_only: - mask = cv2.polylines(mask, contours_points, True, 255, 
thickness=15) - else: - mask = cv2.fillPoly(mask, contours_points, 255) - - # resize - resize_and_write_mask(mask, wi, label) - # add to existing labels - labels.append(label.strip(' \n').replace(" ", "_").lower()) + else: + raise NotImplementedError( + 'Mask annotation for shape of type "{}" has not been implemented yet' + .format(shape_attributes_dict['name'])) - # write summary: list of existing labels per image - annotation_summary[wi.image_name] = labels - outfile = os.path.join(masks_dir, collection, collection + "-classes.txt") - fh = open(outfile, 'w') - for a in annotation_summary: - fh.write(a + "\t" + str(annotation_summary[a]) + "\n") - fh.close() + return mask - print("Done.") - return annotation_summary +def _write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: + """ + Save a mask with filename containing 'label'. -def create_masks_v1(masks_dir: str, working_items: List[WorkingItem], collection: str, - label_name: str, contours_only: bool=False) -> None: + :param mask: mask b&w image (H, W) + :param masks_dir: directory to output mask + :param collection: name of the collection + :param image_name: name of the image + :param label: label of the mask + :return: """ - For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 1.0. + + outdir = os.path.join(masks_dir, collection, image_name) + if not os.path.exists(outdir): + os.makedirs(outdir) + label = label.strip(' \n').replace(" ", "_").lower() if label is not None else 'nolabel' + outfile = os.path.join(outdir, image_name + "-mask-" + label + ".png") + imsave(outfile, mask.astype(np.uint8)) + + +def create_masks(masks_dir: str, working_items: List[WorkingItem], via_attributes: List[VIAttribute], + collection: str, contours_only: bool=False) -> dict: + """ + For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 2.0. Several annotations of the same class on the same image produce one image with several masks. :param masks_dir: where to output the masks :param working_items: infos to work with - :param collection: - :param label_name: name of the label to create mask + :param via_attributes: VIAttributes computed by ``get_via_attributes`` function. 
+ :param collection: name of the nollection :param contours_only: creates the binary masks only for the contours of the object (thickness of contours : 20 px) - :return: None + :return: annotation_summary, a dictionary containing a list of labels per image """ - annotation_summary = dict() - - def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str): + def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str) -> None: """ Resize only if needed (if working_item.reduced != working_item.original) - :param mask_image: - :param working_item: - :param label_item: + :param mask_image: mask image to write + :param working_item: `WorkingItem` object + :param label_item: label name to append to filename :return: """ + if not working_item.reduced_y and not working_item.reduced_x: _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: - mask_resized = transform.resize(mask_image, [working_item.reduced_y, working_item.reduced_x], - anti_aliasing=False, preserve_range=True, order=0) + mask_resized = transform.resize(mask_image, + [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, + preserve_range=True, + order=0) _write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + else: _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + # ------------------- + + print("Creating masks in {}...".format(masks_dir)) + + annotation_summary = dict() for wi in tqdm(working_items, desc="workingItem2mask"): - labels = [] + labels = list() + # the image has no annotation, writing a black mask: if not wi.annotations: mask = np.zeros([wi.original_y, wi.original_x], np.uint8) resize_and_write_mask(mask, wi, None) labels.append("nolabel") + # check all possible labels for the image and create mask: else: - # get annotation corresponding to current label - selected_regions = wi.annotations - if selected_regions: - # create a 0 matrix (black background) - mask = np.zeros([wi.original_y, wi.original_x], np.uint8) - # add one or several mask for current label - # nb: if 2 labels are on the same page, they belongs to the same mask - elem_to_iterate = selected_regions.values() if isinstance(selected_regions, dict) else selected_regions - - contours_points = list() - for sr in elem_to_iterate: - if sr['shape_attributes']['name'] == 'rect': - x = sr['shape_attributes']['x'] - y = sr['shape_attributes']['y'] - w = sr['shape_attributes']['width'] - h = sr['shape_attributes']['height'] - - contours_points.append(np.array([[x, y], - [x + w, y], - [x + w, y + h], - [x, y + h] - ]).reshape((-1, 1, 2))) - - if contours_only: - mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) - else: - mask = cv2.fillPoly(mask, contours_points, 255) - - elif sr['shape_attributes']['name'] == 'polygon': - contours_points.append(np.stack([sr['shape_attributes']['all_points_x'], - sr['shape_attributes']['all_points_y']], axis=1)[:, None, :]) - - if contours_only: - mask = cv2.polylines(mask, contours_points, True, 255, thickness=15) - else: - mask = cv2.fillPoly(mask, contours_points, 255) - - elif sr['shape_attributes']['name'] == 'circle': - center_point = (sr['shape_attributes']['cx'], sr['shape_attributes']['cy']) - radius = sr['shape_attributes']['r'] - - if contours_only: - mask = cv2.circle(mask, center_point, radius, 255, thickness=15) - 
else: - mask = cv2.circle(mask, center_point, radius, 255, thickness=-1) + for attribute in via_attributes: + for option in attribute.options: + # get annotations that have the current attribute + selected_regions = list(filter(lambda r: attribute.name in r['region_attributes'].keys(), + wi.annotations)) + # get annotations that have the current attribute and option + if selected_regions: + selected_regions = list(filter(lambda r: r['region_attributes'][attribute.name] == option, + selected_regions)) else: - raise NotImplementedError('Mask annotation for shape of type "{}" has not been implemented yet' - .format(sr['shape_attributes']['name'])) + continue - # resize - resize_and_write_mask(mask, wi, label_name) - # add to existing labels - labels.append(label_name.strip(' \n').replace(" ", "_").lower()) + if selected_regions: + # create a 0 matrix (black background) + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + + # nb: if 2 labels are on the same page, they belongs to the same mask + for sr in selected_regions: + mask = _draw_mask(sr, mask, contours_only) + + label = '{}-{}'.format(attribute.name, option).lower() + resize_and_write_mask(mask, wi, label) + # add to existing labels + labels.append(label) # write summary: list of existing labels per image annotation_summary[wi.image_name] = labels outfile = os.path.join(masks_dir, collection, collection + "-classes.txt") - fh = open(outfile, 'a') - for a in annotation_summary: - fh.write(a + "\t" + str(annotation_summary[a]) + "\n") - fh.close() + with open(outfile, 'a') as fh: + for a in annotation_summary: + fh.write(a + "\t" + str(annotation_summary[a]) + "\n") + print("Done.") return annotation_summary -# def main(args): -# -# # read config -# config_file = args["--config-file"] -# task = args["--task"] -# collection = args["--collection"] -# -# if config_file and os.path.isfile(config_file): -# print("Found config file: {}".format(os.path.realpath(config_file))) -# with open(config_file, 'r') as f: -# config = json.load(f) -# else: -# print("Provide a config file") -# -# annotation_file = config.get("annotation_file") # manual annotation json file -# image_url_file = config.get("image_url_file") # url image list -# experiments_dir = config.get("experiments_dir") # output expe -# masks_dir = config.get("masks_dir") # output annotation_objects -# img_out_dir = config.get("img_out_dir") # re-scaled images -# -# print("\nGot the following paths:\n" -# "annotation_file: {}\n" -# "image_url_file: {}\n" -# "experiments_dir: {}\n" -# "masks_dir: {}\n" -# "img_out_dir: {}\n".format(annotation_file, image_url_file, experiments_dir, masks_dir, img_out_dir) -# ) -# -# # to test working items loading -# if task == "test-collect": -# collect_working_items(image_url_file, annotation_file, collection) -# -# # scale down and write original images -# elif task == "original": -# working_items = collect_working_items(image_url_file, annotation_file, collection) -# wi_bag = db.from_sequence(working_items, partition_size=100) -# wi_bag2 = wi_bag.map(scale_down_original, img_out_dir=img_out_dir) -# with ProgressBar(): -# wi_bag2.compute() -# -# # create masks -# elif task == "masks": -# working_items = collect_working_items(image_url_file, annotation_file, collection) -# create_masks_v2(masks_dir, working_items, annotation_file, collection) -# - # EXPORT # ------ @@ -663,16 +618,19 @@ def create_via_region_from_coordinates(coordinates: np.array, region_attributes: def create_via_annotation_single_image(img_filename: str, via_regions: 
List[dict], file_attributes: dict=None) -> Dict[str, dict]:
     """
-    Returns a dictionary item {key: annotation} in VIA forat to further export in .json file
+    Returns a dictionary item {key: annotation} in VIA format to further export to .json file
 
     :param img_filename: path to the image
     :param via_regions: regions in VIA format (output from ``create_via_region_from_coordinates``)
     :param file_attributes: file attributes (usually None)
     :return: dictionary item with key and annotations in VIA format
     """
-
-    basename = os.path.basename(img_filename)
-    file_size = os.path.getsize(img_filename)
+    if 'http' in img_filename:
+        basename = img_filename
+        file_size = -1
+    else:
+        basename = os.path.basename(img_filename)
+        file_size = os.path.getsize(img_filename)
 
     via_key = '{}{}'.format(basename, file_size)
 
@@ -691,11 +649,217 @@ def create_via_annotation_single_image(img_filename: str, via_regions: List[dict
 
 collection = 'mycollection'
-annotation_file = 'via_regions_annotated.json'
-image_url_file = 'list_files_image_url.txt'
+annotation_file = 'via_sample.json'
 masks_dir = '/home/project/generated_masks'
+images_dir = './my_images'
+
+# Load all the data in the annotation file (the file may be an exported project or an export of the annotations)
+via_data = load_annotation_data(annotation_file)
+
+# In the case of an exported project file, you can set ``only_img_annotations=True`` to get only
+# the image annotations
+via_annotations = load_annotation_data(annotation_file, only_img_annotations=True)
+
+# Collect the annotated regions
+working_items = collect_working_items(via_annotations, collection, images_dir)
+
+# Collect the attributes and options
+if '_via_attributes' in via_data.keys():
+    list_attributes = parse_via_attributes(via_data['_via_attributes'])
+else:
+    list_attributes = get_via_attributes(via_annotations)
+
+# Create one mask per option per attribute
+create_masks(masks_dir, working_items, list_attributes, collection)
+"""
+
+
+"""
+Content of a via_project exported file
+
+{'_via_attributes': {
+    ...
+    },
+    '_via_img_metadata': {
+    ...
+ }, + '_via_settings': { + 'core': { + 'buffer_size': 18, + 'default_filepath': '', + 'filepath': {} + }, + 'project': { + 'name': 'via_project_7Feb2019_10h7m' + }, + 'ui': { + 'annotation_editor_fontsize': 0.8, + 'annotation_editor_height': 25, + 'image': { + 'region_label': 'region_id', + 'region_label_font': '10px Sans' + }, + 'image_grid': { + 'img_height': 80, + 'rshape_fill': 'none', + 'rshape_fill_opacity': 0.3, + 'rshape_stroke': 'yellow', + 'rshape_stroke_width': 2, + 'show_image_policy': 'all', + 'show_region_shape': True + }, + 'leftsidebar_width': 18 + } + } +} + +""" + +""" +"_via_attributes": { + "region": { + "attribute1": { + "type":"text", + "description":"", + "default_value":"" + }, + "attribute2": { + "type":"dropdown", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + }, + "attribute3": { + "type":"checkbox", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + }, + "attribute 4": { + "type":"radio", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + } + }, + "file":{} +} -working_items = collect_working_items(image_url_file, annotation_file, collection) -create_masks_v2(masks_dir, working_items, annotation_file, collection) """ +""" +'_via_img_metadata': { + 'image_filename1.jpg2209797': { + 'file_attributes': {}, + 'filename': 'image_filename1.jpg', + 'regions': + [{ + 'region_attributes': { + 'attribute1': { + 'op1': True, + 'op2': True + }, + 'attribute 2': 'label1', + 'attribute 3': 'op1' + }, + 'shape_attributes': { + 'height': 2277, + 'name': 'rect', + 'width': 1541, + 'x': 225, + 'y': 458 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op1', + 'attribute 1': {}, + 'attribute 2': 'label1', + 'attribute 3': 'op2' + }, + 'shape_attributes': { + 'height': 2255, + 'name': 'rect', + 'width': 1554, + 'x': 1845, + 'y': 476 + } + }], + 'size': 2209797}, + 'https://libimages.princeton.edu/loris/pudl0001/5138415/00000011.jp2/full/full/0/default.jpg-1': { + 'file_attributes': {}, + 'filename': 'https://libimages.princeton.edu/loris/pudl0001/5138415/00000011.jp2/full/full/0/default.jpg', + 'regions': + [{ + 'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True + }, + 'attribute 2': 'label3', + 'attribute 3': 'op1' + }, + 'shape_attributes': { + 'height': 1026, + 'name': 'rect', + 'width': 1430, + 'x': 145, + 'y': 525 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True}, + 'attribute 2': 'label 3 ', + 'attribute 3': 'op1', + }, + 'shape_attributes': { + 'all_points_x': [2612, 2498, 2691, 2757, 2962, 3034, 2636], + 'all_points_y': [5176, 5616, 5659, 5363, 5375, 5110, 5122], + 'name': 'polygon' + } + }, + { + 'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True}, + 'attribute 2': 'label 3 ', + 'attribute 3': 'op1', + }, + 'shape_attributes': { + 'cx': 2793, + 'cy': 881, + 'name': 'circle', + 'r': 524 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op1', + 'attribute 1': { + 'op2': True}, + 'attribute 2': 'label1', + 'attribute 3': 'op2', + }, + 'shape_attributes': { + 'all_points_x': [3246, 5001], + 'all_points_y': [422, 380], + 'name': 'polyline' + } + }], + 'size': -1 + } +} +""" diff --git a/doc/start/annotating.rst b/doc/start/annotating.rst new file mode 100644 index 0000000..519e06b --- /dev/null +++ b/doc/start/annotating.rst @@ -0,0 +1,23 @@ +Creating groundtruth data +------------------------- + +Using GIMP or Photoshop +^^^^^^^^^^^^^^^^^^^^^^^ 
+Create directly your masks using your favorite image editor. You just have to draw the regions you want to extract +with a different color for each label. + +Using VGG Image Annotator (VIA) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +`VGG Image Annotator (VIA) `_ is an image annotation tool that can be +used to define regions in an image and create textual descriptions of those regions. You can either use it +`online `_ or +`download the application `_. + +From the exported annotations (in JSON format), you'll have to generate the corresponding image masks. +See the :ref:`ref_via` in the ``via`` module. + +When assigning attributes to your annotated regions, you should favour attributes of type "dropdown", "checkbox" +and "radio" and avoid "text" type in order to ease the parsing of the exported file (avoid typos and formatting errors). + + + diff --git a/doc/start/index.rst b/doc/start/index.rst index 208f598..e48554d 100644 --- a/doc/start/index.rst +++ b/doc/start/index.rst @@ -3,5 +3,6 @@ Quickstart .. toctree:: install + annotating training demo \ No newline at end of file diff --git a/environment.yml b/environment.yml index 05f572f..5f84ccf 100644 --- a/environment.yml +++ b/environment.yml @@ -21,4 +21,5 @@ dependencies: - sphinx-autodoc-typehints==1.3.0 - sphinx-rtd-theme==0.4.1 - sphinxcontrib-bibtex==0.4.0 + - "--editable=git+https://github.com/solivr/taputapu.git@master#egg=taputapu" diff --git a/setup.py b/setup.py index aca9532..e132523 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,10 @@ 'scikit-learn', 'opencv-python', 'tqdm', + 'taputapu' + ], + dependency_links=[ + 'git+ssh://git@github.com/solivr/taputapu.git#egg=taputapu' ], extras_require={ 'doc': [ From 665af9929e8388952e16d6c59acbe5b87ac7dcde Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 11 Feb 2019 16:18:28 +0100 Subject: [PATCH 37/57] doc formatting --- dh_segment/io/PAGE.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index 2d62c98..2df94c0 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -332,6 +332,7 @@ def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[Text def sort_text_lines(self, top_to_bottom: bool=True) -> None: """ Sorts ``TextLine``s from top to bottom according to their mean y coordinate (centroid) + :param top_to_bottom: order lines from top to bottom of image, default=True """ if top_to_bottom: From 84ec4ddd989049ec9faf91686d5d0207c5e9e7a1 Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 4 Dec 2018 14:41:01 +0100 Subject: [PATCH 38/57] parse attributes of TextRegion and TextLines 'custom' and 'type' --- dh_segment/io/PAGE.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index 2df94c0..c17257e 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -160,12 +160,15 @@ class Region(BaseElement): :ivar id: identifier of the `Region` :ivar coords: coordinates of the `Region` + :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'Region' - def __init__(self, id: str=None, coords: List[Point]=None): + def __init__(self, id: str=None, coords: List[Point]=None, custom_attribute: str=None): self.coords = coords if coords is not None else [] self.id = id + self.custom_attribute = custom_attribute if custom_attribute is not None else '' @classmethod def from_xml(cls, etree_element: ET.Element) -> dict: @@ -175,6 
+178,7 @@ def from_xml(cls, etree_element: ET.Element) -> dict: :return: a dictionary with keys 'id' and 'coords' """ return {'id': etree_element.attrib.get('id'), + 'custom_attribute': etree_element.attrib.get('custom'), 'coords': Point.list_from_xml(etree_element.find('p:Coords', _ns))} def to_xml(self, name_element: str=None) -> ET.Element: @@ -185,6 +189,7 @@ def to_xml(self, name_element: str=None) -> ET.Element: """ et = ET.Element(name_element if name_element is not None else '') et.set('id', self.id if self.id is not None else '') + et.set('custom', self.custom_attribute if self.custom_attribute is not None else '') if not not self.coords: coords = ET.SubElement(et, 'Coords') coords.set('points', Point.list_point_to_string(self.coords)) @@ -209,6 +214,7 @@ def from_dict(cls, dictionary: dict) -> dict: :return: non serialized dictionary """ return {'id': dictionary.get('id'), + 'custom_attribute': dictionary.get('custom_attribute'), 'coords': Point.list_to_point(dictionary.get('coords')) } @@ -222,13 +228,14 @@ class TextLine(Region): :ivar text: `Text` class containing the transcription of the `TextLine` :ivar line_group_id: identifier of the line group the instance belongs to :ivar column_group_id: identifier of the column group the instance belongs to - + :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'TextLine' def __init__(self, id: str = None, coords: List[Point] = None, baseline: List[Point] = None, text: Text = None, - line_group_id: str = None, column_group_id: str = None): - super().__init__(id=id if id is not None else str(uuid4()), coords=coords) + line_group_id: str = None, column_group_id: str = None, custom_attribute: str=None): + super().__init__(id=id if id is not None else str(uuid4()), coords=coords, custom_attribute=custom_attribute) self.baseline = baseline if baseline is not None else [] self.text = text if text is not None else Text() self.line_group_id = line_group_id if line_group_id is not None else '' @@ -321,13 +328,18 @@ class TextRegion(Region): :ivar coords: coordinates of the `TextRegion` :ivar text_equiv: the resulting text of the `Text` contained in the `TextLines` :ivar text_lines: a list of `TextLine` objects + :ivar region_type: the type of a TextRegion (can be any string). Example : header, paragraph, page-number... 
+ :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'TextRegion' - def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[TextLine]=None, text_equiv: str=''): - super().__init__(id=id, coords=coords) + def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[TextLine]=None, text_equiv: str='', + region_type: str=None, custom_attribute: str=None): + super().__init__(id=id, coords=coords, custom_attribute=custom_attribute) self.text_equiv = text_equiv if text_equiv is not None else '' self.text_lines = text_lines if text_lines is not None else [] + self.type = region_type if region_type is not None else '' def sort_text_lines(self, top_to_bottom: bool=True) -> None: """ @@ -346,11 +358,13 @@ def from_xml(cls, e: ET.Element) -> 'TextRegion': return TextRegion( **super().from_xml(e), text_lines=[TextLine.from_xml(tl) for tl in e.findall('p:TextLine', _ns)], - text_equiv=_get_text_equiv(e) + text_equiv=_get_text_equiv(e), + region_type=e.attrib.get('type') ) def to_xml(self, name_element='TextRegion') -> ET.Element: text_et = super().to_xml(name_element=name_element) + text_et.set('type', self.type if self.type is not None else '') for tl in self.text_lines: text_et.append(tl.to_xml()) text_equiv = ET.SubElement(text_et, 'TextEquiv') @@ -366,7 +380,8 @@ def to_dict(self, non_serializable_keys: List[str]=list()): def from_dict(cls, dictionary: dict) -> 'TextRegion': return cls(**super().from_dict(dictionary), text_lines=[TextLine.from_dict(tl) for tl in dictionary.get('text_lines', list())], - text_equiv=dictionary.get('text_equiv') + text_equiv=dictionary.get('text_equiv'), + region_type=dictionary.get('region_type') ) From 77bb4f32cb5163501a45af5f650d06b2d771531a Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 11 Feb 2019 18:16:20 +0100 Subject: [PATCH 39/57] remove git repo dependency --- dh_segment/io/via.py | 10 ++++++++-- environment.yml | 3 +-- setup.py | 5 +---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 8d34fe4..7443a98 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -13,10 +13,10 @@ from collections import namedtuple from imageio import imsave, imread import requests +from PIL import Image from itertools import filterfalse, chain from typing import List, Tuple, Dict import cv2 -from taputapu.io.image import get_image_shape_without_loading # To define before using the corresponding functions @@ -156,13 +156,19 @@ def _formatting(name_id: str) -> str: name_id = re.sub('.png\d*', '.png', name_id) return name_id + def _get_image_shape_without_loading(filename: str) -> Tuple[int, int]: + image = Image.open(filename) + shape = image.size + image.close() + return shape + working_items = list() for key, v in tqdm(via_annotations.items()): filename = _formatting(key) absolute_filename = os.path.join(images_dir, filename) - shape_image = get_image_shape_without_loading(absolute_filename) + shape_image = _get_image_shape_without_loading(absolute_filename) regions = v['regions'] diff --git a/environment.yml b/environment.yml index 5f84ccf..c0dfa1e 100644 --- a/environment.yml +++ b/environment.yml @@ -13,6 +13,7 @@ dependencies: - setuptools=39.1.0 - shapely=1.6.4 - tqdm=4.23.3 + - requests=2.21.0 - pip: - better-exceptions==0.2.1 - sacred==0.7.3 @@ -21,5 +22,3 @@ dependencies: - sphinx-autodoc-typehints==1.3.0 - sphinx-rtd-theme==0.4.1 - sphinxcontrib-bibtex==0.4.0 - - 
"--editable=git+https://github.com/solivr/taputapu.git@master#egg=taputapu" - diff --git a/setup.py b/setup.py index e132523..fd956a5 100644 --- a/setup.py +++ b/setup.py @@ -21,10 +21,7 @@ 'scikit-learn', 'opencv-python', 'tqdm', - 'taputapu' - ], - dependency_links=[ - 'git+ssh://git@github.com/solivr/taputapu.git#egg=taputapu' + 'requests==2.21.0', ], extras_require={ 'doc': [ From 909e8b1779c592a6b497ec2b168f0307dbfb13bb Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 13 Feb 2019 17:36:24 +0100 Subject: [PATCH 40/57] corrected wrong argument names --- train.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/train.py b/train.py index 69bae23..4c7d759 100644 --- a/train.py +++ b/train.py @@ -75,18 +75,17 @@ def run(train_data, eval_data, model_output_dir, gpu, training_params, _config): def get_dirs_or_files(input_data): if os.path.isdir(input_data): - train_input, train_labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') + image_input, labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') # Check if training dir exists - if not os.path.isdir(train_input): - raise FileNotFoundError(train_input) - if not os.path.isdir(train_labels_input): - raise FileNotFoundError(train_labels_input) - elif os.path.isfile(train_data) and train_data.endswith('.csv'): - train_input = train_data - train_labels_input = None + assert os.path.isdir(image_input), "{} is not a directory".format(image_input) + assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input) + + elif os.path.isfile(input_data) and input_data.endswith('.csv'): + image_input = input_data + labels_input = None else: raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data)) - return train_input, train_labels_input + return image_input, labels_input train_input, train_labels_input = get_dirs_or_files(train_data) if eval_data is not None: From 67173326e2128607dd62ccded347740fce6ed85d Mon Sep 17 00:00:00 2001 From: soliveir Date: Wed, 13 Feb 2019 18:08:36 +0100 Subject: [PATCH 41/57] wrong variable name --- dh_segment/io/input.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dh_segment/io/input.py b/dh_segment/io/input.py index 453ca2d..dd3bd7e 100644 --- a/dh_segment/io/input.py +++ b/dh_segment/io/input.py @@ -161,9 +161,9 @@ def _assign_color_to_class_id(input_image, label_image): if not os.path.exists(img_filename): raise FileNotFoundError(img_filename) if has_labelled_data: - for img_filename in input_image_filenames: - if not os.path.exists(img_filename): - raise FileNotFoundError(img_filename) + for label_filename in label_image_filenames: + if not os.path.exists(label_filename): + raise FileNotFoundError(label_filename) # Tensorflow input_fn def fn(): From 704087a733b9d30df5e79dfe10a5942e62a6dd45 Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 12 Feb 2019 09:24:59 +0100 Subject: [PATCH 42/57] via example and doc formatting --- dh_segment/inference/loader.py | 29 +++++++++++++++-------------- dh_segment/io/PAGE.py | 2 +- doc/start/annotating.rst | 29 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/dh_segment/inference/loader.py b/dh_segment/inference/loader.py index 537ffe6..4949673 100644 --- a/dh_segment/inference/loader.py +++ b/dh_segment/inference/loader.py @@ -63,21 +63,22 @@ def __init__(self, model_base_dir, predict_mode='filename', num_parallel_predict def predict(self, input_tensor, 
prediction_key=None): """ - Performs the prediction from the loaded model according to the prediction mode. + Performs the prediction from the loaded model according to the prediction mode. \n Prediction modes: -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ -| `prediction_mode` | `input_tensor` | Output prediction dictionnary | Comment | -+=============================+===============================================+======================================+===================================================================================================+ -| `filename` | Single filename string | `labels`, `probs`, `original_shape` | Loads the image, resizes it, and predicts | -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ -| `filename_original_shape` | Single filename string | `labels`, `probs` | Loads the image, resizes it, predicts and scale the output to the original resolution of the file | -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ -| `image` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs`, `original_shape` | Resizes the image, and predicts | -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ -| `image_original_shape` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Resizes the image, predicts, and scale the output to the original resolution of the input | -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ -| `image_resized` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Predicts from the image input directly | -+-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `prediction_mode` | `input_tensor` | Output prediction dictionnary | Comment | + +=============================+===============================================+======================================+===================================================================================================+ + | `filename` | Single filename string | `labels`, `probs`, `original_shape` | Loads the image, resizes it, and predicts | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `filename_original_shape` | Single filename string | `labels`, `probs` | Loads the image, 
resizes it, predicts and scale the output to the original resolution of the file | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs`, `original_shape` | Resizes the image, and predicts | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image_original_shape` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Resizes the image, predicts, and scale the output to the original resolution of the input | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image_resized` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Predicts from the image input directly | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ :param input_tensor: a single input whose format should match the prediction mode :param prediction_key: if not `None`, will returns the value of the corresponding key of the output dictionnary \ diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index c17257e..a51b6ee 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -343,7 +343,7 @@ def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[Text def sort_text_lines(self, top_to_bottom: bool=True) -> None: """ - Sorts ``TextLine``s from top to bottom according to their mean y coordinate (centroid) + Sorts ``TextLine`` from top to bottom according to their mean y coordinate (centroid) :param top_to_bottom: order lines from top to bottom of image, default=True """ diff --git a/doc/start/annotating.rst b/doc/start/annotating.rst index 519e06b..b07cfd9 100644 --- a/doc/start/annotating.rst +++ b/doc/start/annotating.rst @@ -19,5 +19,34 @@ See the :ref:`ref_via` in the ``via`` module. When assigning attributes to your annotated regions, you should favour attributes of type "dropdown", "checkbox" and "radio" and avoid "text" type in order to ease the parsing of the exported file (avoid typos and formatting errors). +**Example of how to create individual masks from VIA annotation file** +.. 
code:: python + + from dh_segment.io import via + + collection = 'mycollection' + annotation_file = 'via_sample.json' + masks_dir = '/home/project/generated_masks' + images_dir = './my_images' + + # Load all the data in the annotation file + # (the file may be an exported project or an export of the annotations) + via_data = via.load_annotation_data(annotation_file) + + # In the case of an exported project file, you can set ``only_img_annotations=True`` + # to get only the image annotations + via_annotations = via.load_annotation_data(annotation_file, only_img_annotations=True) + + # Collect the annotated regions + working_items = via.collect_working_items(via_annotations, collection, images_dir) + + # Collect the attributes and options + if '_via_attributes' in via_data.keys(): + list_attributes = via.parse_via_attributes(via_data['_via_attributes']) + else: + list_attributes = via.get_via_attributes(via_annotations) + + # Create one mask per option per attribute + via.create_masks(masks_dir, working_items, via_attributes, collection) From 04ce8b6db9a3fef3840c7fbbb8e65950851a3355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alix=20Chagu=C3=A9?= <33317799+alix-tz@users.noreply.github.com> Date: Wed, 20 Feb 2019 17:36:10 +0100 Subject: [PATCH 43/57] Correcting typo masks creation script --- doc/start/annotating.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/start/annotating.rst b/doc/start/annotating.rst index b07cfd9..8ea11ed 100644 --- a/doc/start/annotating.rst +++ b/doc/start/annotating.rst @@ -48,5 +48,5 @@ and "radio" and avoid "text" type in order to ease the parsing of the exported f list_attributes = via.get_via_attributes(via_annotations) # Create one mask per option per attribute - via.create_masks(masks_dir, working_items, via_attributes, collection) + via.create_masks(masks_dir, working_items, list_attributes, collection) From 1262b5940aae9554873f709e23a67cd3c9be7f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alix=20Chagu=C3=A9?= <33317799+alix-tz@users.noreply.github.com> Date: Tue, 26 Feb 2019 15:15:07 +0100 Subject: [PATCH 44/57] Fixing instruction --- doc/start/demo.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/start/demo.rst b/doc/start/demo.rst index e342bb4..0a4c68d 100644 --- a/doc/start/demo.rst +++ b/doc/start/demo.rst @@ -16,7 +16,7 @@ In order to limit memory usage, the images in the dataset we provide have been d git clone https://github.com/dhlab-epfl/dhSegment.git 1. Get the annotated dataset `here`_, which already contains the folders ``images`` and ``labels`` -for training, validation and testing set. Unzip it into ``model/pages``. :: +for training, validation and testing set. Unzip it into ``demo/pages``. 
:: cd demo/ wget https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/pages.zip From 6fdfcbdff5612dca50860b03ff6267495fc68b13 Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 7 Mar 2019 15:20:05 +0100 Subject: [PATCH 45/57] do not export attribute 'type' if it's empty --- dh_segment/io/PAGE.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index a51b6ee..e849a23 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -364,7 +364,8 @@ def from_xml(cls, e: ET.Element) -> 'TextRegion': def to_xml(self, name_element='TextRegion') -> ET.Element: text_et = super().to_xml(name_element=name_element) - text_et.set('type', self.type if self.type is not None else '') + if self.type is not None and self.type != '': + text_et.set('type', self.type) for tl in self.text_lines: text_et.append(tl.to_xml()) text_equiv = ET.SubElement(text_et, 'TextEquiv') @@ -680,7 +681,7 @@ def _write_xml(): root.append(self.to_xml()) for k, v in _attribs.items(): root.attrib[k] = v - ET.ElementTree(element=root).write(filename) + ET.ElementTree(element=root).write(filename, encoding='utf-8') def _write_json(): self_dict = vars(self) From 8fbd882e640b263028a91dd5519175aaee82e457 Mon Sep 17 00:00:00 2001 From: soliveir Date: Mon, 25 Feb 2019 12:22:31 +0100 Subject: [PATCH 46/57] array to list of Point method --- dh_segment/io/PAGE.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index e849a23..a374a2e 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -99,12 +99,21 @@ def array_to_list(cls, array: np.ndarray) -> list: """ return [list(pt) for pt in array] + @classmethod + def array_to_point(cls, array: np.ndarray) -> list: + """Converts an `np.array` to a list of `Point` + + :param array: an array of coordinates. 
Must be of shape (N, 2) + :return: list of `Point` + """ + return cls.list_to_point(list(array)) + @classmethod def list_to_point(cls, list_coords: list) -> List['Point']: """Converts a list of coordinates to a list of `Point` :param list_coords: list of coordinates, shape (N, 2) - :return: list of Points + :return: list of `Point` """ return [cls(coord[1], coord[0]) for coord in list_coords if list_coords] From 2af56f21ec4a9a1461dcdfd9ddabba24ac9e64bc Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 12 Mar 2019 15:14:46 +0100 Subject: [PATCH 47/57] update parsing + get list of tags from xml --- dh_segment/io/PAGE.py | 29 +++++++++++++++++++++++++++-- dh_segment/io/input.py | 2 ++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index a374a2e..35258e4 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -8,6 +8,7 @@ from uuid import uuid4 from shapely.geometry import Polygon from abc import ABC +import re # https://docs.python.org/3.5/library/xml.etree.elementtree.html#parsing-xml-with-namespaces _ns = {'p': 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15'} @@ -54,6 +55,9 @@ def list_from_xml(cls, etree_elem: ET.Element) -> List['Point']: if etree_elem is None: # print('warning, trying to construct list of points from None, defaulting to []') return [] + if etree_elem.attrib['points'] == "": + # print('warning, trying to construct list of points from empty string, defaulting to []') + return [] t = etree_elem.attrib['points'] result = [] for p in t.split(' '): @@ -454,8 +458,8 @@ class SeparatorRegion(Region): tag = 'SeparatorRegion' - def __init__(self, id: str, coords: List[Point]=None): - super().__init__(id=id, coords=coords) + def __init__(self, id: str, coords: List[Point]=None, custom_attribute: str=None): + super().__init__(id=id, coords=coords, custom_attribute=custom_attribute) @classmethod def from_xml(cls, e: ET.Element) -> 'SeparatorRegion': @@ -1042,3 +1046,24 @@ def save_baselines(filename, baselines, ratio=(1, 1), initial_shape=None): image_height=int(initial_shape[0]*ratio[0]) if initial_shape is not None else None, image_width=int(initial_shape[1]*ratio[1]) if initial_shape is not None else None) page.write_to_file(filename) + + +def get_unique_tags_from_xml_text_regions(xml_filename: str, + tag_pattern: str='{type:.*;}'): + """ + Get a list of all the values of labels/tags + + :param xml_filename: filename of the xml file + :param tag_pattern: regular expression pattern to look for in `TextRegion.custom_attribute` + :return: + """ + tagset = list() + page = parse_file(xml_filename) + for tr in page.text_regions: + custom_attribute = tr.custom_attribute + matches = re.findall(tag_pattern, custom_attribute) + assert len(matches) <= 1, "Found multiple matches in {}".format(custom_attribute) + if matches: + tagset.append(matches[0][6:-2]) + + return list(np.unique(tagset)) diff --git a/dh_segment/io/input.py b/dh_segment/io/input.py index dd3bd7e..3f29627 100644 --- a/dh_segment/io/input.py +++ b/dh_segment/io/input.py @@ -193,6 +193,8 @@ def fn(): if make_patches and input_label_dir: base_shape_images = list(training_params.patch_shape) + elif make_patches and input_case == InputCase.INPUT_CSV: + base_shape_images = list(training_params.patch_shape) else: base_shape_images = [-1, -1] # Pad things From 8deae4477b1a8d7ad2d1d856f9308ac53c3675c6 Mon Sep 17 00:00:00 2001 From: soliveir Date: Fri, 8 Mar 2019 17:08:42 +0100 Subject: [PATCH 48/57] miou metric weights in IOU evaluation --- 
dh_segment/estimator_fn.py | 10 ++++++++-- dh_segment/utils/params_config.py | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dh_segment/estimator_fn.py b/dh_segment/estimator_fn.py index d25303b..83050fa 100644 --- a/dh_segment/estimator_fn.py +++ b/dh_segment/estimator_fn.py @@ -207,14 +207,20 @@ def _fn(_in): # ---------- if mode == tf.estimator.ModeKeys.EVAL: if prediction_type == PredictionType.CLASSIFICATION: - metrics = {'eval/accuracy': tf.metrics.accuracy(labels, predictions=prediction_labels)} + metrics = { + 'eval/accuracy': tf.metrics.accuracy(labels, predictions=prediction_labels), + 'eval/mIOU': tf.metrics.mean_iou(labels, prediction_labels, num_classes=model_params.n_classes,) + # weights=tf.cast(training_params.weights_evaluation_miou, tf.float32)) + } elif prediction_type == PredictionType.REGRESSION: metrics = {'eval/accuracy': tf.metrics.mean_squared_error(labels, predictions=prediction_labels)} elif prediction_type == PredictionType.MULTILABEL: metrics = {'eval/MSE': tf.metrics.mean_squared_error(tf.cast(labels, tf.float32), predictions=prediction_probs), 'eval/accuracy': tf.metrics.accuracy(tf.cast(labels, tf.bool), - predictions=tf.cast(prediction_labels, tf.bool)) + predictions=tf.cast(prediction_labels, tf.bool)), + 'eval/mIOU': tf.metrics.mean_iou(labels, prediction_labels, num_classes=model_params.n_classes) + # weights=training_params.weights_evaluation_miou) } else: metrics = None diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py index e926b06..313f40c 100644 --- a/dh_segment/utils/params_config.py +++ b/dh_segment/utils/params_config.py @@ -208,6 +208,7 @@ def __init__(self, **kwargs): self.patch_shape = kwargs.get('patch_shape', (300, 300)) self.input_resized_size = int(kwargs.get('input_resized_size', 72e4)) # (600*1200) self.weights_labels = kwargs.get('weights_labels') + self.weights_evaluation_miou = kwargs.get('weights_evaluation_miou', None) self.training_margin = kwargs.get('training_margin', 16) self.local_entropy_ratio = kwargs.get('local_entropy_ratio', 0.) self.local_entropy_sigma = kwargs.get('local_entropy_sigma', 3) @@ -216,4 +217,4 @@ def __init__(self, **kwargs): def check_params(self) -> None: """Checks if there is no parameter inconsistency """ - assert self.training_margin*2 < min(self.patch_shape) \ No newline at end of file + assert self.training_margin*2 < min(self.patch_shape) From 540eb36563da5c7721339b55db2ce27eb31a5997 Mon Sep 17 00:00:00 2001 From: soliveir Date: Thu, 4 Apr 2019 17:45:22 +0200 Subject: [PATCH 49/57] to_json method for Page class --- dh_segment/io/PAGE.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index 35258e4..5214735 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -676,6 +676,14 @@ def to_xml(self) -> ET.Element: # page_et.append(self.metadata.to_xml()) return page_et + def to_json(self) -> dict: + self_dict = vars(self) + + serializable_keys = ['image_filename', 'image_height', 'image_width'] + json_dict = json_serialize(self_dict, [k for k in self_dict.keys() if k not in serializable_keys]) + + return json_dict + def write_to_file(self, filename: str, creator_name: str='dhSegment', comments: str='') -> None: """ Export Page object to json or page-xml format. 
Will assume the format based on the extension of the filename, @@ -697,14 +705,8 @@ def _write_xml(): ET.ElementTree(element=root).write(filename, encoding='utf-8') def _write_json(): - self_dict = vars(self) - - # json_dict = dict() - serializable_keys = ['image_filename', 'image_height', 'image_width'] - json_dict = json_serialize(self_dict, [k for k in self_dict.keys() if k not in serializable_keys]) - with open(filename, 'w', encoding='utf8') as file: - json.dump(json_dict, file, indent=4, sort_keys=True, allow_nan=False) + json.dump(self.to_json(), file, indent=4, sort_keys=True, allow_nan=False) # Updating metadata self.metadata.creator = creator_name From 605a930b5a6b7bd6c603abc7e8e4b7b16eb81c5c Mon Sep 17 00:00:00 2001 From: soliveir Date: Tue, 9 Apr 2019 16:14:37 +0200 Subject: [PATCH 50/57] updated via helpers --- dh_segment/io/via.py | 102 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 7 deletions(-) diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py index 7443a98..a42e85a 100644 --- a/dh_segment/io/via.py +++ b/dh_segment/io/via.py @@ -17,6 +17,7 @@ from itertools import filterfalse, chain from typing import List, Tuple, Dict import cv2 +from . import PAGE # To define before using the corresponding functions @@ -302,22 +303,25 @@ def _getimage_from_iiif(url, user, pwd): imsave(outfile, img_resized.astype(np.uint8)) -def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False) -> dict: +def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False, via_version: int=2) -> dict: """ Load the content of via annotation files. :param via_data_filename: via annotations json file :param only_img_annotations: load only the images annotations ('_via_img_metadata' field) + :param via_version: :return: the content of json file containing the region annotated """ - with open(via_data_filename, 'r') as f: + with open(via_data_filename, 'r', encoding='utf8') as f: content = json.load(f) + if via_version == 2: + assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export." - assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export." 
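
A hedged usage sketch of the updated loader (the file names are hypothetical)::

    # VIA 2 project export: the full dict, or only the image annotations
    project = load_annotation_data('via_project.json')
    annotations = load_annotation_data('via_project.json', only_img_annotations=True)

    # VIA 1 export: returned as-is, no '_via_img_metadata' wrapper expected
    via1_content = load_annotation_data('via1_regions.json', via_version=1)
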
From 605a930b5a6b7bd6c603abc7e8e4b7b16eb81c5c Mon Sep 17 00:00:00 2001
From: soliveir
Date: Tue, 9 Apr 2019 16:14:37 +0200
Subject: [PATCH 50/57] updated via helpers

---
 dh_segment/io/via.py | 102 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 95 insertions(+), 7 deletions(-)

diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py
index 7443a98..a42e85a 100644
--- a/dh_segment/io/via.py
+++ b/dh_segment/io/via.py
@@ -17,6 +17,7 @@
 from itertools import filterfalse, chain
 from typing import List, Tuple, Dict
 import cv2
+from . import PAGE

 # To define before using the corresponding functions
@@ -302,22 +303,25 @@ def _getimage_from_iiif(url, user, pwd):
         imsave(outfile, img_resized.astype(np.uint8))


-def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False) -> dict:
+def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False, via_version: int=2) -> dict:
     """
     Load the content of via annotation files.

     :param via_data_filename: via annotations json file
     :param only_img_annotations: load only the images annotations ('_via_img_metadata' field)
+    :param via_version: version of the VIA tool used to produce the annotations (1 or 2)
     :return: the content of json file containing the region annotated
     """
-    with open(via_data_filename, 'r') as f:
+    with open(via_data_filename, 'r', encoding='utf8') as f:
         content = json.load(f)
+    if via_version == 2:
+        assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export."

-    assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export."
-
-    if only_img_annotations:
-        return content['_via_img_metadata']
+        if only_img_annotations:
+            return content['_via_img_metadata']
+        else:
+            return content
     else:
         return content
@@ -330,7 +334,7 @@ def export_annotation_dict(annotation_dict: dict, filename: str) -> None:
     :param filename: filename to export the data (json file)
     :return:
     """
-    with open(filename, 'w') as f:
+    with open(filename, 'w', encoding='utf8') as f:
         json.dump(annotation_dict, f)
@@ -566,6 +570,43 @@ def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, lab
     return annotation_summary


+def _get_coordinates_from_xywh(via_regions: List[dict]) -> List[np.array]:
+    """
+    From VIA region dictionaries, get the coordinates array (N,2) of the annotations
+
+    :param via_regions:
+    :return:
+    """
+    list_coordinates_regions = list()
+    for region in via_regions:
+        shape_attributes_dict = region['shape_attributes']
+        if shape_attributes_dict['name'] == 'rect':
+            x = shape_attributes_dict['x']
+            y = shape_attributes_dict['y']
+            w = shape_attributes_dict['width']
+            h = shape_attributes_dict['height']
+
+            coordinates = np.array([[x, y],
+                                    [x + w, y],
+                                    [x + w, y + h],
+                                    [x, y + h]
+                                    ])
+            list_coordinates_regions.append(coordinates)
+        elif shape_attributes_dict['name'] == 'polygon':
+            coordinates = np.stack([shape_attributes_dict['all_points_x'],
+                                    shape_attributes_dict['all_points_y']], axis=1)
+            list_coordinates_regions.append(coordinates)
+        elif shape_attributes_dict['name'] == 'polyline':
+            coordinates = np.stack([shape_attributes_dict['all_points_x'],
+                                    shape_attributes_dict['all_points_y']], axis=1)
+            list_coordinates_regions.append(coordinates)
+        else:
+            raise NotImplementedError(
+                "This method has not been implemented yet for {}".format(shape_attributes_dict['name']))
+
+    return list_coordinates_regions
+
+
 # EXPORT
 # ------
@@ -650,6 +691,53 @@ def create_via_annotation_single_image(img_filename: str, via_regions: List[dict
     return {via_key: via_annotation}


+# PAGE CONVERSION
+# ---------------
+
+def convert_via_region_page_text_region(working_item: WorkingItem, structure_label: str) -> PAGE.Page:
+    """
+
+    :param working_item:
+    :param structure_label:
+    :return:
+    """
+
+    # TODO : this is not yet generic because we're missing the automatic detection of the structure label
+
+    region_coordinates = _get_coordinates_from_xywh(working_item.annotations)
+
+    page = PAGE.Page(image_filename=working_item.image_name + '.jpg',
+                     image_width=working_item.original_x,
+                     image_height=working_item.original_y,
+                     graphic_regions=[
+                         PAGE.TextRegion(coords=PAGE.Point.array_to_point(coords),
+                                         custom_attribute='structure{{type:{};}}'.format(structure_label))
+                         for coords in region_coordinates])
+    return page
+
+
+# def convert_via_region_to_text_region(via_regions: List[dict], structure_label: str) -> PAGE.TextRegion:
+#     """
+#
+#     :param via_region:
+#     :param structure_label:
+#     :return:
+#     """
+#
+#     # TODO : this is not yet generic because we're missing the automatic detection of the structure label
+#
+#     region_coordinates = _get_coordinates_from_xywh(working_item.annotations)
+#
+#     page = PAGE.Page(image_filename=working_item.image_name + '.jpg',
+#                      image_width=working_item.original_x,
+#                      image_height=working_item.original_y,
+#                      graphic_regions=[
+#                          PAGE.TextRegion(coords=PAGE.Point.array_to_point(coords),
+#                                          custom_attribute='structure{{type:{};}}'.format(structure_label))
+#                          for coords in region_coordinates])
+#     return page


 """
 Example of usage
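To make the coordinate convention of ``_get_coordinates_from_xywh`` concrete, here is a standalone sketch with a made-up ``'rect'`` region (corners listed clockwise from the top-left, as in the helper above)::

    import numpy as np

    shape = {'name': 'rect', 'x': 10, 'y': 20, 'width': 100, 'height': 50}
    x, y, w, h = shape['x'], shape['y'], shape['width'], shape['height']
    # top-left, top-right, bottom-right, bottom-left
    coords = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]])
    print(coords.shape)  # (4, 2), ready for PAGE.Point.array_to_point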
From 6456a69699a642cc370ed8f3d375272385a9818f Mon Sep 17 00:00:00 2001
From: soliveir
Date: Tue, 9 Apr 2019 16:15:08 +0200
Subject: [PATCH 51/57] update packages version

---
 environment.yml | 32 ++++++++++++++++----------------
 setup.py        | 22 +++++++++++-----------
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/environment.yml b/environment.yml
index ba91049..f032a88 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,24 +2,24 @@ name: dh_segment
 channels:
   - defaults
 dependencies:
-  - imageio=2.3.0
-  - opencv=3.4.1
-  - numpy=1.14.5
-  - pandas=0.23.0
-  - pillow=5.1.0
+  - imageio=2.5.0
+  - numpy=1.16.2
+  - pandas=0.24.2
+  - pillow=5.4.1
   - python=3.6
-  - scikit-image=0.13.1
-  - scikit-learn=0.19.1
-  - scipy=1.1.0
-  - setuptools=39.1.0
+  - scikit-image=0.14.2
+  - scikit-learn=0.20.3
+  - scipy=1.2.1
+  - setuptools=40.8.0
   - shapely=1.6.4
-  - tqdm=4.23.3
+  - tensorflow-gpu==1.13.1
+  - tqdm=4.31.1
   - requests=2.21.0
   - pip:
     - better-exceptions==0.2.1
-    - sacred==0.7.3
-    - tensorflow-gpu==1.11
-    - sphinx==1.8.1
-    - sphinx-autodoc-typehints==1.3.0
-    - sphinx-rtd-theme==0.4.1
-    - sphinxcontrib-bibtex==0.4.0
+    - opencv-python==4.0.1.23
+    - sacred==0.7.4
+    - sphinx
+    - sphinx-autodoc-typehints
+    - sphinx-rtd-theme
+    - sphinxcontrib-bibtex

diff --git a/setup.py b/setup.py
index b4670aa..79d54ca 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(name='dh_segment',
-      version='0.3',
+      version='0.4.0',
       license='GPL',
       url='https://github.com/dhlab-epfl/dhSegment',
       description='Generic framework for historical document processing',
@@ -12,17 +12,17 @@
           'Source Code': 'https://github.com/dhlab-epfl/dhSegment'
       },
       install_requires=[
-          'tensorflow-gpu==1.11',
-          'numpy==1.14.5',
-          'imageio==2.3.0',
-          'pandas==0.23.0',
-          'scipy==1.1.0',
+          'tensorflow-gpu==1.13',
+          'numpy==1.16.2',
+          'imageio==2.5.0',
+          'pandas==0.24.2',
+          'scipy==1.2.1',
           'shapely==1.6.4',
-          'scikit-learn==0.19.1',
-          'scikit-image==0.13.1',
-          'opencv-python==3.4.1.15',
-          'tqdm==4.23.3',
-          'sacred==0.7.3',
+          'scikit-learn==0.20.3',
+          'scikit-image==0.15.0',
+          'opencv-python==4.0.1.23',
+          'tqdm==4.31.1',
+          'sacred==0.7.4',
           'requests==2.21.0'
       ],
       extras_require={

From a07244222a2cad19fb5ff6f6ea0a2b3d979c400c Mon Sep 17 00:00:00 2001
From: soliveir
Date: Tue, 9 Apr 2019 17:15:11 +0200
Subject: [PATCH 52/57] update to opencv 4.0

---
 dh_segment/post_processing/boxes_detection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dh_segment/post_processing/boxes_detection.py b/dh_segment/post_processing/boxes_detection.py
index 04ce858..8a12d08 100644
--- a/dh_segment/post_processing/boxes_detection.py
+++ b/dh_segment/post_processing/boxes_detection.py
@@ -25,7 +25,7 @@ def find_boxes(boxes_mask: np.ndarray, mode: str= 'min_rectangle', min_area: flo
     assert len(boxes_mask.shape) == 2, \
         'Input mask must be a 2D array ! Mask is now of shape {}'.format(boxes_mask.shape)

-    _, contours, _ = cv2.findContours(boxes_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours, _ = cv2.findContours(boxes_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours is None:
        print('No contour found')
        return None
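The one-line change above is needed because ``cv2.findContours`` returned a 3-tuple ``(image, contours, hierarchy)`` in OpenCV 3.x but returns a 2-tuple ``(contours, hierarchy)`` in 4.x. A code base that has to support both versions often uses a small wrapper like this sketch (not part of the patch)::

    import cv2

    def find_contours_compat(mask):
        """Return external contours under both OpenCV 3.x and 4.x."""
        result = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # 4.x: (contours, hierarchy); 3.x: (image, contours, hierarchy)
        return result[0] if len(result) == 2 else result[1]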
From fbad361b643d5817db2951ac9e26b7f4a20c42b8 Mon Sep 17 00:00:00 2001
From: soliveir
Date: Tue, 9 Apr 2019 16:15:19 +0200
Subject: [PATCH 53/57] changelog

---
 doc/changelog.rst | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/doc/changelog.rst b/doc/changelog.rst
index 0ecfd19..bc7bc83 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -2,4 +2,32 @@ Changelog
 =========

-TBC
\ No newline at end of file
+Unreleased
+----------
+
+0.4.0 - 2019-04-10
+------------------
+Added
+^^^^^
+
+* Input data can be a .csv file with format ``<image_filename>,<label_filename>``.
+* ``dh_segment.io.via`` helper functions to generate/export groundtruth from/to the VGG Image Annotator (VIA) tool.
+* ``Point.array_to_point`` to export a ``np.array`` into a list of ``Point``.
+* PAGEXML Regions can now contain a custom attribute (Transkribus output of region annotation).
+* ``Page.to_json()`` method for json formatting.
+
+Changed
+^^^^^^^
+
+* ``tensorflow`` v1.13 and ``opencv`` v4.0 are now used.
+* mIOU metric for evaluation during training (instead of accuracy).
+* TextLines are sorted according to their mean `y` coordinate when exported.
+
+Fixed
+^^^^^
+
+* Variable names typos in ``input.py`` and ``train.py``.
+* Documentation of the quickstart demo.
+
+Removed
+^^^^^^^

From 9de5ca791c940b1656f2a5d5e102451572ed8021 Mon Sep 17 00:00:00 2001
From: soliveir
Date: Wed, 10 Apr 2019 17:41:55 +0200
Subject: [PATCH 54/57] fix tensorflow-gpu version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 79d54ca..0d57b37 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
         'Source Code': 'https://github.com/dhlab-epfl/dhSegment'
     },
     install_requires=[
-        'tensorflow-gpu==1.13',
+        'tensorflow-gpu==1.13.1',
         'numpy==1.16.2',
         'imageio==2.5.0',
         'pandas==0.24.2',

From 875c54710c3cc6d08d006bc491f75cfcbc3538c1 Mon Sep 17 00:00:00 2001
From: soliveir
Date: Wed, 15 May 2019 11:50:13 +0200
Subject: [PATCH 55/57] fixes #37

---
 demo.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/demo.py b/demo.py
index d5df073..2984946 100644
--- a/demo.py
+++ b/demo.py
@@ -89,13 +89,16 @@ def format_quad_to_string(quad):
                 cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)
                 # Write corners points into a .txt file
                 txt_coordinates += '{},{}\n'.format(filename, format_quad_to_string(pred_page_coords))
+
+                # Create page region and XML file
+                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))
             else:
                 print('No box found in {}'.format(filename))
+                page_border = PAGE.Border()
+
             basename = os.path.basename(filename).split('.')[0]
             imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)), original_img)

-            # Create page region and XML file
-            page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))
             page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0],
                                  page_border=page_border)
             xml_filename = os.path.join(output_pagexml_dir, '{}.xml'.format(basename))
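The demo fix above guarantees that a PageXML file is written for every input image: when no page box is detected, an empty ``PAGE.Border()`` is used instead of indexing the absent coordinates. The guard pattern, sketched with a hypothetical detection result::

    pred_page_coords = None  # e.g. find_boxes(...) returned nothing
    if pred_page_coords is not None:
        page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))
    else:
        page_border = PAGE.Border()  # empty border, the PageXML file is still produced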
From de461a7924df338eb602d6e8531b62054f265e5a Mon Sep 17 00:00:00 2001
From: Benoit Seguin
Date: Wed, 22 May 2019 14:57:03 +0200
Subject: [PATCH 56/57] working version corrected

---
 dh_segment/estimator_fn.py                 |   4 +-
 dh_segment_train.py => dh_segment/train.py | 268 ++++++++++-----------
 dh_segment/utils/params_config.py          |  16 +-
 dh_segment_train                           |   7 +
 general_config.json                        |  18 +-
 setup.py                                   |   2 +-
 6 files changed, 159 insertions(+), 156 deletions(-)
 rename dh_segment_train.py => dh_segment/train.py (94%)
 create mode 100644 dh_segment_train

diff --git a/dh_segment/estimator_fn.py b/dh_segment/estimator_fn.py
index d863d86..37d92bc 100644
--- a/dh_segment/estimator_fn.py
+++ b/dh_segment/estimator_fn.py
@@ -18,9 +18,9 @@ def model_fn(mode, features, labels, params):
                               mode='SYMMETRIC', name='mirror_padding')

     encoder_class = model_params.get_encoder()
-    encoder = encoder_class(**model_params.encoder_params)
+    encoder = encoder_class(**model_params.encoder_network_params)
     decoder_class = model_params.get_decoder()
-    decoder = decoder_class(**model_params.decoder_params)
+    decoder = decoder_class(**model_params.decoder_network_params)

     is_training = (mode == tf.estimator.ModeKeys.TRAIN)
     feature_maps = encoder(input_images, is_training=is_training)

diff --git a/dh_segment_train.py b/dh_segment/train.py
similarity index 94%
rename from dh_segment_train.py
rename to dh_segment/train.py
index e3b0bf6..234023d 100644
--- a/dh_segment_train.py
+++ b/dh_segment/train.py
@@ -1,135 +1,133 @@
-#!/usr/bin/env python
-
-import os
-import tensorflow as tf
-# Tensorflow logging level
-from logging import WARNING  # import DEBUG, INFO, ERROR for more/less verbosity
-
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}
-tf.logging.set_verbosity(WARNING)
-from dh_segment import estimator_fn, utils
-from dh_segment.io import input
-import json
-
-try:
-    import better_exceptions
-except ImportError:
-    print('/!\ W -- Not able to import package better_exceptions')
-    pass
-from tqdm import trange
-from sacred import Experiment
-
-ex = Experiment('dhSegment_experiment')
-
-
-@ex.config
-def default_config():
-    train_data = None  # Directory with training data
-    eval_data = None  # Directory with validation data
-    model_output_dir = None  # Directory to output tf model
-    restore_model = False  # Set to true to continue training
-    classes_file = None  # txt file with classes values (unused for REGRESSION)
-    gpu = ''  # GPU to be used for training
-    prediction_type = utils.PredictionType.CLASSIFICATION  # One of CLASSIFICATION, REGRESSION or MULTILABEL
-    model_params = utils.ModelParams().to_dict()  # Model parameters
-    training_params = utils.TrainingParams().to_dict()  # Training parameters
-    if prediction_type == utils.PredictionType.CLASSIFICATION:
-        assert classes_file is not None
-        model_params['n_classes'] = utils.get_n_classes_from_file(classes_file)
-    elif prediction_type == utils.PredictionType.REGRESSION:
-        model_params['n_classes'] = 1
-    elif prediction_type == utils.PredictionType.MULTILABEL:
-        assert classes_file is not None
-        model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file)
-
-
-@ex.automain
-def run(train_data, eval_data, model_output_dir, gpu, training_params, _config):
-    # Create output directory
-    if not os.path.isdir(model_output_dir):
-        os.makedirs(model_output_dir)
-    else:
-        assert _config.get('restore_model'), \
-            '{0} already exists, you cannot use it as output directory. ' \
-            'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir)
-    # Save config
-    with open(os.path.join(model_output_dir, 'config.json'), 'w') as f:
-        json.dump(_config, f, indent=4, sort_keys=True)
-
-    # Create export directory for saved models
-    saved_model_dir = os.path.join(model_output_dir, 'export')
-    if not os.path.isdir(saved_model_dir):
-        os.makedirs(saved_model_dir)
-
-    training_params = utils.TrainingParams.from_dict(training_params)
-
-    session_config = tf.ConfigProto()
-    session_config.gpu_options.visible_device_list = str(gpu)
-    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
-    estimator_config = tf.estimator.RunConfig().replace(session_config=session_config,
-                                                        save_summary_steps=10,
-                                                        keep_checkpoint_max=1)
-    estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir,
-                                       params=_config, config=estimator_config)
-
-    def get_dirs_or_files(input_data):
-        if os.path.isdir(input_data):
-            image_input, labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels')
-            # Check if training dir exists
-            assert os.path.isdir(image_input), "{} is not a directory".format(image_input)
-            assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input)
-
-        elif os.path.isfile(input_data) and input_data.endswith('.csv'):
-            image_input = input_data
-            labels_input = None
-        else:
-            raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data))
-        return image_input, labels_input
-
-    train_input, train_labels_input = get_dirs_or_files(train_data)
-    if eval_data is not None:
-        eval_input, eval_labels_input = get_dirs_or_files(eval_data)
-
-    # Configure exporter
-    serving_input_fn = io.input.serving_input_filename(training_params.input_resized_size)
-    exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2)
-
-    #if eval_data is not None:
-    #    exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2)
-    #else:
-    #    exporter = tf.estimator.LatestExporter(name='SimpleExporter', serving_input_receiver_fn=serving_input_fn,
-    #                                           exports_to_keep=5)
-
-    nb_cores = os.cpu_count()
-    if nb_cores:
-        num_threads = min(nb_cores//2, 16)
-    else:
-        num_threads = 4
-
-    for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'):
-        estimator.train(io.input.input_fn(train_input,
-                                          input_label_dir=train_labels_input,
-                                          num_epochs=training_params.evaluate_every_epoch,
-                                          batch_size=training_params.batch_size,
-                                          data_augmentation=training_params.data_augmentation,
-                                          make_patches=training_params.make_patches,
-                                          image_summaries=True,
-                                          params=_config,
-                                          num_threads=num_threads,
-                                          progressbar_description="Training".format(i)))
-
-        if eval_data is not None:
-            eval_result = estimator.evaluate(io.input.input_fn(eval_input,
-                                                               input_label_dir=eval_labels_input,
-                                                               batch_size=1,
-                                                               data_augmentation=False,
-                                                               make_patches=False,
-                                                               image_summaries=False,
-                                                               params=_config,
-                                                               num_threads=num_threads,
-                                                               progressbar_description="Evaluation"))
-        else:
-            eval_result = None
-
-        exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result,
-                        is_the_final_export=False)
+import os
+import tensorflow as tf
+# Tensorflow logging level
+from logging import WARNING  # import DEBUG, INFO, ERROR for more/less verbosity
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}
+tf.logging.set_verbosity(WARNING)
+from dh_segment import estimator_fn, utils
+from dh_segment.io import input
+import json
+
+try:
+    import better_exceptions
+except ImportError:
+    print('/!\ W -- Not able to import package better_exceptions')
+    pass
+from tqdm import trange
+from sacred import Experiment
+
+ex = Experiment('dhSegment_experiment')
+
+
+@ex.config
+def default_config():
+    train_data = None  # Directory with training data
+    eval_data = None  # Directory with validation data
+    model_output_dir = None  # Directory to output tf model
+    restore_model = False  # Set to true to continue training
+    classes_file = None  # txt file with classes values (unused for REGRESSION)
+    gpu = ''  # GPU to be used for training
+    prediction_type = utils.PredictionType.CLASSIFICATION  # One of CLASSIFICATION, REGRESSION or MULTILABEL
+    model_params = utils.ModelParams().to_dict()  # Model parameters
+    training_params = utils.TrainingParams().to_dict()  # Training parameters
+    if prediction_type == utils.PredictionType.CLASSIFICATION:
+        assert classes_file is not None
+        model_params['n_classes'] = utils.get_n_classes_from_file(classes_file)
+    elif prediction_type == utils.PredictionType.REGRESSION:
+        model_params['n_classes'] = 1
+    elif prediction_type == utils.PredictionType.MULTILABEL:
+        assert classes_file is not None
+        model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file)
+
+
+@ex.main
+def run(train_data, eval_data, model_output_dir, gpu, training_params, _config):
+    # Create output directory
+    if not os.path.isdir(model_output_dir):
+        os.makedirs(model_output_dir)
+    else:
+        assert _config.get('restore_model'), \
+            '{0} already exists, you cannot use it as output directory. ' \
+            'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir)
+    # Save config
+    with open(os.path.join(model_output_dir, 'config.json'), 'w') as f:
+        json.dump(_config, f, indent=4, sort_keys=True)
+
+    # Create export directory for saved models
+    saved_model_dir = os.path.join(model_output_dir, 'export')
+    if not os.path.isdir(saved_model_dir):
+        os.makedirs(saved_model_dir)
+
+    training_params = utils.TrainingParams.from_dict(training_params)
+
+    session_config = tf.ConfigProto()
+    session_config.gpu_options.visible_device_list = str(gpu)
+    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
+    estimator_config = tf.estimator.RunConfig().replace(session_config=session_config,
+                                                        save_summary_steps=10,
+                                                        keep_checkpoint_max=1)
+    estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir,
+                                       params=_config, config=estimator_config)
+
+    def get_dirs_or_files(input_data):
+        if os.path.isdir(input_data):
+            image_input, labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels')
+            # Check if training dir exists
+            assert os.path.isdir(image_input), "{} is not a directory".format(image_input)
+            assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input)
+
+        elif os.path.isfile(input_data) and input_data.endswith('.csv'):
+            image_input = input_data
+            labels_input = None
+        else:
+            raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data))
+        return image_input, labels_input
+
+    train_input, train_labels_input = get_dirs_or_files(train_data)
+    if eval_data is not None:
+        eval_input, eval_labels_input = get_dirs_or_files(eval_data)
+
+    # Configure exporter
+    serving_input_fn = input.serving_input_filename(training_params.input_resized_size)
+    exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2)
+
+    #if eval_data is not None:
+    #    exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2)
+    #else:
+    #    exporter = tf.estimator.LatestExporter(name='SimpleExporter', serving_input_receiver_fn=serving_input_fn,
+    #                                           exports_to_keep=5)
+
+    nb_cores = os.cpu_count()
+    if nb_cores:
+        num_threads = min(nb_cores//2, 16)
+    else:
+        num_threads = 4
+
+    for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'):
+        estimator.train(input.input_fn(train_input,
+                        input_label_dir=train_labels_input,
+                        num_epochs=training_params.evaluate_every_epoch,
+                        batch_size=training_params.batch_size,
+                        data_augmentation=training_params.data_augmentation,
+                        make_patches=training_params.make_patches,
+                        image_summaries=True,
+                        params=_config,
+                        num_threads=num_threads,
+                        progressbar_description="Training".format(i)))
+
+        if eval_data is not None:
+            eval_result = estimator.evaluate(input.input_fn(eval_input,
+                                             input_label_dir=eval_labels_input,
+                                             batch_size=1,
+                                             data_augmentation=False,
+                                             make_patches=False,
+                                             image_summaries=False,
+                                             params=_config,
+                                             num_threads=num_threads,
+                                             progressbar_description="Evaluation"))
+        else:
+            eval_result = None
+
+        exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result,
+                        is_the_final_export=False)

diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py
index a91d340..6ba245f 100644
--- a/dh_segment/utils/params_config.py
+++ b/dh_segment/utils/params_config.py
@@ -4,7 +4,7 @@

 from .misc import get_class_from_name
 from ..network.model import Encoder, Decoder
-from typing import Type
+from typing import Type, Optional


 class PredictionType:
@@ -58,23 +58,25 @@ class ModelParams(BaseParams):
     :param n_classes:
     """
     def __init__(self, **kwargs):
-        self.encoder_name = kwargs.get('encoder_name', 'dh_segment.network.pretrained_models.ResnetV1_50')  # type: str
-        self.encoder_params = kwargs.get('encoder_params', dict())  # type: dict
-        self.decoder_name = kwargs.get('decoder_name', 'dh_segment.network.SimpleDecoder')  # type: str
-        self.decoder_params = kwargs.get('decoder_params', {
+        self.encoder_network = kwargs.get('encoder_network', 'dh_segment.network.pretrained_models.ResnetV1_50')  # type: str
+        self.encoder_network_params = kwargs.get('encoder_network_params', dict())  # type: dict
+        self.decoder_network = kwargs.get('decoder_network', 'dh_segment.network.SimpleDecoder')  # type: str
+        self.decoder_network_params = kwargs.get('decoder_network_params', {
             'upsampling_dims': [32, 64, 128, 256, 512]
         })  # type: dict
+        self.full_network = kwargs.get('full_network', None)  # type: Optional[str]
+        self.full_network_params = kwargs.get('full_network_params', dict())  # type: dict
         self.n_classes = kwargs.get('n_classes', None)  # type: int

         self.check_params()

     def get_encoder(self) -> Type[Encoder]:
-        encoder = get_class_from_name(self.encoder_name)
+        encoder = get_class_from_name(self.encoder_network)
         assert issubclass(encoder, Encoder), "{} is not an Encoder".format(encoder)
         return encoder

     def get_decoder(self) -> Type[Decoder]:
-        decoder = get_class_from_name(self.decoder_name)
+        decoder = get_class_from_name(self.decoder_network)
         assert issubclass(decoder, Decoder), "{} is not a Decoder".format(decoder)
         return decoder
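With these renames, a model configuration that previously used ``encoder_name``/``encoder_params`` would now read as follows (a sketch; the defaults are the ones visible in the diff above, and the ``weight_decay`` value mirrors the ``general_config.json`` change below)::

    model_params = {
        'encoder_network': 'dh_segment.network.pretrained_models.ResnetV1_50',
        'encoder_network_params': {'weight_decay': 1e-6},
        'decoder_network': 'dh_segment.network.SimpleDecoder',
        'decoder_network_params': {'upsampling_dims': [32, 64, 128, 256, 512]},
        'n_classes': 2,  # hypothetical value, normally derived from the classes file
    }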
diff --git a/dh_segment_train b/dh_segment_train
new file mode 100644
index 0000000..6beaefd
--- /dev/null
+++ b/dh_segment_train
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+from dh_segment.train import ex
+import sys
+
+if __name__ == '__main__':
+    ex.run_commandline(sys.argv+["--force"])

diff --git a/general_config.json b/general_config.json
index 3101094..a49a268 100644
--- a/general_config.json
+++ b/general_config.json
@@ -14,17 +14,13 @@
         "evaluate_every_epoch" : 10
     },
     "model_params": {
-        "batch_norm": true,
-        "batch_renorm": true,
-        "selected_levels_upscaling": [
-            true,
-            true,
-            true,
-            true,
-            true
-        ]
+        "encoder_network_params": {
+            "weight_decay": 1e-6
+        }
     },
-    "pretrained_model_name" : "resnet50",
     "prediction_type": "CLASSIFICATION",
-    "gpu" : "0"
+    "train_data" : "",
+    "eval_data" : "",
+    "classes_file" : "/classes.txt",
+    "model_output_dir" : ""
 }
\ No newline at end of file

diff --git a/setup.py b/setup.py
index 6aaf4f5..cc444cb 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
     },
     scripts=['dh_segment_train'],
     install_requires=[
-        'tensorflow-gpu==1.13.1',
+        #'tensorflow-gpu==1.13.1',
        'numpy==1.16.2',
        'imageio==2.5.0',
        'pandas==0.24.2',

From 1b36fca67c3ef6ca2019e2e7c1ad80255d6e65e6 Mon Sep 17 00:00:00 2001
From: soliveir
Date: Fri, 26 Jul 2019 16:31:36 +0200
Subject: [PATCH 57/57] formatting

---
 dh_segment/train.py | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/dh_segment/train.py b/dh_segment/train.py
index 234023d..3211d74 100644
--- a/dh_segment/train.py
+++ b/dh_segment/train.py
@@ -8,12 +8,6 @@
 from dh_segment import estimator_fn, utils
 from dh_segment.io import input
 import json
-
-try:
-    import better_exceptions
-except ImportError:
-    print('/!\ W -- Not able to import package better_exceptions')
-    pass
 from tqdm import trange
 from sacred import Experiment

@@ -106,26 +100,26 @@ def get_dirs_or_files(input_data):

     for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'):
         estimator.train(input.input_fn(train_input,
-                        input_label_dir=train_labels_input,
-                        num_epochs=training_params.evaluate_every_epoch,
-                        batch_size=training_params.batch_size,
-                        data_augmentation=training_params.data_augmentation,
-                        make_patches=training_params.make_patches,
-                        image_summaries=True,
-                        params=_config,
-                        num_threads=num_threads,
-                        progressbar_description="Training".format(i)))
+                                       input_label_dir=train_labels_input,
+                                       num_epochs=training_params.evaluate_every_epoch,
+                                       batch_size=training_params.batch_size,
+                                       data_augmentation=training_params.data_augmentation,
+                                       make_patches=training_params.make_patches,
+                                       image_summaries=True,
+                                       params=_config,
+                                       num_threads=num_threads,
+                                       progressbar_description="Training".format(i)))

         if eval_data is not None:
             eval_result = estimator.evaluate(input.input_fn(eval_input,
-                                             input_label_dir=eval_labels_input,
-                                             batch_size=1,
-                                             data_augmentation=False,
-                                             make_patches=False,
-                                             image_summaries=False,
-                                             params=_config,
-                                             num_threads=num_threads,
-                                             progressbar_description="Evaluation"))
+                                                            input_label_dir=eval_labels_input,
+                                                            batch_size=1,
+                                                            data_augmentation=False,
+                                                            make_patches=False,
+                                                            image_summaries=False,
+                                                            params=_config,
+                                                            num_threads=num_threads,
+                                                            progressbar_description="Evaluation"))
         else:
             eval_result = None