diff --git a/configs/attribute_predict_coarse/global_predictor_resnet_attr.py b/configs/attribute_predict_coarse/global_predictor_resnet_attr.py index 0cd68108c..6e9d79263 100644 --- a/configs/attribute_predict_coarse/global_predictor_resnet_attr.py +++ b/configs/attribute_predict_coarse/global_predictor_resnet_attr.py @@ -16,14 +16,18 @@ inter_channels=[2048, 4096], outchannels=4096), attr_predictor=dict( - type='AttrPredictor', inchannels=4096, outchannels=attribute_num), - loss_attr=dict( - type='BCEWithLogitsLoss', - ratio=1, - weight=None, - size_average=None, - reduce=None, - reduction='mean'), + type='AttrPredictor', + inchannels=4096, + outchannels=attribute_num, + loss_attr=dict( + type='BCEWithLogitsLoss', + ratio=1, + weight=None, + size_average=None, + reduce=None, + reduction='mean', + ), + ), pretrained='checkpoint/resnet50.pth') pooling = 'Global' diff --git a/configs/attribute_predict_coarse/global_predictor_vgg_attr.py b/configs/attribute_predict_coarse/global_predictor_vgg_attr.py index 46db612a0..5f492a974 100644 --- a/configs/attribute_predict_coarse/global_predictor_vgg_attr.py +++ b/configs/attribute_predict_coarse/global_predictor_vgg_attr.py @@ -15,14 +15,18 @@ inter_channels=[512, 4096], outchannels=4096), attr_predictor=dict( - type='AttrPredictor', inchannels=4096, outchannels=attribute_num), - loss_attr=dict( - type='BCEWithLogitsLoss', - ratio=1, - weight=None, - size_average=None, - reduce=None, - reduction='mean'), + type='AttrPredictor', + inchannels=4096, + outchannels=attribute_num, + loss_attr=dict( + type='BCEWithLogitsLoss', + ratio=1, + weight=None, + size_average=None, + reduce=None, + reduction='mean', + ), + ), pretrained='checkpoint/vgg16.pth') pooling = 'Global' diff --git a/configs/landmark_detect/landmark_detect_resnet.py b/configs/landmark_detect/landmark_detect_resnet.py index 4d7af0796..b3ffa2a95 100644 --- a/configs/landmark_detect/landmark_detect_resnet.py +++ b/configs/landmark_detect/landmark_detect_resnet.py @@ -7,7 +7,7 @@ model = dict( type='LandmarkDetector', - backbone=dict(type='ResNet', layer_setting='resnet50'), + backbone=dict(type='ResNet', setting='resnet50'), global_pool=dict( type='GlobalPooling', inplanes=(7, 7), diff --git a/demo/test_attr_predictor.py b/demo/test_attr_predictor.py index 2fc3f9566..49fb5da5b 100644 --- a/demo/test_attr_predictor.py +++ b/demo/test_attr_predictor.py @@ -40,7 +40,8 @@ def main(): img_tensor = get_img_tensor(args.input, args.use_cuda) # global attribute predictor will not use landmarks # just set a default value - landmark_tensor = torch.zeros(8) + # TODO: Add landmark demo support + landmark_tensor = torch.zeros(16).view(1, -1) cfg.model.pretrained = None model = build_predictor(cfg.model) load_checkpoint(model, args.checkpoint, map_location='cpu') diff --git a/demo/test_cate_attr_predictor.py b/demo/test_cate_attr_predictor.py index 54e057700..2da1e79e1 100644 --- a/demo/test_cate_attr_predictor.py +++ b/demo/test_cate_attr_predictor.py @@ -41,7 +41,8 @@ def main(): img_tensor = get_img_tensor(args.input, args.use_cuda) # global attribute predictor will not use landmarks # just set a default value - landmark_tensor = torch.zeros(8) + # TODO: Add landmark demo support + landmark_tensor = torch.zeros(16).view(1, -1) model = build_predictor(cfg.model) load_checkpoint(model, args.checkpoint, map_location='cpu') diff --git a/demo/test_landmark_detector.py b/demo/test_landmark_detector.py index fffde6511..e467527d5 100644 --- a/demo/test_landmark_detector.py +++ b/demo/test_landmark_detector.py @@ -45,7 +45,7 @@ def main(): args = parse_args() cfg = Config.fromfile(args.config) - + cfg.model.pretrained = None img_tensor, w, h = get_img_tensor(args.input, args.use_cuda, get_size=True) # build model and load checkpoint diff --git a/mmfashion/utils/image.py b/mmfashion/utils/image.py index 26f287065..70dbf8823 100644 --- a/mmfashion/utils/image.py +++ b/mmfashion/utils/image.py @@ -14,12 +14,13 @@ def get_img_tensor(img_path, use_cuda, get_size=False): img_size = (224, 224) # crop image to (224, 224) img.thumbnail(img_size, Image.ANTIALIAS) + w, h = img.size img = img.convert('RGB') normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transform = transforms.Compose([ - transforms.RandomResizedCrop(img_size[0]), - transforms.RandomHorizontalFlip(), + transforms.Pad((max(h - w, 0)//2, + max(w - h, 0)//2), padding_mode='edge'), transforms.ToTensor(), normalize, ]) @@ -62,8 +63,9 @@ def show_img(img_tensor): def draw_landmarks(img_file, landmarks, r=2): img = Image.open(img_file) draw = ImageDraw.Draw(img) + w, h = img.size for i, lm in enumerate(landmarks): - x = lm[0] - y = lm[1] + x = lm[0] * (w / 224.) + y = lm[1] * (h / 224.) draw.ellipse([(x - r, y - r), (x + r, y + r)], fill=(255, 0, 0, 0)) img.show()