forked from waynehuu/edal-geospatial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
191 lines (149 loc) · 6.33 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os
from os.path import join as pathjoin
import rasterio
from rasterio.plot import reshape_as_image
import rasterio.mask
from rasterio.features import rasterize
import pandas as pd
import geopandas as gpd
from shapely.geometry import mapping, Point, Polygon, MultiPolygon
from shapely.ops import cascaded_union
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
def crop_image(img, y, x, h, w):
"""
Crop the image with given top-left anchor and corresponding width & height
:param img: image to be cropped
:param y: height of anchor
:param x: width of anchor
:param h: height of the patch
:param w: width of the patch
:return:
"""
if len(img.shape) == 2:
return img[y:y+w, x:x+h]
else:
return img[y:y+w, x:x+h, :]
def make_grid(tile_size, patch_size, overlap=0):
"""
Extract patches at fixed locations. Output coordinates for Y,X as a list (not two lists)
:param tile_size: size of the tile (input image)
:param patch_size: size of the output patch
:param overlap: #overlapping pixels
:return:
"""
max_h = int(tile_size[0] - patch_size[0])
max_w = int(tile_size[1] - patch_size[1])
if max_h > 0 and max_w > 0:
h_step = int(np.ceil(tile_size[0] / (patch_size[0] - overlap)))
w_step = int(np.ceil(tile_size[1] / (patch_size[1] - overlap)))
else:
h_step = 1
w_step = 1
patch_grid_h = np.floor(np.linspace(0, max_h, h_step)).astype(np.int32)
patch_grid_w = np.floor(np.linspace(0, max_w, w_step)).astype(np.int32)
y, x = np.meshgrid(patch_grid_h, patch_grid_w)
return list(zip(y.flatten(), x.flatten()))
def patch_tile(rgb, gt, patch_size, pad=0, overlap=0):
"""
Extract the given rgb and gt tiles into patches
:param rgb:
:param gt:
:param patch_size: size of the patches, should be a tuple of (h, w)
:param pad: #pixels to be padded around each tile, should be either one element or four elements
:param overlap: #overlapping pixels between two patches in both vertical and horizontal direction
:return: rgb and gt patches as well as coordinates
"""
# rgb = misc_utils.load_file(rgb_file)
# gt = misc_utils.load_file(gt_file)[:, :, 0]
np.testing.assert_array_equal(rgb.shape[:2], gt.shape)
grid_list = make_grid(
np.array(rgb.shape[:2]) + 2 * pad, patch_size, overlap)
for y, x in grid_list:
rgb_patch = crop_image(
rgb, y, x, patch_size[0], patch_size[1])
gt_patch = crop_image(
gt, y, x, patch_size[0], patch_size[1])
yield rgb_patch, gt_patch, y, x
def convert_polygon(rowcol_polygon, transform):
"""
Convert polygons from geojson rowcol coordinates to pixel positions
:param rowcol_polygon: geojson polygon(s)
:param transform: affine.Affine object, read from geotiff meta
"""
polygon_points = []
for point in np.array(rowcol_polygon.exterior.coords):
# transform rowcol coords to geotiff crs, using reverse affine transformation
polygon_points.append(~transform * point)
return Polygon(polygon_points)
def rasterize_labels(labels, img_size):
"""
Draw rasterized labeled imagery based on corresponding geotiff image size.
:param labels: geopandas dataframe, must have 'geometry' column with Polygon objects
:img_size: corresponding geotiff image size
"""
new_polygons = []
for _, row in labels.iterrows():
if isinstance(row['geometry'], Polygon):
new_polygons.append(convert_polygon(
row['geometry'], img_meta['transform']))
elif isinstance(row['geometry'], MultiPolygon):
for poly in list(row['geometry']):
new_polygons.append(convert_polygon(
poly, img_meta['transform']))
else:
continue
return rasterize(shapes=new_polygons, out_shape=img_size)
def read_geotiff(geotiff_path):
"""Read geotiff, return reshaped image and metadata."""
with rasterio.open(geotiff_path, 'r') as src:
img = src.read()
img_meta = src.meta
return reshape_as_image(img), img_meta
def read_labels(labels_path, geotiff_crs):
"""Read geojson labels and convert projection, return geopandas dataframe."""
labels = gpd.read_file(labels_path)
labels = labels[labels.geometry.notnull()][labels.building == 'yes']
return labels.to_crs({'init': geotiff_crs['init']})
def make_dir_if_not_exists(path, return_path=False):
if not os.path.exists(path):
os.makedirs(path)
if return_path:
return path
def save_image(img, path, name):
make_dir_if_not_exists(path)
data = Image.fromarray(img.astype(np.uint8))
data.save(pathjoin(path, name))
if __name__ == '__main__':
patch_size = (5000, 5000)
train_folder = 'train_tier_1'
locations = [f for f in os.listdir(train_folder) if f != 'patches']
for i, loc in enumerate(locations):
print(f'Processing location {i}/{len(locations)}')
prefix = pathjoin(train_folder, loc)
tiles = [f for f in os.listdir(prefix) if os.path.isdir(
pathjoin(prefix, f)) and not f.endswith('labels')]
for tile in tqdm(tiles):
img_patches_dir = make_dir_if_not_exists(
pathjoin(train_folder, 'patches', 'rgb'), return_path=True)
gt_patches_dir = make_dir_if_not_exists(
pathjoin(train_folder, 'patches', 'gt'), return_path=True)
img, img_meta = read_geotiff(pathjoin(
train_folder, loc, tile, f'{tile}.tif'))
img_size = (img_meta['height'], img_meta['width'])
labels = read_labels(
pathjoin(train_folder, loc,
f'{tile}-labels', f'{tile}.geojson'),
img_meta['crs'])
gt = rasterize_labels(labels, img_size)
for img_patch, gt_patch, y, x in patch_tile(img, gt, patch_size):
img_patchname = f'{tile}_y{int(y)}x{int(x)}.png'
gt_patchname = f'{tile}_y{int(y)}x{int(x)}.png'
if len(np.unique(img_patch)) == 1 and np.unique(img_patch) == 0:
continue
save_image(img_patch, pathjoin(
img_patches_dir, loc), img_patchname)
save_image(gt_patch*255, pathjoin(
gt_patches_dir, loc), gt_patchname)