-
Notifications
You must be signed in to change notification settings - Fork 6
/
run_mvs.py
179 lines (149 loc) · 8.34 KB
/
run_mvs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import argparse
import numpy as np
import mvs_util
import config
from skimage import feature
from skimage.color import rgb2gray
from skimage import filters
from loss import Loss
import os
def optimize(base_dir, src_img, out_dir):
    """Optimize the diffusion parameters for one source image of a COLMAP project.

    Reads ``sparse/0/points3D.txt``, ``sparse/0/images.txt`` and
    ``sparse/0/cameras.txt`` plus the ``images`` folder under ``base_dir``,
    then alternately optimizes the smoothness weights (S), label values (D),
    label data weights (W) and label positions (P) for ``config.num_passes``
    passes, each stage being enabled by its ``config.optim_*`` switch.

    Args:
        base_dir: Base COLMAP project directory.
        src_img: Name of the image (as listed by ``mvs_util.load_poses``) for
            which the output is computed.
        out_dir: Directory receiving the ``.npy`` results; created on demand.

    Returns:
        None. Results are written to ``out_dir``: ``loss.npy``,
        ``<src_img>.output.npy`` and, when ``config.log_progress`` is set,
        one ``o_<step>.npy`` per iteration. If ``src_img`` is not among the
        loaded image names an error is printed and nothing is written.
    """
    images_txt = os.path.join(base_dir, 'sparse/0/images.txt')

    pts3D, _, _ = mvs_util.load_pts3d(os.path.join(base_dir, 'sparse/0/points3D.txt'))
    poses, invposes, I_names, I = mvs_util.load_poses(images_txt, os.path.join(base_dir, 'images'))
    cam = mvs_util.load_cameras(os.path.join(base_dir, 'sparse/0/cameras.txt'))

    try:
        cposeidx = I_names.index(src_img)
    except ValueError:
        # Both placeholders are strings; the path must not be formatted with %d.
        print("ERROR! File %s does not exist in directory %s. Please make sure you entered the name correctly."
              % (src_img, os.path.abspath(base_dir)))
        return

    pts2D = mvs_util.world2view(pts3D, poses[cposeidx, :, :], cam.flatten())  # 2D Projection of 3D points

    # Select only points visible in src_img
    V = mvs_util.visibility4view(images_txt, cposeidx)
    pts3D = pts3D[V, :]
    pts2D = pts2D[V, :]

    # poses of all images except src_img
    poses = torch.tensor(poses[np.r_[:cposeidx, cposeidx+1:poses.shape[0]], :, :],
                         dtype=torch.float32, device=config.device)
    # inverse pose of src_img
    I_invpose = torch.tensor(invposes[cposeidx, :, :], dtype=torch.float32, device=config.device)
    cam = torch.tensor(cam, dtype=torch.float32, device=config.device)

    # The data loading code assumes a single batch for now.
    batch_sz = 1

    # label xy positions
    P = torch.tensor(pts2D[:, :-1], dtype=torch.float32, device=config.device).unsqueeze(0).repeat(batch_sz, 1, 1)
    # label values
    D = torch.tensor(pts2D[:, -1], dtype=torch.float32, device=config.device).unsqueeze(0).repeat(batch_sz, 1)
    # label data weights; created explicitly on the same device as D
    W = torch.zeros(D.shape, device=config.device)

    # The smoothness weight at each pixel is initialized using the scaled gradient magnitude
    # (To avoid negative weights, the actual weight is the negative exponent of the gradient. See below...)
    S = torch.tensor(6 * filters.sobel(rgb2gray(I[cposeidx, :, :, :])),
                     dtype=torch.float32, device=config.device).unsqueeze(0).repeat(batch_sz, 1, 1)
    S[:, [0, -1], :] = torch.max(S)
    S[:, :, [0, -1]] = torch.max(S)

    # src_img, scaled to [0, 1] and permuted to channels-first
    I_center = torch.tensor(I[cposeidx, :, :, :], dtype=torch.float32,
                            device=config.device).unsqueeze(0).repeat(batch_sz, 1, 1, 1).permute(0, 3, 1, 2) / 255

    # multiview image set (every image except src_img):
    I = torch.tensor(I[np.r_[:cposeidx, cposeidx+1:I.shape[0]], :, :, :], dtype=torch.float32,
                     device=config.device).unsqueeze(0).repeat(batch_sz, 1, 1, 1, 1).permute(0, 1, 4, 2, 3) / 255

    step = 1
    max_steps = (config.optim_xy + config.optim_disp + config.optim_dw + config.optim_smoothness) \
        * config.num_iter * config.num_passes
    li = []  # loss value of every iteration, saved as loss.npy at the end

    def optim_param(optimizer):
        """Run ``config.num_iter`` iterations with ``optimizer``.

        Returns the output of the last iteration, or None when no iteration ran.
        """
        nonlocal step
        o = None

        for _ in range(config.num_iter):
            loss = Loss()

            # Select a random set of views to project to
            views = torch.randperm(I.shape[1]).long()[:config.nviews_reproj]
            I_rand = I[:, views, :, :, :]
            poses_rand = poses[views, :, :]

            optimizer.zero_grad()

            sx = torch.exp(-S)
            sy = torch.exp(-S)
            l, o = loss.loss_mvs(P, D, torch.exp(-W), sx, sy, I_center, I_rand, poses_rand, I_invpose, cam,
                                 torch.tensor(config.tile_max_pts), config.factor, config.loss_VGG,
                                 config.loss_grad, config.loss_smoothness)
            l.backward()
            # Step the optimizer that was passed in, so each stage updates its own parameter.
            optimizer.step()

            li.append(l.item())
            print(("%d/%d; Loss: %f" % (step, max_steps, l.item())))
            step = step + 1

            finished = step >= max_steps
            if config.log_progress or finished:
                os.makedirs(out_dir, exist_ok=True)
                o_np = o.detach().cpu().numpy().squeeze()
                np.save(os.path.join(out_dir, 'o_%d.npy' % step), o_np)

                if finished:
                    np.save(os.path.join(out_dir, 'loss.npy'), np.asarray(li))
                    np.save(os.path.join(out_dir, '%s.output.npy' % src_img), o_np)

        return o

    for npass in range(config.num_passes):

        #
        # Optimize Gradients
        if config.optim_smoothness:
            torch.cuda.empty_cache()
            S.requires_grad = True
            optimizer_s = optim.Adam([S], lr=config.learning_rate_smoothness * (0.9 ** npass))
            o = optim_param(optimizer_s)

            # COLMAP points are usually very sparse.
            # We densify the points by sampling along edges after the very first gradient pass.
            if npass == 0 and o is not None:
                P, D = mvs_util.densify_edge(P.squeeze(0), D.squeeze(0), o.detach().squeeze(0),
                                             I_center.squeeze(0), 4)
                P = P.unsqueeze(0).clone().detach()
                D = D.unsqueeze(0).clone().detach()
                # The number of labels changed, so the data weights are reset to match.
                W = torch.zeros(D.shape, device=D.device)

            S.requires_grad = False

        #
        # Optimize Label Value (Depth/Disparity)
        if config.optim_disp:
            torch.cuda.empty_cache()
            D.requires_grad = True
            optimizer_d = optim.Adam([D], lr=config.learning_rate_disp * (0.9 ** npass))
            optim_param(optimizer_d)
            D.requires_grad = False

        #
        # Optimize Data Weights
        # NOTE(review): config.learning_rate_dw is assumed to follow the naming of the
        # other learning rates in config - confirm.
        if config.optim_dw:
            torch.cuda.empty_cache()
            W.requires_grad = True
            optimizer_w = optim.Adam([W], lr=config.learning_rate_dw * (0.8 ** npass))
            optim_param(optimizer_w)
            W.requires_grad = False

        #
        # Optimize Point Positions
        # NOTE(review): config.learning_rate_xy is assumed to follow the naming of the
        # other learning rates in config - confirm.
        if config.optim_xy:
            torch.cuda.empty_cache()
            P.requires_grad = True
            optimizer_xy = optim.Adam([P], lr=config.learning_rate_xy * (0.75 ** npass))
            optim_param(optimizer_xy)
            P.requires_grad = False
def str2bool(value):
    """Convert a command-line string into a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text (including "False" or "0") becomes True. This parser maps
    the usual spellings explicitly and rejects anything else.

    Args:
        value: The raw argument text, or an actual bool (passed through).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, matching what bool('') produced before.
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError('Expected a boolean value, got %r' % (value,))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Optimize diffusion parameters for multi-view stereo')
    parser.add_argument('--input_dir', help='Base COLMAP project directory with poses and camera parameters', required=True)
    parser.add_argument('--src_img', help='Name of the image for which to compute depth', required=True)
    parser.add_argument('--output_dir', help='Name of output directory for saving results', required=True)
    parser.add_argument('--optim_xy', type=str2bool, default=config.optim_xy, help='Optimize label positions')
    parser.add_argument('--optim_disp', type=str2bool, default=config.optim_disp, help='Optimize label value')
    parser.add_argument('--optim_smoothness', type=str2bool, default=config.optim_smoothness, help='Optimize smoothness weights')
    parser.add_argument('--optim_dw', type=str2bool, default=config.optim_dw, help='Optimize label data weights')
    parser.add_argument('--nviews_reproj', type=int, default=config.nviews_reproj, help='Maximum number of views to use for reprojection loss')
    parser.add_argument('--factor', type=int, default=config.factor, help='Scale factor')
    parser.add_argument('--num_passes', type=int, default=config.num_passes, help='Number of optimization passes')
    parser.add_argument('--num_iter', type=int, default=config.num_iter, help='Number of iterations in each optimization pass')
    parser.add_argument('--log_progress', type=str2bool, default=config.log_progress, help='Log intermediate output')
    args = parser.parse_args()

    # Push the command-line overrides into the shared config module, which
    # optimize() reads its settings from.
    config.num_passes = args.num_passes
    config.num_iter = args.num_iter
    config.optim_xy = args.optim_xy
    config.optim_disp = args.optim_disp
    config.optim_dw = args.optim_dw
    config.optim_smoothness = args.optim_smoothness
    # Must be 'nviews_reproj' (with the s): that is the attribute the optimizer reads.
    config.nviews_reproj = args.nviews_reproj
    config.factor = args.factor
    config.log_progress = args.log_progress

    if torch.cuda.is_available():
        config.device = torch.device("cuda:0")
        torch.cuda.empty_cache()
        # Tensors created without an explicit device then default to CUDA floats.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
        print("Running on the GPU")
    else:
        config.device = torch.device("cpu")
        print("Running on the CPU")

    optimize(args.input_dir, args.src_img, args.output_dir)