junyanz · botcs · Jul 19, 2017 · Jul 20, 2017 · NataliaDiaz · Aug 7, 2020
diff --git a/data/base_dataset.py b/data/base_dataset.py
@@ -32,8 +32,12 @@ def get_transform(opt):
         transform_list.append(transforms.RandomHorizontalFlip())
 
     transform_list += [transforms.ToTensor(),
-                       transforms.Normalize((0.5, 0.5, 0.5),
-                                            (0.5, 0.5, 0.5))]
+                       # this is wrong! because the fake samples are not normalized like this, 
+                       # still they are inferred on the same network, 
+                       #transforms.Normalize((0.5, 0.5, 0.5), 
+                       #                     (0.5, 0.5, 0.5)) 
+                       lambda x: (x - x.min()) / x.max() * 2 - 1, # [-1., 1.]
+                       ]
     return transforms.Compose(transform_list)
 
 def __scale_width(img, target_width):

diff --git a/models/cycle_gan_model.py b/models/cycle_gan_model.py
@@ -199,6 +199,20 @@ def get_current_visuals(self):
             return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A),
                                 ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B)])
 
+    def forward_external(self, x, direction):
+        """Run one generator on an externally supplied batch of images.
+
+        Parameters:
+            x (Tensor)      -- input images; dimension 0 is read as the batch size
+            direction (str) -- 'AtoB' runs self.netG_A, 'BtoA' runs self.netG_B
+
+        Returns the generator output converted by util.tensor2im. The whole
+        batch is converted only when it holds more than one image; for a
+        batch of one, only the first image is converted (batch=False).
+
+        Raises ValueError if direction is neither 'AtoB' nor 'BtoA'.
+        """
+        # Validate first so a bad direction fails before any computation.
+        if direction == 'AtoB':
+            generator = self.netG_A
+        elif direction == 'BtoA':
+            generator = self.netG_B
+        else:
+            raise ValueError('`direction` must be "AtoB" or "BtoA"')
+
+        # volatile=True marks the input as inference-only (legacy autograd flag).
+        fake = generator.forward(Variable(x, volatile=True))
+        return util.tensor2im(fake.data, batch=x.size(0) > 1)
+
+
     def save(self, label):
         self.save_network(self.netG_A, 'G_A', label, self.gpu_ids)
         self.save_network(self.netD_A, 'D_A', label, self.gpu_ids)

diff --git a/models/networks.py b/models/networks.py
@@ -131,6 +131,20 @@ def __call__(self, input, target_is_real):
 # downsampling/upsampling operations.
 # Code and idea originally from Justin Johnson's architecture.
 # https://github.com/jcjohnson/fast-neural-style/
+
+class Printer(nn.Module):
+    """Debugging layer: prints a label and the tensor (or its size), then
+    returns the input unchanged so it can be placed anywhere in a model.
+
+    Parameters:
+        text (str)       -- label printed before the tensor information
+        only_size (bool) -- print x.size() if True, the full tensor otherwise
+    """
+    def __init__(self, text='', only_size=True):
+        super(Printer, self).__init__()
+        self.text = text
+        self.only_size = only_size
+
+    def forward(self, x):
+        shown = x.size() if self.only_size else x
+        # One call emits "<text> <shown>\n", the same output as printing the
+        # label with end=' ' followed by the value.
+        print(self.text, shown)
+        return x
+
 class ResnetGenerator(nn.Module):
     def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
         assert(n_blocks >= 0)
@@ -149,21 +163,40 @@ def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_d
         for i in range(n_downsampling):
             mult = 2**i
             model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
-                                stride=2, padding=1),
+                                stride=1, padding=1),
+                      nn.MaxPool2d(2),
                       norm_layer(ngf * mult * 2),
-                      nn.ReLU(True)]
+                      nn.ReLU(True),
+                      #Printer('downsample %d'%mult)
+            ]
+            # model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
+            #                     stride=2, padding=1),
+            #           norm_layer(ngf * mult * 2),
+            #           nn.ReLU(True),
+            #           Printer('downsample %d'%mult)]
 
         mult = 2**n_downsampling
         for i in range(n_blocks):
             model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout)]
 
         for i in range(n_downsampling):
             mult = 2**(n_downsampling - i)
-            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
-                                         kernel_size=3, stride=2,
-                                         padding=1, output_padding=1),
-                      norm_layer(int(ngf * mult / 2)),
-                      nn.ReLU(True)]
+            model += [
+                nn.UpsamplingBilinear2d(scale_factor=2),
+                # nn.Upsample(scale_factor=2, mode='nearest'),
+                nn.Conv2d(ngf * mult, int(ngf * mult / 2), 3, padding=1),
+                norm_layer(int(ngf * mult / 2)),
+                nn.ReLU(True),
+                #Printer('upsample %d'%mult)
+            ]
+
+            # model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
+            #                              kernel_size=3, stride=2,
+            #                              padding=1, output_padding=1),
+            #           norm_layer(int(ngf * mult / 2)),
+            #           nn.ReLU(True)]
+            #
+
         model += [nn.ReflectionPad2d(3)]
         model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
         model += [nn.Tanh()]

diff --git a/options/test_options.py b/options/test_options.py
@@ -4,6 +4,7 @@
 class TestOptions(BaseOptions):
     def initialize(self):
         BaseOptions.initialize(self)
+        self.parser.add_argument('--input_video', type=str, help='input video path')
         self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
         self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
         self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')

diff --git a/test-video.py b/test-video.py
@@ -0,0 +1,76 @@
+import cv2
+import time
+import os
+import sys
+import torch as th
+from PIL import Image
+from torchvision import transforms
+from options.test_options import TestOptions
+from data.data_loader import CreateDataLoader
+from models.models import create_model
+from util.visualizer import Visualizer
+from pdb import set_trace as st
+from util import html
+
+
+# Translate a video frame by frame with the 'BtoA' generator and write the
+# result to '<opt.name>.avi'.
+opt = TestOptions().parse()
+opt.nThreads = 1   # test code only supports nThreads = 1
+opt.batchSize = 1  # test code only supports batchSize = 1
+opt.serial_batches = True  # no shuffle
+opt.no_flip = True  # no flip
+
+# Every frame is resized to this fixed size before inference, and the output
+# video is created with the same size.
+W, H = 640, 480
+#W, H = 128, 128
+#W, H = 256, 256
+BUFFER = 14  # number of frames sent through the generator per forward pass
+
+# video
+print(opt.input_video)
+video_capture = cv2.VideoCapture(opt.input_video)
+if not video_capture.isOpened():
+    # Fail loudly instead of silently writing an empty output file.
+    raise IOError('Could not open input video: %s' % opt.input_video)
+fourcc = cv2.VideoWriter_fourcc(*'XVID')
+out_video = cv2.VideoWriter(opt.name+'.avi', fourcc, 20.0, (W, H))
+
+# Built once: the preprocessing is identical for every frame.
+T = transforms.Compose([
+    transforms.Scale([W, H]),
+    transforms.ToTensor(),
+    #lambda x: x * 2. - 1.
+])
+
+try:
+    model = create_model(opt)
+    # test
+    frames_done = 0
+    ret = True
+    while ret:
+        t = time.time()
+        batch = []
+        for _ in range(BUFFER):
+            ret, frame = video_capture.read()
+            if not ret:
+                break
+            # OpenCV decodes frames as BGR; PIL expects RGB.
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            batch.append(T(Image.fromarray(frame))[None])
+        if not batch:
+            break
+        x = th.cat(batch, 0)
+        # Guard against an empty gpu_ids list before indexing it.
+        if opt.gpu_ids and opt.gpu_ids[0] > -1:
+            x = x.cuda(opt.gpu_ids[0])
+        # NOTE(review): unary minus on the uint8 result wraps modulo 256
+        # (v -> 256 - v, 0 stays 0) -- presumably a colour inversion; confirm.
+        y = -model.forward_external(x, 'BtoA')
+        if y.ndim == 3:
+            # A batch of one comes back as a single HxWxC image; restore the
+            # batch axis so the loop below iterates frames, not image rows.
+            y = y[None]
+        for frame in y:
+            out_video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
+
+        # Count the frames actually processed; the last batch may be partial.
+        frames_done += len(batch)
+        print('processed frame... %4d   FPS: %5.2f,' % (
+            frames_done, len(batch)/(time.time()-t)))
+finally:
+    # Release both handles even if inference fails part-way through.
+    out_video.release()
+    video_capture.release()
+print("Ended!")
diff --git a/util/util.py b/util/util.py
@@ -9,9 +9,13 @@
 
 # Converts a Tensor into a Numpy array
 # |imtype|: the desired type of the converted numpy array
-def tensor2im(image_tensor, imtype=np.uint8):
-    image_numpy = image_tensor[0].cpu().float().numpy()
-    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+def tensor2im(image_tensor, imtype=np.uint8, batch=False):
+    # batch=True : convert every image; axis 1 of the 4-D tensor moves last.
+    # batch=False: convert only image_tensor[0]; axis 0 of that image moves last.
+    if batch:
+        source, axes = image_tensor, (0, 2, 3, 1)
+    else:
+        source, axes = image_tensor[0], (1, 2, 0)
+    image_numpy = np.transpose(source.cpu().float().numpy(), axes)
+    # Shift and scale: -1 maps to 0 and +1 maps to 255.
+    image_numpy = (image_numpy + 1) / 2.0 * 255.0
     return image_numpy.astype(imtype)