# module.py

from __future__ import division
from ops import *
import tensorflow.contrib.layers as layers
import math

def conv_nn(input, dims1, dims2, size1, size2, k_size = 3):
    """Apply two reflect-padded conv + ELU layers, then resize.

    Each convolution uses a square ``k_size`` kernel, stride 1 and 'VALID'
    padding on an explicitly reflect-padded input, so the padding (not the
    convolution) decides the output size.

    Args:
        input: 4-D tensor; axes 1 and 2 are padded (presumably NHWC --
            confirm against callers).
        dims1: Number of output channels of the first convolution.
        dims2: Number of output channels of the second convolution.
        size1: Target size along axis 1 for the nearest-neighbour resize.
        size2: Target size along axis 2 for the nearest-neighbour resize.
        k_size: Side length of the square convolution kernel.

    Returns:
        The activated output of the second convolution, resized with
        ``tf.image.resize_nearest_neighbor`` to ``(size1, size2)``.
    """
    # A stride-1 VALID convolution shrinks each spatial axis by k_size - 1,
    # so pad that much in total.  The previous hard-coded pad of 1 was only
    # right for k_size == 3 (for which this still yields [1, 1]).
    pad_before = (k_size - 1) // 2
    pad_after = k_size // 2
    paddings = [[0, 0], [pad_before, pad_after], [pad_before, pad_after], [0, 0]]

    pp = tf.pad(input, paddings, "REFLECT")
    L1 = layers.conv2d(pp, dims1, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None)
    L1 = tf.nn.elu(L1)

    pp = tf.pad(L1, paddings, "REFLECT")
    L2 = layers.conv2d(pp, dims2, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None)
    L2 = tf.nn.elu(L2)
    L2 = tf.image.resize_nearest_neighbor(L2, (size1, size2))

    return L2

def encoder(input, reuse, name):
    """Build the convolutional encoder inside variable scope ``name``.

    The network is ten reflect-padded 'VALID' convolutions, each followed by
    ELU: six plain layers (three of them stride 2) and four dilated layers
    with rates 2, 4, 8 and 16.

    Args:
        input: 4-D tensor; axes 1 and 2 are padded (presumably NHWC --
            confirm against callers).
        reuse: If truthy, reuse the variables already created in the scope;
            otherwise the scope is asserted not to be in reuse mode.
        name: Variable scope name.

    Returns:
        The ELU-activated output of the last dilated convolution, which has
        256 channels.  With an even input size each stride-2 layer halves
        axes 1 and 2, giving 1/8 of the input size overall.
    """
    # (output channels, kernel size, stride, dilation rate, reflect pad),
    # listed in creation order so auto-generated layer names stay the same.
    layer_specs = (
        (32, 5, 1, 1, 2),
        (64, 3, 2, 1, 1),
        (64, 3, 1, 1, 1),
        (128, 3, 2, 1, 1),
        (128, 3, 1, 1, 1),
        (256, 3, 2, 1, 1),
        (256, 3, 1, 2, 2),
        (256, 3, 1, 4, 4),
        (256, 3, 1, 8, 8),
        (256, 3, 1, 16, 16),
    )

    with tf.variable_scope(name):
        scope = tf.get_variable_scope()
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False

        features = input
        for channels, kernel, stride, rate, pad in layer_specs:
            padded = tf.pad(features, [[0, 0], [pad, pad], [pad, pad], [0, 0]], "REFLECT")
            features = layers.conv2d(padded, channels, [kernel, kernel], rate=rate,
                                     stride=[stride, stride], padding='VALID', activation_fn=None)
            features = tf.nn.elu(features)

        return features

def decoder(input, size1, size2, reuse, name):
    """Build the decoder inside variable scope ``name``.

    Four ``conv_nn`` stages with 128, 64, 32 and 16 channels resize the
    features to 1/4, 1/2, full and full target size.  A final 3x3 'SAME'
    convolution produces 3 channels, clipped to [-1, 1].

    Args:
        input: Feature tensor to decode (presumably the encoder output --
            confirm against callers).
        size1: Final output size along axis 1.
        size2: Final output size along axis 2.
        reuse: If truthy, reuse the variables already created in the scope;
            otherwise the scope is asserted not to be in reuse mode.
        name: Variable scope name.

    Returns:
        3-channel tensor with values clipped to [-1.0, 1.0].
    """
    with tf.variable_scope(name):
        scope = tf.get_variable_scope()
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False

        # (channels for both convs of the stage, resize height, resize width)
        stages = (
            (128, int(size1 / 4), int(size2 / 4)),
            (64, int(size1 / 2), int(size2 / 2)),
            (32, int(size1), int(size2)),
            (16, int(size1), int(size2)),
        )

        net = input
        for width, out_h, out_w in stages:
            net = conv_nn(net, width, width, out_h, out_w)

        rgb = layers.conv2d(net, 3, [3, 3], stride=[1, 1], padding='SAME', activation_fn=None)
        return tf.clip_by_value(rgb, -1.0, 1.0)

def discriminator_G(input, reuse, name):
    """Build a four-layer convolutional discriminator with a scalar output.

    Each layer reflect-pads by 2, applies a 5x5 stride-2 'VALID' convolution
    and a leaky ReLU; no normalisation layer is applied.  The channel widths
    are 64, 128, 256 and 256.

    Args:
        input: 4-D image tensor (the original note says 256 x 256 x
            input_c_dim -- confirm against callers).
        reuse: If truthy, reuse the variables already created in the scope;
            otherwise the scope is asserted not to be in reuse mode.
        name: Variable scope name.

    Returns:
        Output of a single-unit dense layer applied to the flattened
        features (one unactivated value per sample).
    """
    with tf.variable_scope(name):
        scope = tf.get_variable_scope()
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False

        net = input
        for width in (64, 128, 256, 256):
            net = tf.pad(net, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
            net = layers.conv2d(net, width, [5, 5], stride=2, padding='VALID', activation_fn=None)
            net = tf.nn.leaky_relu(net)

        flat = layers.flatten(net)
        return tf.layers.dense(flat, 1)

def discriminator_L(input, reuse, name):
    """Build a four-layer convolutional discriminator with a scalar output.

    Same layout as ``discriminator_G`` except that the last convolution has
    512 channels: reflect pad 2, 5x5 stride-2 'VALID' convolution, leaky
    ReLU, with widths 64, 128, 256 and 512 and no normalisation layer.

    Args:
        input: 4-D image tensor.  NOTE(review): the original header note says
            256 x 256, but its per-layer notes (32, 16, 8, 4) imply a 64 x 64
            input -- confirm against callers.
        reuse: If truthy, reuse the variables already created in the scope;
            otherwise the scope is asserted not to be in reuse mode.
        name: Variable scope name.

    Returns:
        Output of a single-unit dense layer applied to the flattened
        features (one unactivated value per sample).
    """
    with tf.variable_scope(name):
        scope = tf.get_variable_scope()
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False

        net = input
        for width in (64, 128, 256, 512):
            net = tf.pad(net, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
            net = layers.conv2d(net, width, [5, 5], stride=2, padding='VALID', activation_fn=None)
            net = tf.nn.leaky_relu(net)

        flat = layers.flatten(net)
        return tf.layers.dense(flat, 1)

def discriminator_red(input, reuse, name):
    """Build a six-layer discriminator from ``convolution_SN`` layers.

    Layers 'l1'..'l6' use widths 64, 128, 256, 256, 256 and 512, each
    followed by a leaky ReLU, and 'l7' is a ``dense_RED_SN`` head.  Both
    helpers come from ``ops``; the literal arguments 5 and 2 are presumably
    kernel size and stride -- confirm against ``ops.convolution_SN``.

    Args:
        input: Image tensor (the original note says 256 x 256 x input_c_dim
            -- confirm against callers).
        reuse: If truthy, reuse the variables already created in the scope;
            otherwise the scope is asserted not to be in reuse mode.
        name: Variable scope name.

    Returns:
        Whatever ``dense_RED_SN`` returns for the last activated feature map.
    """
    # (output width, layer name) in creation order.
    conv_stages = (
        (64, 'l1'),
        (128, 'l2'),
        (256, 'l3'),
        (256, 'l4'),
        (256, 'l5'),
        (512, 'l6'),
    )

    with tf.variable_scope(name):
        scope = tf.get_variable_scope()
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False

        net = input
        for width, layer_name in conv_stages:
            net = convolution_SN(net, width, 5, 2, layer_name)
            net = tf.nn.leaky_relu(net)

        return dense_RED_SN(net, 'l7')

def contextual_block(bg_in, fg_in, mask, k_size, lamda, name, stride=1):
    """Rebuild masked-out features from background patches (distance based).

    Background patches are cut from ``bg_in * mask``.  Each patch centred on
    a ``fg_in`` position gets a squared Euclidean distance to every background
    patch.  The distances are standardised over the patch axis, squashed with
    ``-tanh`` and turned into weights by ``softmax(lamda * .)``.  The weights
    are then used to paste the background patches back with a transposed
    convolution.

    The shape of ``bg_in`` must be fully static, because the batch size,
    height and width are used as Python integers.

    Args:
        bg_in: 4-D feature tensor ``(b, h, w, dims)`` that supplies the
            patches.
        fg_in: 4-D feature tensor to match against the patches (presumably
            the same shape as ``bg_in`` -- confirm against callers).
        mask: 4-D mask; only channel 0 is used, resized to ``(h, w)``.
            It multiplies ``bg_in`` and ``1 - mask`` weights the rebuilt
            features (presumably 1 marks known pixels -- confirm).
        k_size: Side length of the square patches.
        lamda: Scale applied to the scores before ``softmax``.
        name: Variable scope name.
        stride: Stride used when extracting background patches.

    Returns:
        ELU-activated output of a 1x1 convolution (scope 'ML', ``dims``
        channels) over ``concat([bg_in, rebuilt], axis=3)``.
    """
    with tf.variable_scope(name):
        b, h, w, dims = [i.value for i in bg_in.get_shape()]
        mask_1c = tf.image.resize_nearest_neighbor(mask, (h, w))
        mask_1c = tf.expand_dims(mask_1c[:, :, :, 0], 3)  # b h w 1
        mask_r = tf.tile(mask_1c, [1, 1, 1, dims])  # b h w dims
        bg = bg_in * mask_r

        # Number of windows a 'VALID' patch extraction produces per axis.
        # This closed form replaces a nested Python loop that only counted
        # iterations; it agrees with that loop for odd k_size and also
        # matches the extraction for even k_size.
        patch_rows = max(0, (h - k_size) // stride + 1)
        patch_cols = max(0, (w - k_size) // stride + 1)
        c = patch_rows * patch_cols

        patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
        patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims))
        patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3])  # b 1 1 (k*k*dims) c

        patch2 = tf.extract_image_patches(fg_in, [1, k_size, k_size, 1], [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')

        per_sample = []
        for ib in range(b):
            # Background patches of this sample as a 1x1 conv filter bank.
            k1 = patch1[ib, :, :, :, :]
            k1d = tf.reduce_sum(tf.square(k1), axis=2)  # squared norm per bg patch
            # The same patches laid out as k x k filters for the paste-back.
            k2 = tf.reshape(k1, (k_size, k_size, dims, c))
            ww = patch2[ib, :, :, :]
            wwd = tf.reduce_sum(tf.square(ww), axis=2, keepdims=True)  # squared norm per fg patch
            ft = tf.expand_dims(ww, 0)

            # Inner product of every fg patch with every bg patch.
            CS = tf.nn.conv2d(ft, k1, strides=[1, 1, 1, 1], padding='SAME')

            # ||fg||^2 + ||bg||^2 - 2 <fg, bg> = squared Euclidean distance.
            DS1 = tf.expand_dims(k1d + wwd, 0) - 2 * CS

            # Standardise over the bg-patch axis, then map small distances
            # to large scores.
            DS2 = (DS1 - tf.reduce_mean(DS1, 3, True)) / reduce_std(DS1, 3, True)
            DS2 = -1 * tf.nn.tanh(DS2)

            CA = softmax(lamda * DS2)

            ACLt = tf.nn.conv2d_transpose(CA, k2, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME')
            # Each output position receives k_size**2 overlapping patches.
            ACLt = ACLt / (k_size ** 2)

            per_sample.append(ACLt)

        # One concat over the batch instead of growing the tensor per sample.
        ACL = tf.concat(per_sample, 0)

        # Keep the masked background and fill the rest with rebuilt features.
        ACL = bg + ACL * (1.0 - mask_r)

        con1 = tf.concat([bg_in, ACL], 3)
        ACL2 = layers.conv2d(con1, dims, [1, 1], stride=[1, 1], padding='VALID', activation_fn=None, scope='ML')
        ACL2 = tf.nn.elu(ACL2)

        return ACL2

def contextual_block_cs(bg_in, fg_in, mask, k_size, lamda, name, stride=1):
    """Rebuild masked-out features from background patches (cosine based).

    Background patches are cut from ``bg_in * mask``.  Each patch centred on
    a ``fg_in`` position is scored against every background patch by the
    inner product of their L2-normalised vectors.  The score map is smoothed
    twice with a 3x3 identity kernel, with the position and patch-grid axes
    swapped in between.  The scores are then turned into weights by
    ``softmax(lamda * .)`` and used to paste the background patches back
    with a transposed convolution.

    The shape of ``bg_in`` must be fully static, because the batch size,
    height and width are used as Python integers.

    Args:
        bg_in: 4-D feature tensor ``(b, h, w, dims)`` that supplies the
            patches.
        fg_in: 4-D feature tensor to match against the patches (presumably
            the same shape as ``bg_in`` -- confirm against callers).
        mask: 4-D mask; only channel 0 is used, resized to ``(h, w)``.
            It multiplies ``bg_in`` and ``1 - mask`` weights the rebuilt
            features (presumably 1 marks known pixels -- confirm).
        k_size: Side length of the square patches.
        lamda: Scale applied to the scores before ``softmax``.
        name: Variable scope name.
        stride: Stride used when extracting background patches.

    Returns:
        ``bg + rebuilt * (1 - mask)`` with the same shape as ``bg_in``.
    """
    with tf.variable_scope(name):
        b, h, w, dims = [i.value for i in bg_in.get_shape()]
        mask_1c = tf.image.resize_nearest_neighbor(mask, (h, w))
        mask_1c = tf.expand_dims(mask_1c[:, :, :, 0], 3)  # b h w 1
        mask_r = tf.tile(mask_1c, [1, 1, 1, dims])  # b h w dims
        bg = bg_in * mask_r

        # Number of windows a 'VALID' patch extraction produces per axis.
        # This closed form replaces a nested Python loop that only counted
        # iterations; it agrees with that loop for odd k_size and also
        # matches the extraction for even k_size.
        patch_rows = max(0, (h - k_size) // stride + 1)
        patch_cols = max(0, (w - k_size) // stride + 1)
        c = patch_rows * patch_cols

        patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
        patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims))
        patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3])  # b 1 1 (k*k*dims) c

        patch2 = tf.extract_image_patches(fg_in, [1, k_size, k_size, 1], [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')

        fuse_weight = tf.reshape(tf.eye(3), [3, 3, 1, 1])

        per_sample = []
        for ib in range(b):
            raw = patch1[ib, :, :, :, :]
            # Unit-length bg patches as a 1x1 filter bank; the epsilon
            # guards against division by zero for all-zero patches.
            k2 = raw / tf.sqrt(tf.reduce_sum(tf.square(raw), axis=2, keepdims=True) + 1e-16)
            # Un-normalised patches laid out as k x k filters for the paste-back.
            k1 = tf.reshape(raw, (k_size, k_size, dims, c))
            ww = patch2[ib, :, :, :]
            ft = ww / tf.sqrt(tf.reduce_sum(tf.square(ww), axis=2, keepdims=True) + 1e-16)
            ft = tf.expand_dims(ft, 0)

            # Cosine similarity of every fg patch with every bg patch.
            CA = tf.nn.conv2d(ft, k2, strides=[1, 1, 1, 1], padding='SAME')

            # First smoothing pass over the (fg position, bg patch) plane.
            CA = tf.reshape(CA, [1, h * w, c, 1])
            CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            # Split with the real patch grid rather than sqrt(c), so
            # non-square grids are handled, then swap rows and columns.
            CA = tf.reshape(CA, [1, h, w, patch_rows, patch_cols])
            CA = tf.transpose(CA, [0, 2, 1, 4, 3])  # 1 w h cols rows
            # Second smoothing pass in the swapped layout.
            CA = tf.reshape(CA, [1, h * w, c, 1])
            CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            # The data is still in the swapped layout here, so reshape with
            # the swapped sizes before transposing back.
            CA = tf.reshape(CA, [1, w, h, patch_cols, patch_rows])
            CA = tf.transpose(CA, [0, 2, 1, 4, 3])  # 1 h w rows cols
            CA = tf.reshape(CA, [1, h, w, c])

            CA2 = softmax(lamda * CA)

            ACLt = tf.nn.conv2d_transpose(CA2, k1, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME')
            # Each output position receives k_size**2 overlapping patches.
            ACLt = ACLt / (k_size ** 2)

            per_sample.append(ACLt)

        # One concat over the batch instead of growing the tensor per sample.
        ACL = tf.concat(per_sample, 0)

        # Keep the masked background and fill the rest with rebuilt features.
        ACL2 = bg + ACL * (1.0 - mask_r)

        return ACL2