TensorFlowでもラプラシアンピラミッドを作る

Posted On 2019-10-24

897{icon} {views}

以前作ったPyTorchのラプラシアンピラミッドをTensorFlow2.0に移植しました。何かと便利なラプラシアンピラミッドを使ってみましょう。

環境

TensorFlow2.0

CPUの動作で確認しましたが、一応TPUでも動くように配慮はしました。

コード

Conv2DのカーネルがPyTorchは「out_ch, in_ch, kernel_size, kernel_size」だったのに対し、TensorFlow（Keras）は「kernel_size, kernel_size, in_ch, out_ch」なのに注意すればいいだけです。

from PIL import Image
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

# 画像ファイル→TensorFlowテンソル
def load_tensor(img_path):
    """Load a JPEG file as a batched float32 RGB tensor.

    Args:
        img_path: Path of the JPEG file to read.

    Returns:
        A float32 tensor with a leading batch axis of size 1 followed by
        the decoded image axes (height, width, 3 channels).
    """
    raw = tf.io.read_file(img_path)
    # Force three output channels: the pyramid functions in this file slice
    # exactly three channels, so a grayscale JPEG decoded with the default
    # (channels=0) would yield a single channel and break them.
    decoded = tf.io.decode_jpeg(raw, channels=3)
    # Integer pixels are converted (and rescaled) to float32 here.
    x = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.expand_dims(x, axis=0)

def get_gaussian_kernel():
    """Return the 5x5 binomial (Gaussian-like) kernel as a TF constant.

    The kernel is the outer product of [1, 4, 6, 4, 1] with itself divided
    by 256, so its entries sum to 1. It is reshaped to (5, 5, 1, 1), i.e.
    (kernel_h, kernel_w, in_ch, out_ch) for a single-channel convolution.
    """
    binomial_row = np.array([1, 4, 6, 4, 1], np.float32)
    weights = np.outer(binomial_row, binomial_row) / 256.0
    return tf.constant(weights.reshape(5, 5, 1, 1))

def pyramid_down(image):
    """Blur ``image`` with the Gaussian kernel and downsample by 2.

    Each of the three channels on the last axis is convolved separately
    with the single-channel kernel (stride 2, "same" padding) and the
    results are concatenated back along the last axis.
    """
    kernel = get_gaussian_kernel()
    bands = []
    for ch in range(3):
        # Slice with ch:ch + 1 to keep the channel axis for conv2d.
        single = image[:, :, :, ch:ch + 1]
        bands.append(K.conv2d(single, kernel, padding="same", strides=2))
    return tf.concat(bands, axis=-1)

def pyramid_up(image):
    """Upsample ``image`` by 2 and smooth it with the Gaussian kernel.

    Elements are repeated twice along axis 1 and then axis 2, after which
    each of the three channels on the last axis is convolved separately
    with the single-channel kernel ("same" padding) and re-concatenated.
    """
    kernel = get_gaussian_kernel()
    doubled_rows = K.repeat_elements(image, 2, axis=1)
    doubled = K.repeat_elements(doubled_rows, 2, axis=2)
    bands = []
    for ch in range(3):
        # Slice with ch:ch + 1 to keep the channel axis for conv2d.
        single = doubled[:, :, :, ch:ch + 1]
        bands.append(K.conv2d(single, kernel, padding="same"))
    return tf.concat(bands, axis=-1)

def gaussian_pyramid(original, n_pyramids):
    """Build a Gaussian pyramid by repeatedly applying ``pyramid_down``.

    Args:
        original: The full-resolution image; it becomes level 0.
        n_pyramids: Number of downsampling steps to perform.

    Returns:
        A list of ``n_pyramids + 1`` levels, starting with ``original``.
    """
    levels = [original]
    for _ in range(n_pyramids):
        # Each level is derived from the one produced just before it.
        levels.append(pyramid_down(levels[-1]))
    return levels

def laplacian_pyramid(original, n_pyramids):
    """Build a Laplacian pyramid from ``original``.

    Args:
        original: The full-resolution image passed to ``gaussian_pyramid``.
        n_pyramids: Number of downsampling steps.

    Returns:
        A list of ``n_pyramids + 1`` entries: for each pair of adjacent
        Gaussian levels, the finer level minus the upsampled coarser level,
        followed by the coarsest Gaussian level itself.
    """
    # Build the Gaussian pyramid first.
    gaussians = gaussian_pyramid(original, n_pyramids)

    laplacian = []
    for fine, coarse in zip(gaussians, gaussians[1:]):
        height, width = fine.shape[1:3]
        # "same"-padded stride-2 downsampling rounds odd sizes up, so the
        # 2x upsampled level can be one pixel larger than `fine`. Crop it
        # back so the subtraction lines up; for even sizes (or an unknown,
        # None dimension) the slice is a no-op.
        upsampled = pyramid_up(coarse)[:, :height, :width, :]
        laplacian.append(fine - upsampled)

    # Append the last (coarsest) Gaussian level as the residual.
    laplacian.append(gaussians[-1])
    return laplacian

## 出力用のツール
# ピラミッドを1枚の画像に結合して保存するための関数
def tile_pyramid(tf_tenosor_imgs):
    """Paste all pyramid levels onto one canvas and return it.

    Args:
        tf_tenosor_imgs: Sequence of pyramid levels; each exposes ``.shape``
            (height and width on axes 1 and 2) and ``.numpy()``.

    Returns:
        A float32 NumPy array of shape (height * 3 // 2, width, 3), where
        height and width are taken from the first level.
    """
    base_h, base_w = tf_tenosor_imgs[0].shape[1:3]
    canvas = np.zeros((base_h * 3 // 2, base_w, 3), np.float32)
    left = top = 0
    for level, tensor in enumerate(tf_tenosor_imgs):
        rows, cols = tensor.shape[1:3]
        canvas[top:top + rows, left:left + cols, :] = tensor.numpy()
        big_step = 2 ** level         # divisor for the main shift
        small_step = 2 ** (level + 3)  # divisor for the small offset
        if level % 2:
            # Levels 1, 3, 5, ...: shift mainly along x.
            left += base_w // big_step
            top += base_h // small_step
        else:
            # Levels 0, 2, 4, ...: shift mainly along y.
            left += base_w // small_step
            top += base_h // big_step
    return canvas

# 見やすいようにMin-Maxでスケーリングする
def normalize_pyramids(pyramids):
    """Min-max scale every pyramid level for easier viewing.

    Args:
        pyramids: Iterable of tensors (one per pyramid level).

    Returns:
        A list with one tensor per input level, computed as
        ``(level - min) / (max - min)``. A level whose values are all equal
        (zero range) maps to all zeros instead of NaN.
    """
    result = []
    for diff in pyramids:
        diff_min = tf.reduce_min(diff)
        diff_range = tf.reduce_max(diff) - diff_min
        # divide_no_nan returns 0 where the denominator is 0, so a constant
        # level no longer produces 0/0 = NaN.
        result.append(tf.math.divide_no_nan(diff - diff_min, diff_range))
    return result

if __name__ == "__main__":
    # Load the sample image and build a Laplacian pyramid with 6
    # downsampling steps, then min-max scale each level for display.
    source = load_tensor("train.jpg")
    levels = normalize_pyramids(laplacian_pyramid(source, 6))

    # Tile the levels onto one canvas and convert it to 8-bit pixels.
    tiled = tile_pyramid(levels) * 255.0
    tile = tiled.astype(np.uint8)

    # Save the tiled pyramid as a JPEG.
    with Image.fromarray(tile) as img:
        img.save("tile_pyramid.jpg", quality=95)

結果

元画像

タイル

良いのではないでしょうか

Shikoan's ML Blogの中の人が運営しているサークル「じゅ～しぃ～すくりぷと」の本のご案内

技術書コーナー

北海道の駅巡りコーナー

Tags:DeepLearning, 小ネタ, 画像処理

環境

コード

結果

Add a Comment