ステレオグラムの作り方＿実践編３（２Dシルエット画像から３D画像を作る）

■前回までの取り組み

前回の更新から間が空いてしまったのでおさらいします。

＜前回取り組んだ課題＞

　好きな背景画像で、

　好きな深度マップ画像を使い、ステレオグラムを作ろうとしたところ、

　３D表示にならないはずの箇所も部分的に３D表示になってしまった。

（余計な３Ｄ部分をゴミと呼んでいます。）

＜出てきた解決策＞

　深度マップ画像を３Dグラフィック画像にする。

　３Dグラフィック化にはBlenderという有名なフリーソフトが存在するのでそれを

　使うのが良い。

ところがこのBlenderの使い方が簡単には行かなくて。

ちゃんと勉強しないと使いこなせなさそうだったので、心が折れそうになっていました。

だけど！今はPythonを勉強しているのです。

ならば３Ｄグラフィック化はPythonでするべきではないだろうかと思い至りました。

■Pythonで２Dシルエット画像から３D画像を作る

色々調べてみて、参考にしたのは以下の２サイトです。

【第15回Python流体の数値計算】2次元ポアソン方程式をPythonで実装する。｜宇宙に入ったカマキリ (takun-physics.net)

こちらは何をしているのか感覚的に理解するのに使わせていただきました

方程式を解いたわけではないので正確に理解はできていないですけど。。

GitHub - unclearness/inflation_py: Generate a height/depth map and a mesh from a single silhouette.

こちらはソースをいただきました。

難しい説明はできません。

白黒のシルエット画像のエッジ端からの距離で、立体度を変える処理をしてくれています。単純にエッジ端からの距離だけで立体化しても滑らかな曲線は描けないので、ポアソン方程式を解く＋αで滑らかな曲線にしているということらしいです。

ではもらってきたソースで、試しにシルエット画像の棒人間くんを３Dグラフィック化してみましょう。

ln[1]　#インポートするライブラリ

import cv2
import numpy as np
from scipy.sparse import coo_matrix, linalg

"from scipy.sparse import coo_matrix, linalg"

このライブラリは初めて使います。高速演算をするためのライブラリのようですね。

白黒画像の黒の部分が多い時（＝行列の要素のほとんどが０になる「疎行列」の時）には、numpyの通常の行列を使うよりも速いとか。

へー・・・。そんな使い分けをするレベルにないのでへー・・・としか言えないですが、いつか役立つかもしれないので頭の片隅に置いておきます。

ln[2]　

def depth2orthomesh(depth, x_step=1, y_step=1, scale=(1.0, 1.0, 1.0),
                    minus_depth=True, max_connect_z_diff=99999.9):
    """Convert a 2-D depth map into an orthographic triangle mesh.

    Every sampled pixel whose depth is greater than 0.000001 becomes one
    vertex. Each vertex is connected to its upper-left, upper and left
    neighbours (when those are vertices too) with up to two triangles.

    Args:
        depth: 2-D array of depth values (anything exposing ``.shape`` and
            ``[y, x]`` indexing, e.g. a NumPy array).
        x_step: Horizontal sampling stride in pixels.
        y_step: Vertical sampling stride in pixels.
        scale: Three multipliers applied to x, y and z of every vertex.
        minus_depth: When true, the z coordinate is the negated depth.
        max_connect_z_diff: Two vertices are only connected when the
            absolute difference of their depth values is below this value.

    Returns:
        ``(vertices, faces)`` where ``vertices`` is a list of ``[x, y, z]``
        and ``faces`` is a list of ``[i0, i1, i2]`` vertex indices, or
        ``None`` when ``depth`` is not 2-dimensional.
    """
    if len(depth.shape) != 2:
        return None
    h, w = depth.shape
    vertices = []
    faces = []
    # Maps the (y, x) position of every emitted vertex to its index.
    vertex_ids = {}

    def linked(key, ref_depth):
        """Return the vertex id at ``key`` if it may be connected, else -1."""
        # .get() keeps positions outside the image (row/column 0 has no
        # upper/left neighbour) from raising KeyError.
        idx = vertex_ids.get(key, -1)
        if idx < 0:
            return -1
        # Compare un-negated depth values so minus_depth does not distort
        # the difference.
        if abs(float(depth[key]) - ref_depth) >= max_connect_z_diff:
            return -1
        return idx

    # NOTE: a pixel-wise loop in pure Python is slow for large images.
    for y in range(0, h, y_step):
        for x in range(0, w, x_step):
            raw = float(depth[y, x])
            if raw <= 0.000001:
                continue
            z = -raw if minus_depth else raw

            current_index = len(vertices)
            vertices.append([x * scale[0], y * scale[1], z * scale[2]])
            vertex_ids[(y, x)] = current_index

            upper_left_index = linked((y - y_step, x - x_step), raw)
            # Both triangles share the upper-left vertex. Index 0 is a
            # valid vertex, so the test is ">= 0" rather than "> 0".
            if upper_left_index < 0:
                continue
            upper_index = linked((y - y_step, x), raw)
            left_index = linked((y, x - x_step), raw)

            if upper_index >= 0:
                faces.append([upper_left_index, current_index, upper_index])
            if left_index >= 0:
                faces.append([upper_left_index, left_index, current_index])
    return vertices, faces

ln[3]　

def _make_ply_txt(vertices, faces, color=, normal=):
header_lines = ["ply", "format ascii 1.0",
"element vertex " + str(len(vertices)),
"property float x", "property float y", "property float z"]
has_normal = len(vertices) == len(normal)
has_color = len(vertices) == len(color)
if has_normal:
header_lines += ["property float nx",
"property float ny", "property float nz"]
if has_color:
header_lines += ["property uchar red", "property uchar green",
"property uchar blue", "property uchar alpha"]
# no face
header_lines += ["element face " + str(len(faces)),
"property list uchar int vertex_indices", "end_header"]
header = "\n".join(header_lines) + "\n"
data_lines =
for i in range(len(vertices)):
line = [vertices[i][0], vertices[i][1], vertices[i][2]]
if has_normal:
line += [normal[i][0], normal[i][1], normal[i][2]]
if has_color:
line += [int(color[i][0]), int(color[i][1]), int(color[i][2]), 255]
line_txt = " ".join([str(x) for x in line])
data_lines.append(line_txt)
for f in faces:
line_txt = " ".join(['3'] + [str(int(x)) for x in f])
data_lines.append(line_txt)
data_txt = "\n".join(data_lines)
ply_txt = header + data_txt
return ply_txt

ln[4]　

# Implementation of the following paper
# "Notes on Inflating Curves" [Baran and Lehtinen 2009].
# http://alecjacobson.com/weblog/media/notes-on-inflating-curves-2009-baran.pdf
def inflationByBaran(mask, use_sparse=True):
    """Inflate a binary silhouette into a height map.

    For every non-zero pixel of ``mask`` one unknown ``h`` is created and
    the 4-neighbour Laplacian equation ``sum(neighbours) - 4 * h = -4`` is
    assembled, treating pixels outside the silhouette as ``h = 0``. The
    returned depth is ``sqrt(h)``.

    Args:
        mask: 2-D array; non-zero pixels belong to the silhouette.
        use_sparse: Solve with ``scipy.sparse.linalg.spsolve`` when true,
            otherwise with a dense ``np.linalg.solve``.

    Returns:
        Float array with the same shape as ``mask``; zero outside the
        silhouette.
    """
    h, w = mask.shape
    depth = np.zeros((h, w))

    # Pad with one ring of "outside" pixels so silhouette pixels on the
    # image border also have four neighbours. Without this, a mask touching
    # the border yields more unknowns than equations (a singular system).
    inside = np.pad(np.asarray(mask) != 0, 1, mode='constant',
                    constant_values=False)
    num_param = int(np.count_nonzero(inside))
    if num_param == 0:
        # Nothing to inflate; a 0x0 system cannot be solved.
        return depth

    # Unknown index per pixel in row-major order, -1 outside the silhouette.
    param_idx = np.full(inside.shape, -1, dtype=np.int64)
    param_idx[inside] = np.arange(num_param)
    ys, xs = np.nonzero(inside)
    own = np.arange(num_param)

    # 4 neighbor laplacian: -4 on the diagonal, +1 per silhouette neighbour.
    rows = [own]
    cols = [own]
    vals = [np.full(num_param, -4.0)]
    for dy, dx in ((0, -1), (0, 1), (-1, 0), (1, 0)):
        neighbor = param_idx[ys + dy, xs + dx]
        linked = neighbor >= 0
        rows.append(own[linked])
        cols.append(neighbor[linked])
        vals.append(np.ones(int(np.count_nonzero(linked))))
    row = np.concatenate(rows)
    col = np.concatenate(cols)
    data = np.concatenate(vals)

    # Right hand side (1-D so both solvers return a 1-D solution).
    b = np.full(num_param, -4.0)

    if use_sparse:
        # Sparse matrix version; convert from COO to CSC before solving.
        A = coo_matrix((data, (row, col)), shape=(num_param, num_param))
        x = linalg.spsolve(A.tocsc(), b)
    else:
        # Dense matrix version
        A = np.zeros((num_param, num_param))
        A[row, col] = data
        x = np.linalg.solve(A, b)
    x = np.asarray(x).reshape(-1)

    # setting z = sqrt(h); clamp at 0 to guard against round-off producing
    # a tiny negative value.
    depth[inside[1:-1, 1:-1]] = np.sqrt(np.maximum(x, 0.0))
    return depth

ln[5]　

def visualizeDepth(depth, path='', dmin=0, dmax=50, cm_name='viridis'):
    """Render a depth map as a colour image using a matplotlib colormap.

    Args:
        depth: Array of depth values.
        path: When not empty, the image is also written there with
            ``cv2.imwrite``.
        dmin: Depth mapped to the first colormap entry.
        dmax: Depth mapped to the last colormap entry.
        cm_name: Name passed to ``plt.get_cmap``.

    Returns:
        ``uint8`` array of ``depth.shape + (3,)`` in BGR channel order.

    Raises:
        ValueError: If ``dmax`` equals ``dmin`` (the range would be empty).
    """
    import matplotlib.pyplot as plt

    if dmax == dmin:
        raise ValueError('dmax must differ from dmin')

    cm = plt.get_cmap(cm_name)
    # Sample 256 entries by calling the colormap instead of reading
    # ``cm.colors``, which only listed colormaps provide. The call returns
    # RGBA floats; drop alpha.
    rgb = np.asarray(cm(np.linspace(0.0, 1.0, 256)))[:, :3]
    colors = (rgb * 255).astype(np.uint8)
    colors = colors[..., ::-1]  # RGB -> BGR

    normed = np.clip((depth - dmin) / (dmax - dmin), 0, 1)
    normed = (normed * 255).astype(np.uint8)

    # Use each normalised value as an index into the 256-entry table.
    vis = colors[normed]
    if path != '':
        cv2.imwrite(path, vis)
    return vis

ln[6]　

def writeMeshAsPly(path, vertices, faces):
    """Write a mesh to ``path`` as an ASCII PLY file.

    Args:
        path: Destination file path; an existing file is overwritten.
        vertices: Vertex list passed to ``_make_ply_txt``.
        faces: Face list passed to ``_make_ply_txt``.
    """
    # Build the text before opening the file so that a serialization error
    # does not leave a truncated file behind.
    txt = _make_ply_txt(vertices, faces)
    with open(path, 'w', encoding='ascii') as f:
        f.write(txt)

ln[7]　

if __name__ == '__main__':
    # Base names (without extension) of the silhouette images to process.
    names = ['walking1']
    for name in names:
        mask_path = 'Path名/' + name + '.png'
        print(mask_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable.
            raise FileNotFoundError('Could not read mask image: ' + mask_path)
        # Binarize: pixels brighter than 100 become silhouette (255).
        mask = np.where(mask > 100, 255, 0).astype(np.uint8)

        depth = inflationByBaran(mask)
        visualizeDepth(depth, name + '_baran.jpg')
        vertices, faces = depth2orthomesh(depth)
        writeMeshAsPly(name + '_baran.ply', vertices, faces)

もはや中身の理解は無理なので、とりあえず”def”で区切って書きました。

触る場所はln[7]のnames の画像の名前部分と、画像ファイルのPathとファイル名の部分だけですね。

（ソースの不要な部分は削除してます）

これを実行すると、以下のような画像が生成されました！

成功しているのかどうかよく分からないので実際にステレオグラム化して確認してみましょう。

上記の画像をグレースケールに変換してステレオグラム化のアルゴリズムに放り込んでしまえば良いのですが、背景が紫色なのでグレースケールにすると背景が灰色になってしまいます。背景は黒になって欲しいので、ここは手作業で背景を黒に変更します。

そうして作った以下の画像をステレオグラム化のアルゴリズムに入れます。

■ステレオグラム化

ひとまず背景はランダムドットノイズで試してみます。

ln[１]　#背景のパターンとパターンサイズ

def make_pattern(shape=(16, 16), levels=64):
    """Create a random-dot background pattern of gray values.

    Args:
        shape: Shape of the returned array.
        levels: Number of distinct gray levels.

    Returns:
        Float array of ``shape`` whose values are ``k / levels`` for random
        integers ``k`` in ``0 .. levels - 1``.
    """
    # The upper bound of np.random.randint is exclusive, so pass ``levels``
    # (not ``levels - 1``) to make every level reachable.
    return np.random.randint(0, levels, shape) / levels


pattern = make_pattern(shape=(128, 64))

ln[２]　#ステレオグラム化アルゴリズムの定義

def make_autostereogram(depthmap, pattern, shift_amplitude=0.1, invert=False):
    """Build an autostereogram from a depth map and a background pattern.

    The first ``pattern.shape[1]`` columns are filled with the pattern
    (repeated vertically). Every later pixel copies a pixel further left in
    the same row, at a distance of the pattern width minus a shift
    proportional to the depth at that pixel.

    Args:
        depthmap: 2-D float array of depth values.
            (Assumes values in [0, 1] - confirm against the caller.)
        pattern: 2-D array used as the repeating background.
        shift_amplitude: Fraction of the pattern width used as the shift
            for a depth of 1.
        invert: When true, ``1 - depthmap`` is used instead of ``depthmap``.

    Returns:
        Array with the shape of ``depthmap`` and the dtype of ``pattern``.
    """
    if invert:
        depthmap = 1 - depthmap
    result = np.zeros_like(depthmap, dtype=pattern.dtype)
    height, width = result.shape[0], result.shape[1]
    pattern_height, pattern_width = pattern.shape[0], pattern.shape[1]
    rows = np.arange(height)

    # Seed the left strip with the vertically tiled pattern.
    seed_width = min(pattern_width, width)
    result[:, :seed_width] = pattern[rows % pattern_height, :seed_width]

    # Truncate toward zero like int(); clamp so the source column is always
    # an already written column strictly to the left of the target.
    shifts = (depthmap * shift_amplitude * pattern_width).astype(np.int64)
    shifts = np.clip(shifts, 0, pattern_width - 1)

    # Columns must be processed left to right (each one reads earlier
    # columns), but all rows of a column are independent.
    for c in range(pattern_width, width):
        result[:, c] = result[rows, c - pattern_width + shifts[:, c]]

    return result

ln[３]　#ステレオグラム化

# Background: random-dot pattern, 128 rows x 64 columns.
pattern = make_pattern(shape=(128,64))
depthmap = cv2.imread("Path/ファイル名.jpg")
if depthmap is None:
    # cv2.imread returns None instead of raising when the file is missing
    # or unreadable.
    raise FileNotFoundError("Could not read depth map image")
depthmap = cv2.cvtColor(depthmap, cv2.COLOR_BGR2GRAY)
# Scale the 8-bit gray values to floats in [0, 1].
depthmap = depthmap/255
autostereogram = make_autostereogram(depthmap, pattern, 0.3)
# Back to 8-bit for saving.
autostereogram = np.clip(autostereogram * 255, a_min = 0, a_max = 255).astype(np.uint8)
# NOTE: the output path below is the same placeholder as the input path
# above; use a different file name so the depth map is not overwritten.
cv2.imwrite("Path/ファイル名.jpg", autostereogram)

完了！

出来上がったものがこちらです。