PythonとffmpegでMP4動画を超解像する

2020-12-09 - By Mats (admin)

無料のソフトウェアだけを使って低コストでmp4動画の超解像と拡大を行う方法を紹介します。

90年代から2000年代にVHSビデオテープやDVDに録画した映像を、より鮮明に、より大きくしたいという場合、現在ではDeep Learning技術を使った手法が利用できるようになりました。しかし、自分でニューラルネットワークモデルを作成してこれを行うことは多くの方にとってハードルが高く、数少ないパッケージ化された動画の超解像ソフトウェアはまだ数万円の価格です。

そこで、数年前に静止画像の超解像ソフトウェアとして話題になった無料ソフトウェア「Waifu2x」を使い、ニューラルネットワークモデルを自作することなく、お金もかけず、難しい部分をスキップして動画の「拡大＆超解像」を行うコードを下記に記載します。

この方法ではコマンドラインからWaifu2xをコールして超解像させる手法をとっているため、Waifu2xよりも適切な超解像ソフトウェアが手元に入手できた場合はそちらのソフトウェアに切り替えることが容易にできるメリットがあります。

1. 用意するもの

ここで記述するPythonコードは、Windows10上のJupyter Notebookで実行する前提で書いてあります。

Windows10パソコン

Python実行環境（Anaconda・Jupyter Notebook）

ffmpegがWindows10にインストールされている状態

Waifu2x CUI版がWindows10にインストールされている状態

CUDAを動かせるGPUがひとつ以上ある状態

超解像と拡大を適用したい動画はMP4形式でsourceフォルダに保存しておきます。複数のMP4ファイルがある場合、順番に連続して処理を行います。

2. Pythonコード

#####
# sourceフォルダに入ったmp4ファイルを次々に960x720に高解像拡大します。
#####

from IPython.display import clear_output
import cv2, glob, math, os, shutil, subprocess
import numpy as np
import tensorflow as tf

# Count the GPUs that TensorFlow can see (the script below uses at most two of them)
physical_gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
number_of_gpus = len(physical_gpus)

# Locations of the external command-line tools
ffmpeg_path = 'C:/Users/Ryusuke/TensorFlow/ffmpeg/bin'  # folder that holds the ffmpeg executable
waifu2x_path = 'C:/Program Files/waifu2x-caffe'  # folder that holds the waifu2x CUI build

# Append both folders to the PATH environment variable so the tools can be
# launched by name from this process.
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], ffmpeg_path, waifu2x_path])

# Working folders used by the pipeline (created below when they are missing)
source_path = 'source/'              # input mp4 files
temp_path = 'temp/'                  # scratch area
temp_path_lr = temp_path+'lr/'       # low-resolution frames
temp_path_lr_1 = temp_path_lr+'1/'   # frames for the first waifu2x run
temp_path_lr_2 = temp_path_lr+'2/'   # frames for the second waifu2x run
temp_path_sr = temp_path+'sr/'       # super-resolved frames
result_path = 'result/'              # finished videos

# Parents are listed before their children so a plain mkdir is sufficient.
for _folder in (
    source_path,
    temp_path,
    temp_path_lr,
    temp_path_lr_1,
    temp_path_lr_2,
    temp_path_sr,
    result_path,
):
    if not os.path.exists(_folder):
        os.mkdir(_folder)

# Collect the mp4 videos in the source folder (empty entries dropped), in sorted order
input_videos = sorted(path for path in glob.glob(source_path+'*.mp4') if path != "")

print('{0} files found.'.format(len(input_videos)))

# Text progress bar used to report how far the conversion has advanced
def update_progress(progress):
    """Redraw a 20-character progress bar in the notebook output.

    ``progress`` is a fraction of the work done. Integers are converted to
    float, any other non-float value is treated as 0, and the value is
    clamped to the range 0..1 before the bar is printed.
    """
    bar_length = 20

    # Normalise the argument to a number.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to [0, 1].
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(bar_length * fraction))
    bar = "#" * filled + "-" * (bar_length - filled)

    # Replace the previous bar instead of appending a new line each time.
    clear_output(wait = True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))
    
# Super-resolution pipeline: every video found in the source folder is
# processed in turn (audio extraction -> frame extraction -> waifu2x ->
# reassembly -> muxing audio back in).

# --- waifu2x settings shared by all videos ---
process_mode = 'gpu'             # run waifu2x on the GPU
gpu_1 = 0                        # index of the first GPU
gpu_2 = 1                        # index of the second GPU (only used when two or more GPUs exist)
image_processing_mode = 'scale'  # mode that resizes and super-resolves at the same time
target_width, target_height = 960, 720  # frame size after super resolution
noise_level = 0                  # passed as --noise_level (waifu2x takes 0-3, 0 = no reduction)


def _reset_temp_folders():
    """Delete the temp tree and recreate its (empty) frame sub-folders."""
    if os.path.exists(temp_path):
        shutil.rmtree(temp_path)
    for folder in (temp_path_lr_1, temp_path_lr_2, temp_path_sr):
        os.makedirs(folder, exist_ok=True)


def _extract_audio(input_video, output_audio_name):
    """Copy the first audio stream of input_video into output_audio_name.

    Returns True when ffmpeg succeeded and the audio file exists, False
    otherwise (for example when the video has no audio stream).
    """
    # An argument list (no shell) keeps paths containing spaces intact.
    # '-map 0:a:0' selects the first audio stream wherever it sits in the file.
    command = ['ffmpeg', '-y', '-i', input_video,
               '-vn', '-map', '0:a:0', '-acodec', 'copy', output_audio_name]
    return subprocess.call(command) == 0 and os.path.exists(output_audio_name)


def _extract_frames(input_video):
    """Save every frame of input_video as a numbered JPEG.

    With fewer than two GPUs all frames go to temp_path_lr_1. With two or
    more GPUs odd-numbered frames go to temp_path_lr_1 and even-numbered
    frames to temp_path_lr_2 so the two folders can be processed
    independently.

    Returns (number_of_frames_written, frame_rate).
    Raises OSError when the video cannot be opened or a frame cannot be saved.
    """
    cap = cv2.VideoCapture(input_video)
    try:
        if not cap.isOpened():
            raise OSError('cannot open '+str(input_video))

        frame_rate = cap.get(cv2.CAP_PROP_FPS)

        print('video name: '+os.path.basename(input_video))
        print('frame count: '+str(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))))
        print('frame rate: '+str(frame_rate))

        count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            count += 1

            # To keep only part of each frame (old recordings often have a
            # black border or noise around the picture), crop here, e.g.:
            #   y, h, x, w = 0, 480, 40, 640
            #   frame = frame[y:y+h, x:x+w]

            if number_of_gpus >= 2 and count % 2 == 0:
                folder = temp_path_lr_2   # count = 2, 4, 6 ...
            else:
                folder = temp_path_lr_1   # every frame, or count = 1, 3, 5 ...

            frame_file = folder+'{0:06d}.jpg'.format(count)
            if not cv2.imwrite(frame_file, frame):
                raise OSError('cannot write '+frame_file)
    finally:
        # Always give the video file handle back, even on errors.
        cap.release()

    return count, frame_rate


def _waifu2x_command(input_path, gpu_index):
    """Build the waifu2x command line that upscales one folder on one GPU."""
    return ('waifu2x-caffe-cui.exe -i '+input_path+' -o '+temp_path_sr
            +'  --input_extention_list jpg --mode '+image_processing_mode
            +' --process '+process_mode+' --gpu '+str(gpu_index)
            +' --scale_width '+str(target_width)+' --scale_height '+str(target_height)
            +' --noise_level '+str(noise_level))


def _run_super_resolution():
    """Run waifu2x over the extracted frames; return True if every run exited with 0."""
    if number_of_gpus <= 1:
        return subprocess.call(["powershell.exe", _waifu2x_command(temp_path_lr_1, gpu_1)]) == 0

    # Two independent runs, one per folder and GPU, executed in parallel.
    procs = [
        subprocess.Popen(["powershell.exe", _waifu2x_command(temp_path_lr_1, gpu_2)]),
        subprocess.Popen(["powershell.exe", _waifu2x_command(temp_path_lr_2, gpu_1)]),
    ]
    # A list (not a generator) so that both processes are always waited for.
    return all([proc.wait() == 0 for proc in procs])


def _write_video(output_file, frame_count, frame_rate):
    """Sharpen the super-resolved frames 1..frame_count and join them into a video.

    Returns the number of frames actually written; frames whose image file
    is missing or unreadable are skipped and reported at the end.
    """
    # Sharpening operator; built once because it is the same for every frame.
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    size = (int(target_width), int(target_height))

    fourcc = cv2.VideoWriter_fourcc('m','p','4','v')
    video = cv2.VideoWriter(output_file, fourcc, float(frame_rate), size)

    written = 0
    missing = 0
    try:
        # Frames are numbered from 1, so the last one is frame_count itself.
        for i in range(1, frame_count + 1):
            img = cv2.imread(temp_path_sr+'{0:06d}.png'.format(i))
            if img is None:
                missing += 1
            else:
                if (img.shape[1], img.shape[0]) != size:
                    # Frames must match the size the writer was opened with.
                    img = cv2.resize(img, size)
                video.write(cv2.filter2D(img, -1, kernel))
                written += 1
            update_progress(i / frame_count)
    finally:
        video.release()

    update_progress(1)
    if missing:
        print('warning: '+str(missing)+' frame(s) were missing and have been skipped')
    return written


def _convert_video(input_video):
    """Run the whole pipeline for one video; problems are reported and end the conversion early."""
    video_stem = os.path.splitext(os.path.basename(input_video))[0]
    size_tag = str(target_width)+'x'+str(target_height)

    # 1. Cut the audio track out of the source video.
    output_audio_name = temp_path+video_stem+'.m4a'
    has_audio = _extract_audio(input_video, output_audio_name)

    # 2. Cut the video into frame images.
    try:
        frame_count, frame_rate = _extract_frames(input_video)
    except (cv2.error, OSError) as e:
        print('error: opencv error '+str(e))
        return
    if frame_count == 0:
        print('error: no frames could be read from '+str(input_video))
        return

    # 3. Apply super resolution and enlargement with waifu2x.
    print('excuting super resolution')
    try:
        if _run_super_resolution():
            print('super resolution has been completed')
        else:
            print('warning: waifu2x reported a failure')
    except OSError as e:
        print('error: waifu2x error '+str(e))
        return

    # 4. Join the super-resolved frames back into a (silent) video.
    output_video_name = video_stem+'_sr_'+size_tag+'_noaudio.mp4'
    if _write_video(result_path+output_video_name, frame_count, frame_rate) == 0:
        print('error: no super-resolved frames were found for '+str(input_video))
        return

    # 5. Put video and audio together again.
    if not has_audio:
        print('no audio track was extracted; result saved as '+result_path+output_video_name)
        return

    # Each source video gets its own result file so that a batch run does
    # not overwrite earlier results.
    final_name = result_path+video_stem+'_sr_'+size_tag+'.mp4'
    command = ['ffmpeg', '-y', '-i', result_path+output_video_name,
               '-i', output_audio_name, '-c', 'copy', final_name]
    if subprocess.call(command) != 0:
        print('error: ffmpeg could not combine video and audio for '+str(input_video))


for input_video in input_videos:

    print('converting: '+input_video)

    if not os.path.exists(input_video):
        print('error: '+str(input_video)+' does not exists.')
        continue

    # Start from empty frame folders so leftovers of another video never mix in.
    _reset_temp_folders()
    _convert_video(input_video)
    # Clear the temporary files again before moving on to the next video.
    _reset_temp_folders()