Follow ONNX_Runtime_Perf_Tuning and introduce new args

henryruhs 2023-06-03 02:23:48 +02:00
parent 6bd3724443
commit 8734a6c2e0
6 changed files with 48 additions and 41 deletions

View File

@@ -40,14 +40,17 @@ options:
                         replace this face
   -o OUTPUT_FILE, --output OUTPUT_FILE
                         save output to this file
-  --gpu                 use gpu
   --keep-fps            maintain original fps
   --keep-frames         keep frames directory
+  --all-faces           swap all faces in frame
   --max-memory MAX_MEMORY
                         maximum amount of RAM in GB to be used
-  --max-cores CORES_COUNT
-                        number of cores to be use for CPU mode
-  --all-faces           swap all faces in frame
+  --cpu-threads CPU_THREADS
+                        number of threads to be used for CPU mode
+  --gpu-threads GPU_THREADS
+                        number of threads to be used for GPU mode
+  --gpu-vendor {amd,intel,nvidia}
+                        choose your gpu vendor
 ```
 Looking for a CLI mode? Using the -f/--face argument will run the program in CLI mode.
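For orientation, here is a hypothetical way to drive the new flags from a Python wrapper; the run.py entry point, the media paths, and the chosen values are placeholder assumptions, not taken from this commit.

```python
# Hypothetical invocation of the new CLI flags via a Python wrapper.
# run.py, the file paths, and the values below are placeholders.
import subprocess

subprocess.run([
    'python', 'run.py',
    '-f', 'face.jpg',
    '-t', 'target.mp4',
    '-o', 'swapped.mp4',
    '--gpu-vendor', 'nvidia',   # one of: amd, intel, nvidia
    '--gpu-threads', '8',       # threads used by the GPU-mode session
    '--keep-fps',
], check=True)
```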

View File

@@ -13,3 +13,4 @@ tensorflow==2.12.0; sys_platform != 'darwin'
 opennsfw2==0.10.2
 protobuf==4.23.2
 tqdm==4.65.0
+threadpoolctl==3.1.0

View File

@@ -6,7 +6,6 @@ import sys
 import shutil
 import glob
 import argparse
-import multiprocessing as mp
 import os
 import torch
 from pathlib import Path
@@ -15,9 +14,9 @@ from tkinter import filedialog
 from opennsfw2 import predict_video_frames, predict_image
 from tkinter.filedialog import asksaveasfilename
 import webbrowser
-import psutil
 import cv2
 import threading
+from threadpoolctl import threadpool_limits
 from PIL import Image, ImageTk
 import roop.globals
@@ -28,30 +27,35 @@ from roop.analyser import get_face_single
 if 'ROCMExecutionProvider' in roop.globals.providers:
     del torch
-pool = None
-args = {}
 signal.signal(signal.SIGINT, lambda signal_number, frame: quit())
 parser = argparse.ArgumentParser()
 parser.add_argument('-f', '--face', help='use this face', dest='source_img')
 parser.add_argument('-t', '--target', help='replace this face', dest='target_path')
 parser.add_argument('-o', '--output', help='save output to this file', dest='output_file')
-parser.add_argument('--gpu', help='choice your gpu vendor', dest='gpu', choices=['amd', 'nvidia'])
 parser.add_argument('--keep-fps', help='maintain original fps', dest='keep_fps', action='store_true', default=False)
 parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False)
-parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', type=int)
-parser.add_argument('--max-cores', help='number of cores to be use for CPU mode', dest='cores_count', type=int, default=max(psutil.cpu_count() - 2, 2))
 parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False)
+parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int)
+parser.add_argument('--cpu-threads', help='number of threads to be used for CPU mode', dest='cpu_threads', type=int)
+parser.add_argument('--gpu-threads', help='number of threads to be used for GPU mode', dest='gpu_threads', type=int)
+parser.add_argument('--gpu-vendor', help='choose your gpu vendor', dest='gpu_vendor', choices=['amd', 'intel', 'nvidia'])
+args = {}
 for name, value in vars(parser.parse_args()).items():
     args[name] = value
-if 'gpu' in args:
-    roop.globals.gpu = args['gpu']
-if 'all-faces' in args:
+if 'all_faces' in args:
     roop.globals.all_faces = True
+if 'cpu_threads' in args and args['cpu_threads']:
+    roop.globals.cpu_threads = args['cpu_threads']
+if 'gpu_threads' in args and args['gpu_threads']:
+    roop.globals.gpu_threads = args['gpu_threads']
+if 'gpu_vendor' in args and args['gpu_vendor']:
+    roop.globals.gpu_vendor = args['gpu_vendor']
 sep = "/"
 if os.name == "nt":
     sep = "\\"
@@ -77,10 +81,10 @@ def pre_check():
     model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
     if not os.path.isfile(model_path):
         quit('File "inswapper_128.onnx" does not exist!')
-    if roop.globals.gpu == 'amd':
+    if roop.globals.gpu_vendor == 'amd':
         if 'ROCMExecutionProvider' not in roop.globals.providers:
             quit("You are using --gpu=amd flag but ROCM isn't available or properly installed on your system.")
-    if roop.globals.gpu == 'nvidia':
+    if roop.globals.gpu_vendor == 'nvidia':
         CUDA_VERSION = torch.version.cuda
         CUDNN_VERSION = torch.backends.cudnn.version()
         if not torch.cuda.is_available() or not CUDA_VERSION:
@@ -98,22 +102,14 @@ def pre_check():
 def start_processing():
-    frame_paths = args["frame_paths"]
-    n = len(frame_paths) // (args['cores_count'])
-    # single thread
-    if roop.globals.gpu == 'amd' or roop.globals.gpu == 'nvidia' or n < 2:
+    # gpu mode
+    if roop.globals.gpu_vendor is not None:
+        process_video(args['source_img'], args["frame_paths"])
+        return
+    # cpu mode
+    with threadpool_limits(limits=roop.globals.cpu_threads):
         process_video(args['source_img'], args["frame_paths"])
         return
-    # multithread if total frames to cpu cores ratio is greater than 2
-    if n > 2:
-        processes = []
-        for i in range(0, len(frame_paths), n):
-            p = pool.apply_async(process_video, args=(args['source_img'], frame_paths[i:i+n],))
-            processes.append(p)
-        for p in processes:
-            p.get()
-        pool.close()
-        pool.join()
 def preview_image(image_path):
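The CPU branch above uses threadpoolctl to cap the native thread pools (OpenMP/BLAS) of supported numerical libraries loaded in the process for the duration of the block. A minimal sketch of that mechanism, independent of roop's pipeline; the numpy workload is a stand-in for the real per-frame work.

```python
# Minimal sketch of how threadpool_limits bounds native thread pools while
# a block runs; the matrix workload is a stand-in, not roop's face swapper.
import numpy as np
from threadpoolctl import threadpool_limits

def process_frames(frames):
    # placeholder for per-frame work that dispatches into BLAS/OpenMP
    return [frame @ frame.T for frame in frames]

frames = [np.random.rand(256, 256) for _ in range(4)]

with threadpool_limits(limits=4):   # e.g. the value passed via --cpu-threads
    process_frames(frames)
```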
@@ -194,8 +190,6 @@ def start():
     if not args['output_file']:
         target_path = args['target_path']
         args['output_file'] = rreplace(target_path, "/", "/swapped-", 1) if "/" in target_path else "swapped-" + target_path
-    global pool
-    pool = mp.Pool(args['cores_count'])
     target_path = args['target_path']
     test_face = get_face_single(cv2.imread(args['source_img']))
     if not test_face:
@@ -241,10 +235,8 @@ def start():
 def run():
     global all_faces, keep_frames, limit_fps, status_label, window
     pre_check()
     limit_resources()
     if args['source_img']:
         args['cli_mode'] = True
         start()
@@ -291,4 +283,4 @@ def run():
     status_label = tk.Label(window, width=580, justify="center", text="Status: waiting for input...", fg="#2ecc71", bg="#2d3436")
     status_label.place(x=10,y=640,width=580,height=30)
     window.mainloop()

View File

@@ -1,8 +1,11 @@
 import onnxruntime
+import psutil
-gpu = None
 all_faces = False
 log_level = 'error'
+cpu_threads = max(psutil.cpu_count() - 2, 2)
+gpu_threads = 8
+gpu_vendor = None
 providers = onnxruntime.get_available_providers()
 if 'TensorrtExecutionProvider' in providers:
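As a quick illustration of the new default, assuming a machine where psutil.cpu_count() reports 8 logical CPUs:

```python
# Worked example of the cpu_threads default above: 8 logical CPUs gives
# max(8 - 2, 2) == 6, while a dual-core machine stays clamped at the floor of 2.
import psutil

cpu_threads = max(psutil.cpu_count() - 2, 2)
print(cpu_threads)
```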

View File

@@ -4,6 +4,7 @@ import cv2
 import insightface
 import roop.globals
 from roop.analyser import get_face_single, get_face_many
+import onnxruntime
 FACE_SWAPPER = None
@@ -11,8 +12,15 @@ FACE_SWAPPER = None
 def get_face_swapper():
     global FACE_SWAPPER
     if FACE_SWAPPER is None:
+        session_options = onnxruntime.SessionOptions()
+        if roop.globals.gpu_vendor is not None:
+            session_options.intra_op_num_threads = roop.globals.gpu_threads
+            session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
+        else:
+            session_options.enable_cpu_mem_arena = True
+        session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
         model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
-        FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
+        FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers, session_options=session_options)
     return FACE_SWAPPER
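For context, the knobs set above come from the ONNX Runtime performance-tuning guidance the commit title refers to. Here is a self-contained sketch of building a session with the same options; the model path, thread count, and provider list are placeholders, and this is not roop's exact construction, which goes through insightface.model_zoo.

```python
# Standalone sketch of the ONNX Runtime session tuning applied above.
# The model path, thread count, and providers are placeholder assumptions.
import onnxruntime

session_options = onnxruntime.SessionOptions()
session_options.intra_op_num_threads = 8                # e.g. roop.globals.gpu_threads
session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL

session = onnxruntime.InferenceSession(
    'inswapper_128.onnx',                               # placeholder model path
    sess_options=session_options,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
print(session.get_providers())
```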

View File

@@ -43,13 +43,13 @@ def set_fps(input_path, output_path, fps):
 def create_video(video_name, fps, output_dir):
-    hwaccel_option = '-hwaccel cuda' if roop.globals.gpu == 'nvidia' else ''
+    hwaccel_option = '-hwaccel cuda' if roop.globals.gpu_vendor == 'nvidia' else ''
     output_dir = path(output_dir)
     run_ffmpeg(f'{hwaccel_option} -framerate "{fps}" -i "{output_dir}{sep}%04d.png" -c:v libx264 -crf 7 -pix_fmt yuv420p -y "{output_dir}{sep}output.mp4"')
 def extract_frames(input_path, output_dir):
-    hwaccel_option = '-hwaccel cuda' if roop.globals.gpu == 'nvidia' else ''
+    hwaccel_option = '-hwaccel cuda' if roop.globals.gpu_vendor == 'nvidia' else ''
     input_path, output_dir = path(input_path), path(output_dir)
     run_ffmpeg(f' {hwaccel_option} -i "{input_path}" "{output_dir}{sep}%04d.png"')