Follow ONNX_Runtime_Perf_Tuning and introduce new args
This commit is contained in:
parent
6bd3724443
commit
8734a6c2e0
11
README.md
11
README.md
@ -40,14 +40,17 @@ options:
|
||||
replace this face
|
||||
-o OUTPUT_FILE, --output OUTPUT_FILE
|
||||
save output to this file
|
||||
--gpu use gpu
|
||||
--keep-fps maintain original fps
|
||||
--keep-frames keep frames directory
|
||||
--all-faces swap all faces in frame
|
||||
--max-memory MAX_MEMORY
|
||||
maximum amount of RAM in GB to be used
|
||||
--max-cores CORES_COUNT
|
||||
number of cores to be use for CPU mode
|
||||
--all-faces swap all faces in frame
|
||||
--cpu-threads CPU_THREADS
|
||||
number of threads to be use for CPU mode
|
||||
--gpu-threads GPU_THREADS
|
||||
number of threads to be use for GPU mode
|
||||
--gpu-vendor {amd,intel,nvidia}
|
||||
choice your gpu vendor
|
||||
```
|
||||
|
||||
Looking for a CLI mode? Passing the -f/--face argument makes the program run in CLI mode.
|
||||
|
@ -13,3 +13,4 @@ tensorflow==2.12.0; sys_platform != 'darwin'
|
||||
opennsfw2==0.10.2
|
||||
protobuf==4.23.2
|
||||
tqdm==4.65.0
|
||||
threadpoolctl==3.1.0
|
58
roop/core.py
58
roop/core.py
@ -6,7 +6,6 @@ import sys
|
||||
import shutil
|
||||
import glob
|
||||
import argparse
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import torch
|
||||
from pathlib import Path
|
||||
@ -15,9 +14,9 @@ from tkinter import filedialog
|
||||
from opennsfw2 import predict_video_frames, predict_image
|
||||
from tkinter.filedialog import asksaveasfilename
|
||||
import webbrowser
|
||||
import psutil
|
||||
import cv2
|
||||
import threading
|
||||
from threadpoolctl import threadpool_limits
|
||||
from PIL import Image, ImageTk
|
||||
|
||||
import roop.globals
|
||||
@ -28,30 +27,35 @@ from roop.analyser import get_face_single
|
||||
if 'ROCMExecutionProvider' in roop.globals.providers:
|
||||
del torch
|
||||
|
||||
pool = None
|
||||
args = {}
|
||||
|
||||
signal.signal(signal.SIGINT, lambda signal_number, frame: quit())
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-f', '--face', help='use this face', dest='source_img')
|
||||
parser.add_argument('-t', '--target', help='replace this face', dest='target_path')
|
||||
parser.add_argument('-o', '--output', help='save output to this file', dest='output_file')
|
||||
parser.add_argument('--gpu', help='choice your gpu vendor', dest='gpu', choices=['amd', 'nvidia'])
|
||||
parser.add_argument('--keep-fps', help='maintain original fps', dest='keep_fps', action='store_true', default=False)
|
||||
parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False)
|
||||
parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', type=int)
|
||||
parser.add_argument('--max-cores', help='number of cores to be use for CPU mode', dest='cores_count', type=int, default=max(psutil.cpu_count() - 2, 2))
|
||||
parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False)
|
||||
parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int)
|
||||
parser.add_argument('--cpu-threads', help='number of threads to be use for CPU mode', dest='cpu_threads', type=int)
|
||||
parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int)
|
||||
parser.add_argument('--gpu-vendor', help='choice your gpu vendor', dest='gpu_vendor', choices=['amd', 'intel', 'nvidia'])
|
||||
|
||||
args = {}
|
||||
for name, value in vars(parser.parse_args()).items():
|
||||
args[name] = value
|
||||
|
||||
if 'gpu' in args:
|
||||
roop.globals.gpu = args['gpu']
|
||||
|
||||
if 'all-faces' in args:
|
||||
if 'all_faces' in args:
|
||||
roop.globals.all_faces = True
|
||||
|
||||
if 'cpu_threads' in args and args['cpu_threads']:
|
||||
roop.globals.cpu_threads = args['cpu_threads']
|
||||
|
||||
if 'gpu_threads' in args and args['gpu_threads']:
|
||||
roop.globals.gpu_threads = args['gpu_threads']
|
||||
|
||||
if 'gpu_vendor' in args and args['gpu_vendor']:
|
||||
roop.globals.gpu_vendor = args['gpu_vendor']
|
||||
|
||||
sep = "/"
|
||||
if os.name == "nt":
|
||||
sep = "\\"
|
||||
@ -77,10 +81,10 @@ def pre_check():
|
||||
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
|
||||
if not os.path.isfile(model_path):
|
||||
quit('File "inswapper_128.onnx" does not exist!')
|
||||
if roop.globals.gpu == 'amd':
|
||||
if roop.globals.gpu_vendor == 'amd':
|
||||
if 'ROCMExecutionProvider' not in roop.globals.providers:
|
||||
quit("You are using --gpu=amd flag but ROCM isn't available or properly installed on your system.")
|
||||
if roop.globals.gpu == 'nvidia':
|
||||
if roop.globals.gpu_vendor == 'nvidia':
|
||||
CUDA_VERSION = torch.version.cuda
|
||||
CUDNN_VERSION = torch.backends.cudnn.version()
|
||||
if not torch.cuda.is_available() or not CUDA_VERSION:
|
||||
@ -98,22 +102,14 @@ def pre_check():
|
||||
|
||||
|
||||
def start_processing():
|
||||
frame_paths = args["frame_paths"]
|
||||
n = len(frame_paths) // (args['cores_count'])
|
||||
# single thread
|
||||
if roop.globals.gpu == 'amd' or roop.globals.gpu == 'nvidia' or n < 2:
|
||||
# gpu mode
|
||||
if roop.globals.gpu_vendor is not None:
|
||||
process_video(args['source_img'], args["frame_paths"])
|
||||
return
|
||||
# cpu mode
|
||||
with threadpool_limits(limits=roop.globals.cpu_threads):
|
||||
process_video(args['source_img'], args["frame_paths"])
|
||||
return
|
||||
# multithread if total frames to cpu cores ratio is greater than 2
|
||||
if n > 2:
|
||||
processes = []
|
||||
for i in range(0, len(frame_paths), n):
|
||||
p = pool.apply_async(process_video, args=(args['source_img'], frame_paths[i:i+n],))
|
||||
processes.append(p)
|
||||
for p in processes:
|
||||
p.get()
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
|
||||
def preview_image(image_path):
|
||||
@ -194,8 +190,6 @@ def start():
|
||||
if not args['output_file']:
|
||||
target_path = args['target_path']
|
||||
args['output_file'] = rreplace(target_path, "/", "/swapped-", 1) if "/" in target_path else "swapped-" + target_path
|
||||
global pool
|
||||
pool = mp.Pool(args['cores_count'])
|
||||
target_path = args['target_path']
|
||||
test_face = get_face_single(cv2.imread(args['source_img']))
|
||||
if not test_face:
|
||||
@ -241,10 +235,8 @@ def start():
|
||||
|
||||
def run():
|
||||
global all_faces, keep_frames, limit_fps, status_label, window
|
||||
|
||||
pre_check()
|
||||
limit_resources()
|
||||
|
||||
if args['source_img']:
|
||||
args['cli_mode'] = True
|
||||
start()
|
||||
@ -291,4 +283,4 @@ def run():
|
||||
status_label = tk.Label(window, width=580, justify="center", text="Status: waiting for input...", fg="#2ecc71", bg="#2d3436")
|
||||
status_label.place(x=10,y=640,width=580,height=30)
|
||||
|
||||
window.mainloop()
|
||||
window.mainloop()
|
||||
|
@ -1,8 +1,11 @@
|
||||
import onnxruntime
|
||||
import psutil
|
||||
|
||||
gpu = None
|
||||
all_faces = False
|
||||
log_level = 'error'
|
||||
cpu_threads = max(psutil.cpu_count() - 2, 2)
|
||||
gpu_threads = 8
|
||||
gpu_vendor = None
|
||||
providers = onnxruntime.get_available_providers()
|
||||
|
||||
if 'TensorrtExecutionProvider' in providers:
|
||||
|
@ -4,6 +4,7 @@ import cv2
|
||||
import insightface
|
||||
import roop.globals
|
||||
from roop.analyser import get_face_single, get_face_many
|
||||
import onnxruntime
|
||||
|
||||
FACE_SWAPPER = None
|
||||
|
||||
@ -11,8 +12,15 @@ FACE_SWAPPER = None
|
||||
def get_face_swapper():
|
||||
global FACE_SWAPPER
|
||||
if FACE_SWAPPER is None:
|
||||
session_options = onnxruntime.SessionOptions()
|
||||
if roop.globals.gpu_vendor is not None:
|
||||
session_options.intra_op_num_threads = roop.globals.gpu_threads
|
||||
session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
|
||||
else:
|
||||
session_options.enable_cpu_mem_arena = True
|
||||
session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
|
||||
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
|
||||
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers, session_options=session_options)
|
||||
return FACE_SWAPPER
|
||||
|
||||
|
||||
|
@ -43,13 +43,13 @@ def set_fps(input_path, output_path, fps):
|
||||
|
||||
|
||||
def create_video(video_name, fps, output_dir):
|
||||
hwaccel_option = '-hwaccel cuda' if roop.globals.gpu == 'nvidia' else ''
|
||||
hwaccel_option = '-hwaccel cuda' if roop.globals.gpu_vendor == 'nvidia' else ''
|
||||
output_dir = path(output_dir)
|
||||
run_ffmpeg(f'{hwaccel_option} -framerate "{fps}" -i "{output_dir}{sep}%04d.png" -c:v libx264 -crf 7 -pix_fmt yuv420p -y "{output_dir}{sep}output.mp4"')
|
||||
|
||||
|
||||
def extract_frames(input_path, output_dir):
|
||||
hwaccel_option = '-hwaccel cuda' if roop.globals.gpu == 'nvidia' else ''
|
||||
hwaccel_option = '-hwaccel cuda' if roop.globals.gpu_vendor == 'nvidia' else ''
|
||||
input_path, output_dir = path(input_path), path(output_dir)
|
||||
run_ffmpeg(f' {hwaccel_option} -i "{input_path}" "{output_dir}{sep}%04d.png"')
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user