Roop-multi changed the implementation of multi-threading processing for nvidia GPU. (#317)
* changed the multi-thread implementation for nvidia gpu
* Update requirements.txt
* Add files via upload
* fix core.py and swapper.py
* fix core.py
* code clean
* code clean
* doubles performance of gpu-mode
---------
Co-authored-by: Moeblack <Moeblack@kuroinekorachi@gmail.com>
Co-authored-by: Somdev Sangwan <s0md3v@gmail.com>
This commit is contained in:
parent
160a16f4b5
commit
f200b4c7b4
@ -1,5 +1,4 @@
|
|||||||
import insightface
|
import insightface
|
||||||
import onnxruntime
|
|
||||||
import roop.globals
|
import roop.globals
|
||||||
|
|
||||||
FACE_ANALYSER = None
|
FACE_ANALYSER = None
|
||||||
@ -8,12 +7,6 @@ FACE_ANALYSER = None
|
|||||||
def get_face_analyser():
|
def get_face_analyser():
|
||||||
global FACE_ANALYSER
|
global FACE_ANALYSER
|
||||||
if FACE_ANALYSER is None:
|
if FACE_ANALYSER is None:
|
||||||
session_options = onnxruntime.SessionOptions()
|
|
||||||
if roop.globals.gpu_vendor is not None:
|
|
||||||
session_options.intra_op_num_threads = roop.globals.gpu_threads
|
|
||||||
else:
|
|
||||||
session_options.intra_op_num_threads = roop.globals.cpu_threads
|
|
||||||
session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
|
|
||||||
FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers)
|
FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers)
|
||||||
FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640))
|
FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640))
|
||||||
return FACE_ANALYSER
|
return FACE_ANALYSER
|
||||||
|
@ -10,6 +10,7 @@ import signal
|
|||||||
import shutil
|
import shutil
|
||||||
import glob
|
import glob
|
||||||
import argparse
|
import argparse
|
||||||
|
import psutil
|
||||||
import torch
|
import torch
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from opennsfw2 import predict_video_frames, predict_image
|
from opennsfw2 import predict_video_frames, predict_image
|
||||||
@ -33,11 +34,12 @@ parser.add_argument('--keep-fps', help='maintain original fps', dest='keep_fps',
|
|||||||
parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False)
|
parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False)
|
||||||
parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False)
|
parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False)
|
||||||
parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int)
|
parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int)
|
||||||
parser.add_argument('--cpu-threads', help='number of threads to be use for CPU mode', dest='cpu_threads', type=int)
|
parser.add_argument('--cpu-threads', help='number of threads to be use for CPU mode', dest='cpu_threads', type=int, default=max(psutil.cpu_count() - 2, 2))
|
||||||
parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int)
|
parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int, default=4)
|
||||||
parser.add_argument('--gpu-vendor', help='choice your gpu vendor', dest='gpu_vendor', choices=['apple', 'amd', 'intel', 'nvidia'])
|
parser.add_argument('--gpu-vendor', help='choice your gpu vendor', dest='gpu_vendor', choices=['apple', 'amd', 'intel', 'nvidia'])
|
||||||
|
|
||||||
args = {}
|
args = {}
|
||||||
|
|
||||||
for name, value in vars(parser.parse_args()).items():
|
for name, value in vars(parser.parse_args()).items():
|
||||||
args[name] = value
|
args[name] = value
|
||||||
|
|
||||||
@ -218,8 +220,7 @@ def save_file_handler(path: str):
|
|||||||
def create_test_preview(frame_number):
|
def create_test_preview(frame_number):
|
||||||
return process_faces(
|
return process_faces(
|
||||||
get_face_single(cv2.imread(args['source_img'])),
|
get_face_single(cv2.imread(args['source_img'])),
|
||||||
get_video_frame(args['target_path'], frame_number),
|
get_video_frame(args['target_path'], frame_number)
|
||||||
None
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import onnxruntime
|
import onnxruntime
|
||||||
import psutil
|
|
||||||
|
|
||||||
all_faces = False
|
all_faces = None
|
||||||
log_level = 'error'
|
log_level = 'error'
|
||||||
cpu_threads = max(psutil.cpu_count() - 2, 2)
|
cpu_threads = None
|
||||||
gpu_threads = 8
|
gpu_threads = None
|
||||||
gpu_vendor = None
|
gpu_vendor = None
|
||||||
providers = onnxruntime.get_available_providers()
|
providers = onnxruntime.get_available_providers()
|
||||||
|
|
||||||
|
@ -1,27 +1,22 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import torch
|
|
||||||
import onnxruntime
|
|
||||||
import cv2
|
import cv2
|
||||||
import insightface
|
import insightface
|
||||||
|
import threading
|
||||||
import roop.globals
|
import roop.globals
|
||||||
from roop.analyser import get_face_single, get_face_many
|
from roop.analyser import get_face_single, get_face_many
|
||||||
|
|
||||||
FACE_SWAPPER = None
|
FACE_SWAPPER = None
|
||||||
|
THREAD_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def get_face_swapper():
|
def get_face_swapper():
|
||||||
global FACE_SWAPPER
|
global FACE_SWAPPER
|
||||||
|
with THREAD_LOCK:
|
||||||
if FACE_SWAPPER is None:
|
if FACE_SWAPPER is None:
|
||||||
session_options = onnxruntime.SessionOptions()
|
|
||||||
if roop.globals.gpu_vendor is not None:
|
|
||||||
session_options.intra_op_num_threads = roop.globals.gpu_threads
|
|
||||||
else:
|
|
||||||
session_options.intra_op_num_threads = roop.globals.cpu_threads
|
|
||||||
session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
|
|
||||||
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
|
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
|
||||||
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers, session_options=session_options)
|
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
|
||||||
return FACE_SWAPPER
|
return FACE_SWAPPER
|
||||||
|
|
||||||
|
|
||||||
@ -31,7 +26,7 @@ def swap_face_in_frame(source_face, target_face, frame):
|
|||||||
return frame
|
return frame
|
||||||
|
|
||||||
|
|
||||||
def process_faces(source_face, target_frame, progress):
|
def process_faces(source_face, target_frame):
|
||||||
if roop.globals.all_faces:
|
if roop.globals.all_faces:
|
||||||
many_faces = get_face_many(target_frame)
|
many_faces = get_face_many(target_frame)
|
||||||
if many_faces:
|
if many_faces:
|
||||||
@ -44,25 +39,45 @@ def process_faces(source_face, target_frame, progress):
|
|||||||
return target_frame
|
return target_frame
|
||||||
|
|
||||||
|
|
||||||
def process_video(source_img, frame_paths, preview_callback):
|
def process_frames(source_face, frame_paths, progress):
|
||||||
source_face = get_face_single(cv2.imread(source_img))
|
|
||||||
progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
|
|
||||||
|
|
||||||
with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress:
|
|
||||||
for frame_path in frame_paths:
|
for frame_path in frame_paths:
|
||||||
if roop.globals.gpu_vendor == 'nvidia':
|
|
||||||
progress.set_postfix(cuda_utilization="{:02d}%".format(torch.cuda.utilization()), cuda_memory="{:02d}GB".format(torch.cuda.memory_usage()))
|
|
||||||
frame = cv2.imread(frame_path)
|
frame = cv2.imread(frame_path)
|
||||||
try:
|
try:
|
||||||
result = process_faces(source_face, frame, progress)
|
result = process_faces(source_face, frame)
|
||||||
cv2.imwrite(frame_path, result)
|
cv2.imwrite(frame_path, result)
|
||||||
if preview_callback:
|
|
||||||
preview_callback(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
progress.update(1)
|
progress.update(1)
|
||||||
|
|
||||||
|
|
||||||
|
def multi_process_frame(source_face, frame_paths, progress):
    """Process ``frame_paths`` on several worker threads.

    The frame list is cut into contiguous slices, one per thread, and every
    slice is handed to ``process_frames`` together with ``source_face`` and
    the shared ``progress`` object. The call blocks until all workers have
    finished.

    Args:
        source_face: Face object forwarded unchanged to ``process_frames``.
        frame_paths: Sequence of frame file paths to process.
        progress: Progress object forwarded unchanged to ``process_frames``.
    """
    total_frames = len(frame_paths)
    if total_frames == 0:
        # Nothing to do; avoid spawning idle threads.
        return

    # roop.globals.gpu_threads may be unset (None) or non-positive. Fall back
    # to a single worker in that case, and never start more workers than
    # there are frames (extra workers would only receive empty slices).
    requested_threads = roop.globals.gpu_threads or 1
    num_threads = max(1, min(requested_threads, total_frames))

    # Calculate the number of frames each thread processes.
    num_frames_per_thread, remaining_frames = divmod(total_frames, num_threads)

    # Create and launch the threads.
    threads = []
    start_index = 0
    for thread_index in range(num_threads):
        end_index = start_index + num_frames_per_thread
        if thread_index < remaining_frames:
            # The first `remaining_frames` workers take one extra frame each
            # so the remainder is spread evenly.
            end_index += 1
        thread = threading.Thread(
            target=process_frames,
            args=(source_face, frame_paths[start_index:end_index], progress),
        )
        threads.append(thread)
        thread.start()
        start_index = end_index

    # Wait for every worker to finish.
    for thread in threads:
        thread.join()
|
||||||
|
|
||||||
|
|
||||||
def process_img(source_img, target_path, output_file):
|
def process_img(source_img, target_path, output_file):
|
||||||
frame = cv2.imread(target_path)
|
frame = cv2.imread(target_path)
|
||||||
face = get_face_single(frame)
|
face = get_face_single(frame)
|
||||||
@ -70,3 +85,13 @@ def process_img(source_img, target_path, output_file):
|
|||||||
result = get_face_swapper().get(frame, face, source_face, paste_back=True)
|
result = get_face_swapper().get(frame, face, source_face, paste_back=True)
|
||||||
cv2.imwrite(output_file, result)
|
cv2.imwrite(output_file, result)
|
||||||
print("\n\nImage saved as:", output_file, "\n\n")
|
print("\n\nImage saved as:", output_file, "\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
def process_video(source_img, frame_paths, preview_callback):
    """Swap the face found in ``source_img`` into every frame in ``frame_paths``.

    The source face is detected once, then the frames are processed either
    by ``multi_process_frame`` (when ``roop.globals.gpu_vendor`` is set) or
    sequentially by ``process_frames``. A tqdm progress bar tracks the work.

    Args:
        source_img: Path of the image containing the source face.
        frame_paths: Sequence of frame file paths to process.
        preview_callback: Kept for interface compatibility; this
            implementation does not invoke it.
    """
    source_face = get_face_single(cv2.imread(source_img))
    progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
    with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress:
        if roop.globals.gpu_vendor is not None:
            multi_process_frame(source_face, frame_paths, progress)
        else:
            # process_frames expects the detected face, not the image path.
            # Passing the path made every frame fail inside its try/except,
            # so CPU mode silently produced unmodified frames.
            process_frames(source_face, frame_paths, progress)
|
||||||
|
Loading…
Reference in New Issue
Block a user