diff --git a/roop/core.py b/roop/core.py index e91ae99..0957d05 100755 --- a/roop/core.py +++ b/roop/core.py @@ -2,6 +2,9 @@ import os import sys +# single thread doubles performance of gpu-mode - needs to be set before torch import +if any(arg.startswith('--gpu-vendor=') for arg in sys.argv): + os.environ['OMP_NUM_THREADS'] = '1' import platform import signal import shutil @@ -20,7 +23,6 @@ from roop.utils import is_img, detect_fps, set_fps, create_video, add_audio, ext from roop.analyser import get_face_single import roop.ui as ui - signal.signal(signal.SIGINT, lambda signal_number, frame: quit()) parser = argparse.ArgumentParser() parser.add_argument('-f', '--face', help='use this face', dest='source_img') @@ -57,6 +59,8 @@ sep = "/" if os.name == "nt": sep = "\\" +POOL = None + def limit_resources(): if args['max_memory']: @@ -140,12 +144,12 @@ def process_video_multi_cores(source_img, frame_paths): if n > 2: processes = [] for i in range(0, len(frame_paths), n): - p = pool.apply_async(process_frames, args=(source_img, frame_paths[i:i+n],)) + p = POOL.apply_async(process_video, args=(source_img, frame_paths[i:i + n],)) processes.append(p) for p in processes: p.get() - pool.close() - pool.join() + POOL.close() + POOL.join() def start(preview_callback = None): @@ -192,8 +196,8 @@ def start(preview_callback = None): )) status("swapping in progress...") if sys.platform != 'darwin' and roop.globals.gpu_vendor is None: - global pool - pool = mp.Pool(roop.globals.cpu_cores) + global POOL + POOL = mp.Pool(roop.globals.cpu_cores) process_video_multi_cores(args['source_img'], args['frame_paths']) else: process_video(args['source_img'], args["frame_paths"], preview_callback) diff --git a/roop/swapper.py b/roop/swapper.py index bc4730a..c5328a3 100644 --- a/roop/swapper.py +++ b/roop/swapper.py @@ -53,15 +53,11 @@ def process_frames(source_img, frame_paths, progress=None): def multi_process_frame(source_img, frame_paths, progress): - - # caculate the number of frames each threads processed + threads = [] num_threads = roop.globals.gpu_threads num_frames_per_thread = len(frame_paths) // num_threads remaining_frames = len(frame_paths) % num_threads - - # initialize thread list - threads = [] - + # create thread and launch start_index = 0 for _ in range(num_threads): @@ -92,7 +88,7 @@ def process_img(source_img, target_path, output_file): def process_video(source_img, frame_paths, preview_callback): progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]' with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress: - if roop.globals.gpu_vendor == "nvidia": # multi-threading breaks in AMD + if roop.globals.gpu_vendor is not None and roop.globals.gpu_threads > 0: multi_process_frame(source_img, frame_paths, progress) else: process_frames(source_img, frame_paths, progress)