From f200b4c7b446ea43f769f8a78acec1319e01b300 Mon Sep 17 00:00:00 2001 From: Pikachu~~~ Date: Sun, 4 Jun 2023 19:49:27 +0800 Subject: [PATCH] Roop-multi changed the implementation of multi-threading processing for nvidia GPU. (#317) * changed the multi-thread implementation for nvidia gpu * Update requirements.txt * Add files via upload * fix core.py and swapper.py * fix core.py * code clean * code clean * doubles performance of gpu-mode --------- Co-authored-by: Moeblack Co-authored-by: Somdev Sangwan --- requirements.txt | 2 +- roop/analyser.py | 7 -- roop/core.py | 9 +-- roop/globals.py | 7 +- roop/swapper.py | 169 +++++++++++++++++++++++++++-------------------- 5 files changed, 106 insertions(+), 88 deletions(-) diff --git a/requirements.txt b/requirements.txt index f51fc17..0897eb9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,4 @@ tensorflow==2.12.0; sys_platform != 'darwin' opennsfw2==0.10.2 protobuf==4.23.2 pynvml==11.5.0 -tqdm==4.65.0 +tqdm==4.65.0 \ No newline at end of file diff --git a/roop/analyser.py b/roop/analyser.py index 716af3b..804f7a8 100644 --- a/roop/analyser.py +++ b/roop/analyser.py @@ -1,5 +1,4 @@ import insightface -import onnxruntime import roop.globals FACE_ANALYSER = None @@ -8,12 +7,6 @@ FACE_ANALYSER = None def get_face_analyser(): global FACE_ANALYSER if FACE_ANALYSER is None: - session_options = onnxruntime.SessionOptions() - if roop.globals.gpu_vendor is not None: - session_options.intra_op_num_threads = roop.globals.gpu_threads - else: - session_options.intra_op_num_threads = roop.globals.cpu_threads - session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers) FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640)) return FACE_ANALYSER diff --git a/roop/core.py b/roop/core.py index ad0c2f4..b32dc32 100755 --- a/roop/core.py +++ b/roop/core.py @@ -10,6 +10,7 @@ import signal import shutil import glob 
import argparse +import psutil import torch from pathlib import Path from opennsfw2 import predict_video_frames, predict_image @@ -33,11 +34,12 @@ parser.add_argument('--keep-fps', help='maintain original fps', dest='keep_fps', parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False) parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False) parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int) -parser.add_argument('--cpu-threads', help='number of threads to be use for CPU mode', dest='cpu_threads', type=int) -parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int) +parser.add_argument('--cpu-threads', help='number of threads to be use for CPU mode', dest='cpu_threads', type=int, default=max(psutil.cpu_count() - 2, 2)) +parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int, default=4) parser.add_argument('--gpu-vendor', help='choice your gpu vendor', dest='gpu_vendor', choices=['apple', 'amd', 'intel', 'nvidia']) args = {} + for name, value in vars(parser.parse_args()).items(): args[name] = value @@ -218,8 +220,7 @@ def save_file_handler(path: str): def create_test_preview(frame_number): return process_faces( get_face_single(cv2.imread(args['source_img'])), - get_video_frame(args['target_path'], frame_number), - None + get_video_frame(args['target_path'], frame_number) ) diff --git a/roop/globals.py b/roop/globals.py index 1c1bc49..da3cfac 100644 --- a/roop/globals.py +++ b/roop/globals.py @@ -1,10 +1,9 @@ import onnxruntime -import psutil -all_faces = False +all_faces = None log_level = 'error' -cpu_threads = max(psutil.cpu_count() - 2, 2) -gpu_threads = 8 +cpu_threads = None +gpu_threads = None gpu_vendor = None providers = onnxruntime.get_available_providers() 
diff --git a/roop/swapper.py b/roop/swapper.py
index 9f25b46..de00920 100644
--- a/roop/swapper.py
+++ b/roop/swapper.py
@@ -1,72 +1,104 @@
-import os
-from tqdm import tqdm
-import torch
-import onnxruntime
-import cv2
-import insightface
-
-import roop.globals
-from roop.analyser import get_face_single, get_face_many
-
-FACE_SWAPPER = None
-
-
-def get_face_swapper():
-    global FACE_SWAPPER
-    if FACE_SWAPPER is None:
-        session_options = onnxruntime.SessionOptions()
-        if roop.globals.gpu_vendor is not None:
-            session_options.intra_op_num_threads = roop.globals.gpu_threads
-        else:
-            session_options.intra_op_num_threads = roop.globals.cpu_threads
-        session_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
-        model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
-        FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers, session_options=session_options)
-    return FACE_SWAPPER
-
-
-def swap_face_in_frame(source_face, target_face, frame):
-    if target_face:
-        return get_face_swapper().get(frame, target_face, source_face, paste_back=True)
-    return frame
-
-
-def process_faces(source_face, target_frame, progress):
-    if roop.globals.all_faces:
-        many_faces = get_face_many(target_frame)
-        if many_faces:
-            for face in many_faces:
-                target_frame = swap_face_in_frame(source_face, face, target_frame)
-    else:
-        face = get_face_single(target_frame)
-        if face:
-            target_frame = swap_face_in_frame(source_face, face, target_frame)
-    return target_frame
-
-
-def process_video(source_img, frame_paths, preview_callback):
-    source_face = get_face_single(cv2.imread(source_img))
-    progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
-
-    with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        for frame_path in frame_paths:
-            if roop.globals.gpu_vendor == 'nvidia':
-                progress.set_postfix(cuda_utilization="{:02d}%".format(torch.cuda.utilization()), cuda_memory="{:02d}GB".format(torch.cuda.memory_usage()))
-            frame = cv2.imread(frame_path)
-            try:
-                result = process_faces(source_face, frame, progress)
-                cv2.imwrite(frame_path, result)
-                if preview_callback:
-                    preview_callback(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
-            except Exception:
-                pass
-            progress.update(1)
-
-
-def process_img(source_img, target_path, output_file):
-    frame = cv2.imread(target_path)
-    face = get_face_single(frame)
-    source_face = get_face_single(cv2.imread(source_img))
-    result = get_face_swapper().get(frame, face, source_face, paste_back=True)
-    cv2.imwrite(output_file, result)
-    print("\n\nImage saved as:", output_file, "\n\n")
+import os
+from tqdm import tqdm
+import cv2
+import insightface
+import threading
+import roop.globals
+from roop.analyser import get_face_single, get_face_many
+
+FACE_SWAPPER = None
+THREAD_LOCK = threading.Lock()
+
+
+def get_face_swapper():
+    """Return the shared face swapper model, loading it on first use."""
+    global FACE_SWAPPER
+    # the lock keeps concurrent callers from loading the model more than once
+    with THREAD_LOCK:
+        if FACE_SWAPPER is None:
+            model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../inswapper_128.onnx')
+            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
+    return FACE_SWAPPER
+
+
+def swap_face_in_frame(source_face, target_face, frame):
+    """Swap source_face onto target_face in frame; return frame unchanged when target_face is falsy."""
+    if target_face:
+        return get_face_swapper().get(frame, target_face, source_face, paste_back=True)
+    return frame
+
+
+def process_faces(source_face, target_frame):
+    """Swap every face from get_face_many when roop.globals.all_faces is set, else the one from get_face_single."""
+    if roop.globals.all_faces:
+        many_faces = get_face_many(target_frame)
+        if many_faces:
+            for face in many_faces:
+                target_frame = swap_face_in_frame(source_face, face, target_frame)
+    else:
+        face = get_face_single(target_frame)
+        if face:
+            target_frame = swap_face_in_frame(source_face, face, target_frame)
+    return target_frame
+
+
+def process_frames(source_face, frame_paths, progress):
+    """Swap faces in each frame file in place, advancing progress once per frame."""
+    for frame_path in frame_paths:
+        frame = cv2.imread(frame_path)
+        try:
+            result = process_faces(source_face, frame)
+            cv2.imwrite(frame_path, result)
+        except Exception as error:
+            # best effort: keep going, but report the frame instead of hiding the failure
+            progress.write(f'Skipped {frame_path}: {error}')
+        progress.update(1)
+
+
+def multi_process_frame(source_face, frame_paths, progress):
+    """Split frame_paths into contiguous chunks and run process_frames on each chunk in its own thread."""
+    if not frame_paths:
+        return
+
+    # gpu_threads may be unset or non-positive; never start more threads than there are frames
+    num_threads = max(1, min(roop.globals.gpu_threads or 1, len(frame_paths)))
+    # the first `remaining_frames` threads each take one extra frame
+    num_frames_per_thread, remaining_frames = divmod(len(frame_paths), num_threads)
+
+    threads = []
+    start_index = 0
+    for thread_index in range(num_threads):
+        end_index = start_index + num_frames_per_thread + (1 if thread_index < remaining_frames else 0)
+        thread = threading.Thread(target=process_frames, args=(source_face, frame_paths[start_index:end_index], progress))
+        threads.append(thread)
+        thread.start()
+        start_index = end_index
+
+    # wait for every worker to finish
+    for thread in threads:
+        thread.join()
+
+
+def process_img(source_img, target_path, output_file):
+    """Swap the face from source_img onto the face found in target_path and write the result to output_file."""
+    frame = cv2.imread(target_path)
+    face = get_face_single(frame)
+    source_face = get_face_single(cv2.imread(source_img))
+    result = get_face_swapper().get(frame, face, source_face, paste_back=True)
+    cv2.imwrite(output_file, result)
+    print("\n\nImage saved as:", output_file, "\n\n")
+
+
+def process_video(source_img, frame_paths, preview_callback):
+    """Swap the face from source_img into every frame file listed in frame_paths.
+
+    preview_callback is accepted for interface compatibility but is not invoked here.
+    """
+    source_face = get_face_single(cv2.imread(source_img))
+    progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
+    with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress:
+        if roop.globals.gpu_vendor is not None:
+            multi_process_frame(source_face, frame_paths, progress)
+        else:
+            # the worker needs the detected face, not the path of the source image
+            process_frames(source_face, frame_paths, progress)