diff --git a/README.md b/README.md index 2c22d06..1416826 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,12 @@ options: --all-faces swap all faces in frame --max-memory MAX_MEMORY maximum amount of RAM in GB to be used - --cpu-threads CPU_THREADS - number of threads to be use for CPU mode + --cpu-cores CPU_CORES + number of CPU cores to use --gpu-threads GPU_THREADS - number of threads to be use for GPU moded - --gpu-vendor {amd,intel,nvidia} - choice your gpu vendor + number of threads to be use for the GPU + --gpu-vendor {apple,amd,intel,nvidia} + choice your GPU vendor ``` Looking for a CLI mode? Using the -f/--face argument will make the program in cli mode. diff --git a/requirements.txt b/requirements.txt index 0897eb9..bf185d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 + numpy==1.23.5 opencv-python==4.7.0.72 onnx==1.14.0 @@ -5,7 +7,7 @@ insightface==0.7.3 psutil==5.9.5 tk==0.1.0 pillow==9.5.0 -torch==2.0.1 +torch==2.0.1+cu118 onnxruntime==1.15.0; sys_platform == 'darwin' and platform_machine != 'arm64' onnxruntime-silicon==1.13.1; sys_platform == 'darwin' and platform_machine == 'arm64' onnxruntime-gpu==1.15.0; sys_platform != 'darwin' @@ -13,5 +15,4 @@ tensorflow==2.13.0rc1; sys_platform == 'darwin' tensorflow==2.12.0; sys_platform != 'darwin' opennsfw2==0.10.2 protobuf==4.23.2 -pynvml==11.5.0 tqdm==4.65.0 \ No newline at end of file diff --git a/roop/core.py b/roop/core.py index 96eb1c7..307e761 100755 --- a/roop/core.py +++ b/roop/core.py @@ -2,6 +2,9 @@ import os import sys +# single thread doubles performance of gpu-mode - needs to be set before torch import +if any(arg.startswith('--gpu-vendor=') for arg in sys.argv): + os.environ['OMP_NUM_THREADS'] = '1' import platform import signal import shutil @@ -20,7 +23,6 @@ from roop.utils import is_img, detect_fps, set_fps, create_video, add_audio, ext from roop.analyser import get_face_single import roop.ui as ui - signal.signal(signal.SIGINT, lambda signal_number, frame: quit()) parser = argparse.ArgumentParser() parser.add_argument('-f', '--face', help='use this face', dest='source_img') @@ -30,26 +32,31 @@ parser.add_argument('--keep-fps', help='maintain original fps', dest='keep_fps', parser.add_argument('--keep-frames', help='keep frames directory', dest='keep_frames', action='store_true', default=False) parser.add_argument('--all-faces', help='swap all faces in frame', dest='all_faces', action='store_true', default=False) parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int) -parser.add_argument('--max-cores', help='number of cores to use at max', dest='max_cores', type=int, default=max(psutil.cpu_count() - 2, 2)) -parser.add_argument('--gpu-threads', help='number of threads to be use for GPU mode', dest='gpu_threads', type=int, default=4) -parser.add_argument('--gpu-vendor', help='choice your gpu vendor', dest='gpu_vendor', choices=['apple', 'amd', 'intel', 'nvidia']) +parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=max(psutil.cpu_count() / 2, 2)) +parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=4) +parser.add_argument('--gpu-vendor', help='choice your GPU vendor', dest='gpu_vendor', choices=['apple', 'amd', 'intel', 'nvidia']) -args = {} - -for name, value in vars(parser.parse_args()).items(): - args[name] = value +args = parser.parse_known_args()[0] if 'all_faces' in args: roop.globals.all_faces = True -if args['max_cores']: - roop.globals.max_cores = args['max_cores'] +if args.cpu_cores: + roop.globals.cpu_cores = int(args.cpu_cores) -if args['gpu_threads']: - roop.globals.gpu_threads = args['gpu_threads'] +# cpu thread fix for mac +if sys.platform == 'darwin': + roop.globals.cpu_cores = 1 -if args['gpu_vendor']: - roop.globals.gpu_vendor = args['gpu_vendor'] +if args.gpu_threads: + roop.globals.gpu_threads = int(args.gpu_threads) + +# gpu thread fix for amd +if args.gpu_vendor == 'amd': + roop.globals.gpu_threads = 1 + +if args.gpu_vendor: + roop.globals.gpu_vendor = args.gpu_vendor else: roop.globals.providers = ['CPUExecutionProvider'] @@ -59,8 +66,8 @@ if os.name == "nt": def limit_resources(): - if args['max_memory']: - memory = args['max_memory'] * 1024 * 1024 * 1024 + if args.max_memory: + memory = args.max_memory * 1024 * 1024 * 1024 if str(platform.system()).lower() == 'windows': import ctypes kernel32 = ctypes.windll.kernel32 @@ -81,13 +88,13 @@ def pre_check(): if roop.globals.gpu_vendor == 'apple': if 'CoreMLExecutionProvider' not in roop.globals.providers: quit("You are using --gpu=apple flag but CoreML isn't available or properly installed on your system.") - elif roop.globals.gpu_vendor == 'amd': + if roop.globals.gpu_vendor == 'amd': if 'ROCMExecutionProvider' not in roop.globals.providers: quit("You are using --gpu=amd flag but ROCM isn't available or properly installed on your system.") - elif roop.globals.gpu_vendor == 'nvidia': + if roop.globals.gpu_vendor == 'nvidia': CUDA_VERSION = torch.version.cuda CUDNN_VERSION = torch.backends.cudnn.version() - if not torch.cuda.is_available() or not CUDA_VERSION: + if not torch.cuda.is_available(): quit("You are using --gpu=nvidia flag but CUDA isn't available or properly installed on your system.") if CUDA_VERSION > '11.8': quit(f"CUDA version {CUDA_VERSION} is not supported - please downgrade to 11.8") @@ -97,8 +104,6 @@ def pre_check(): quit(f"CUDNN version {CUDNN_VERSION} is not supported - please upgrade to 8.9.1") if CUDNN_VERSION > 8910: quit(f"CUDNN version {CUDNN_VERSION} is not supported - please downgrade to 8.9.1") - else: - roop.globals.providers = ['CPUExecutionProvider'] def get_video_frame(video_path, frame_number = 1): @@ -138,40 +143,40 @@ def status(string): def process_video_multi_cores(source_img, frame_paths): - n = len(frame_paths) // roop.globals.max_cores + n = len(frame_paths) // roop.globals.cpu_cores if n > 2: processes = [] for i in range(0, len(frame_paths), n): - p = pool.apply_async(process_frames, args=(source_img, frame_paths[i:i+n],)) + p = POOL.apply_async(process_video, args=(source_img, frame_paths[i:i + n],)) processes.append(p) for p in processes: p.get() - pool.close() - pool.join() + POOL.close() + POOL.join() def start(preview_callback = None): - if not args['source_img'] or not os.path.isfile(args['source_img']): + if not args.source_img or not os.path.isfile(args.source_img): print("\n[WARNING] Please select an image containing a face.") return - elif not args['target_path'] or not os.path.isfile(args['target_path']): + elif not args.target_path or not os.path.isfile(args.target_path): print("\n[WARNING] Please select a video/image to swap face in.") return - if not args['output_file']: - target_path = args['target_path'] - args['output_file'] = rreplace(target_path, "/", "/swapped-", 1) if "/" in target_path else "swapped-" + target_path - target_path = args['target_path'] - test_face = get_face_single(cv2.imread(args['source_img'])) + if not args.output_file: + target_path = args.target_path + args.output_file = rreplace(target_path, "/", "/swapped-", 1) if "/" in target_path else "swapped-" + target_path + target_path = args.target_path + test_face = get_face_single(cv2.imread(args.source_img)) if not test_face: print("\n[WARNING] No face detected in source image. Please try with another one.\n") return if is_img(target_path): if predict_image(target_path) > 0.85: quit() - process_img(args['source_img'], target_path, args['output_file']) + process_img(args.source_img, target_path, args.output_file) status("swap successful!") return - seconds, probabilities = predict_video_frames(video_path=args['target_path'], frame_interval=100) + seconds, probabilities = predict_video_frames(video_path=args.target_path, frame_interval=100) if any(probability > 0.85 for probability in probabilities): quit() video_name_full = target_path.split("/")[-1] @@ -180,7 +185,7 @@ def start(preview_callback = None): Path(output_dir).mkdir(exist_ok=True) status("detecting video's FPS...") fps, exact_fps = detect_fps(target_path) - if not args['keep_fps'] and fps > 30: + if not args.keep_fps and fps > 30: this_path = output_dir + "/" + video_name + ".mp4" set_fps(target_path, this_path, 30) target_path, exact_fps = this_path, 30 @@ -188,33 +193,33 @@ def start(preview_callback = None): shutil.copy(target_path, output_dir) status("extracting frames...") extract_frames(target_path, output_dir) - args['frame_paths'] = tuple(sorted( + args.frame_paths = tuple(sorted( glob.glob(output_dir + "/*.png"), key=lambda x: int(x.split(sep)[-1].replace(".png", "")) )) status("swapping in progress...") - if sys.platform != 'darwin' and not args['gpu_vendor']: - global pool - pool = mp.Pool(roop.globals.max_cores) - process_video_multi_cores(args['source_img'], args['frame_paths']) + if roop.globals.gpu_vendor is None and roop.globals.cpu_cores > 0: + global POOL + POOL = mp.Pool(roop.globals.cpu_cores) + process_video_multi_cores(args.source_img, args.frame_paths) else: - process_video(args['source_img'], args["frame_paths"], preview_callback) + process_video(args.source_img, args.frame_paths) status("creating video...") create_video(video_name, exact_fps, output_dir) status("adding audio...") - add_audio(output_dir, target_path, video_name_full, args['keep_frames'], args['output_file']) - save_path = args['output_file'] if args['output_file'] else output_dir + "/" + video_name + ".mp4" + add_audio(output_dir, target_path, video_name_full, args.keep_frames, args.output_file) + save_path = args.output_file if args.output_file else output_dir + "/" + video_name + ".mp4" print("\n\nVideo saved as:", save_path, "\n\n") status("swap successful!") def select_face_handler(path: str): - args['source_img'] = path + args.source_img = path def select_target_handler(path: str): - args['target_path'] = path - return preview_video(args['target_path']) + args.target_path = path + return preview_video(args.target_path) def toggle_all_faces_handler(value: int): @@ -222,21 +227,21 @@ def toggle_all_faces_handler(value: int): def toggle_fps_limit_handler(value: int): - args['keep_fps'] = int(value != 1) + args.keep_fps = int(value != 1) def toggle_keep_frames_handler(value: int): - args['keep_frames'] = value + args.keep_frames = value def save_file_handler(path: str): - args['output_file'] = path + args.output_file = path def create_test_preview(frame_number): return process_faces( - get_face_single(cv2.imread(args['source_img'])), - get_video_frame(args['target_path'], frame_number) + get_face_single(cv2.imread(args.source_img)), + get_video_frame(args.target_path, frame_number) ) @@ -245,16 +250,16 @@ def run(): pre_check() limit_resources() - if args['source_img']: - args['cli_mode'] = True + if args.source_img: + args.cli_mode = True start() quit() window = ui.init( { 'all_faces': roop.globals.all_faces, - 'keep_fps': args['keep_fps'], - 'keep_frames': args['keep_frames'] + 'keep_fps': args.keep_fps, + 'keep_frames': args.keep_frames }, select_face_handler, select_target_handler, diff --git a/roop/globals.py b/roop/globals.py index da3cfac..986bf91 100644 --- a/roop/globals.py +++ b/roop/globals.py @@ -2,7 +2,7 @@ import onnxruntime all_faces = None log_level = 'error' -cpu_threads = None +cpu_cores = None gpu_threads = None gpu_vendor = None providers = onnxruntime.get_available_providers() diff --git a/roop/swapper.py b/roop/swapper.py index bc4730a..769a39e 100644 --- a/roop/swapper.py +++ b/roop/swapper.py @@ -53,15 +53,11 @@ def process_frames(source_img, frame_paths, progress=None): def multi_process_frame(source_img, frame_paths, progress): - - # caculate the number of frames each threads processed + threads = [] num_threads = roop.globals.gpu_threads num_frames_per_thread = len(frame_paths) // num_threads remaining_frames = len(frame_paths) % num_threads - - # initialize thread list - threads = [] - + # create thread and launch start_index = 0 for _ in range(num_threads): @@ -89,10 +85,10 @@ def process_img(source_img, target_path, output_file): print("\n\nImage saved as:", output_file, "\n\n") -def process_video(source_img, frame_paths, preview_callback): +def process_video(source_img, frame_paths): progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]' with tqdm(total=len(frame_paths), desc="Processing", unit="frame", dynamic_ncols=True, bar_format=progress_bar_format) as progress: - if roop.globals.gpu_vendor == "nvidia": # multi-threading breaks in AMD + if roop.globals.gpu_vendor is not None and roop.globals.gpu_threads > 0: multi_process_frame(source_img, frame_paths, progress) else: process_frames(source_img, frame_paths, progress)