diff --git a/roop/core.py b/roop/core.py
index b4ddab0..a2ae5a1 100755
--- a/roop/core.py
+++ b/roop/core.py
@@ -45,8 +45,8 @@ def parse_args() -> None:
     parser.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx264')
     parser.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18)
     parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int)
-    parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=max(psutil.cpu_count() / 2, 1))
-    parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=8)
+    parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=suggest_cpu_cores())
+    parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=suggest_gpu_threads())
     parser.add_argument('--gpu-vendor', help='select your GPU vendor', dest='gpu_vendor', choices=['apple', 'amd', 'nvidia'])
 
     args = parser.parse_known_args()[0]
@@ -61,20 +61,8 @@ def parse_args() -> None:
     roop.globals.many_faces = args.many_faces
     roop.globals.video_encoder = args.video_encoder
     roop.globals.video_quality = args.video_quality
-
-    if args.cpu_cores:
-        roop.globals.cpu_cores = int(args.cpu_cores)
-
-    # cpu thread fix for mac
-    if sys.platform == 'darwin':
-        roop.globals.cpu_cores = 1
-
-    if args.gpu_threads:
-        roop.globals.gpu_threads = int(args.gpu_threads)
-
-    # gpu thread fix for amd
-    if args.gpu_vendor == 'amd':
-        roop.globals.gpu_threads = 1
+    roop.globals.cpu_cores = args.cpu_cores
+    roop.globals.gpu_threads = args.gpu_threads
 
     if args.gpu_vendor:
         roop.globals.gpu_vendor = args.gpu_vendor
@@ -82,7 +70,19 @@ def parse_args() -> None:
         roop.globals.providers = ['CPUExecutionProvider']
 
 
-def limit_resources():
+def suggest_gpu_threads() -> int:
+    if 'ROCMExecutionProvider' in roop.globals.providers:
+        return 2
+    return 8
+
+
+def suggest_cpu_cores() -> int:
+    if sys.platform == 'darwin':
+        return 2
+    return int(max(psutil.cpu_count() / 2, 1))
+
+
+def limit_resources() -> None:
     # prevent tensorflow memory leak
     gpus = tensorflow.config.experimental.list_physical_devices('GPU')
     for gpu in gpus:
@@ -98,7 +98,7 @@ def limit_resources():
             resource.setrlimit(resource.RLIMIT_DATA, (memory, memory))
 
 
-def pre_check():
+def pre_check() -> None:
     if sys.version_info < (3, 9):
         quit('Python version is not supported - please upgrade to 3.9 or higher.')
     if not shutil.which('ffmpeg'):
@@ -128,23 +128,21 @@ def pre_check():
 def conditional_process_video(source_path: str, frame_paths: List[str]) -> None:
     pool_amount = len(frame_paths) // roop.globals.cpu_cores
     if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.gpu_vendor is None:
-        update_status('Pool-Swapping in progress...')
         global POOL
         POOL = multiprocessing.Pool(roop.globals.cpu_cores, maxtasksperchild=1)
         pools = []
         for i in range(0, len(frame_paths), pool_amount):
-            pool = POOL.apply_async(process_video, args=(source_path, frame_paths[i:i + pool_amount]))
+            pool = POOL.apply_async(process_video, args=(source_path, frame_paths[i:i + pool_amount], 'cpu'))
             pools.append(pool)
         for pool in pools:
             pool.get()
         POOL.close()
         POOL.join()
     else:
-        update_status('Swapping in progress...')
-        process_video(roop.globals.source_path, frame_paths)
+        process_video(roop.globals.source_path, frame_paths, 'gpu')
 
 
-def update_status(message: str):
+def update_status(message: str) -> None:
     value = 'Status: ' + message
     print(value)
     if not roop.globals.headless:
@@ -181,6 +179,7 @@ def start() -> None:
     update_status('Extracting frames...')
     extract_frames(roop.globals.target_path)
     frame_paths = get_temp_frames_paths(roop.globals.target_path)
+    update_status('Swapping in progress...')
     conditional_process_video(roop.globals.source_path, frame_paths)
     # prevent memory leak using ffmpeg with cuda
     if roop.globals.gpu_vendor == 'nvidia':
diff --git a/roop/swapper.py b/roop/swapper.py
index 8f4e4ac..1152423 100644
--- a/roop/swapper.py
+++ b/roop/swapper.py
@@ -1,4 +1,3 @@
-
 import os
 from tqdm import tqdm
 import cv2
@@ -83,12 +82,13 @@ def process_image(source_img, target_path, output_file):
     cv2.imwrite(output_file, result)
 
 
-def process_video(source_img, frame_paths):
-    do_multi = roop.globals.gpu_vendor is not None and roop.globals.gpu_threads > 1
+def process_video(source_path, frame_paths, mode: str):
     progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
     total = len(frame_paths)
     with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        if do_multi:
-            multi_process_frame(source_img, frame_paths, progress)
-        else:
-            process_frames(source_img, frame_paths, progress)
+        if mode == 'cpu':
+            progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores})
+            process_frames(source_path, frame_paths, progress)
+        elif mode == 'gpu':
+            progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads})
+            multi_process_frame(source_path, frame_paths, progress)