import subprocess import concurrent.futures import os import shutil import argparse import json import threading import queue from tqdm import tqdm from platformdirs import user_config_dir # --- Constants for Configuration --- APP_NAME = "eac3-transcode" APP_AUTHOR = "eac3-transcode" CONFIG_FILENAME = "options.json" # Global lock for TQDM writes to prevent interleaving from multiple threads tqdm_lock = threading.Lock() SUPPORTED_EXTENSIONS = (".mkv", ".mp4") def get_video_duration(filepath: str) -> float: """Gets the duration of a video file in seconds.""" if not shutil.which("ffprobe"): return 0.0 command = [ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filepath ] try: process = subprocess.run(command, capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW if os.name == 'nt' else 0) return float(process.stdout.strip()) except (subprocess.CalledProcessError, ValueError): return 0.0 def get_stream_info(filepath: str, stream_type: str = "audio") -> tuple[list[dict], list[str]]: """ Retrieves details for specified stream types (audio, video, subtitle) in a file. For audio, returns list of dicts with 'index', 'codec_name', 'channels', 'language'. For video/subtitle, returns list of dicts with 'index', 'codec_name'. """ logs = [] if not shutil.which("ffprobe"): logs.append(f" ⚠️ Warning: ffprobe is missing. Cannot get {stream_type} stream info for '{os.path.basename(filepath)}'.") return [], logs select_streams_option = { "audio": "a", "video": "v", "subtitle": "s" }.get(stream_type, "a") # Default to audio if type is unknown ffprobe_cmd = [ "ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-select_streams", select_streams_option, filepath ] try: process = subprocess.run( ffprobe_cmd, capture_output=True, text=True, check=False, creationflags=subprocess.CREATE_NO_WINDOW if os.name == 'nt' else 0 ) if process.returncode != 0: # Non-critical error for this function, main processing will decide to skip/fail return [], logs if not process.stdout.strip(): return [], logs # No streams of the selected type found data = json.loads(process.stdout) streams_details = [] for stream in data.get("streams", []): detail = { "index": stream["index"], # Absolute stream index "codec_name": stream.get("codec_name", "unknown") } if stream_type == "audio": detail["channels"] = stream.get("channels") detail["language"] = stream.get("tags", {}).get("language", "und").lower() streams_details.append(detail) return streams_details, logs except json.JSONDecodeError: logs.append(f" ⚠️ Warning: Failed to decode ffprobe JSON for {stream_type} streams in '{os.path.basename(filepath)}'.") return [], logs except Exception as e: logs.append(f" ⚠️ Error getting {stream_type} stream info for '{os.path.basename(filepath)}': {e}") return [], logs def time_str_to_seconds(time_str: str) -> float: """Converts HH:MM:SS.ms time string to seconds.""" parts = time_str.split(':') seconds = float(parts[-1]) if len(parts) > 1: seconds += int(parts[-2]) * 60 if len(parts) > 2: seconds += int(parts[-3]) * 3600 return seconds def process_file_with_ffmpeg( input_filepath: str, final_output_filepath: str | None, audio_bitrate: str, audio_processing_ops: list[dict], # [{'index':X, 'op':'transcode'/'copy', 'lang':'eng'}] duration: float, pbar_position: int ) -> tuple[bool, list[str]]: """ Processes a single video file using ffmpeg, writing to a temporary file first. """ logs = [] if not shutil.which("ffmpeg"): logs.append(" 🚨 Error: ffmpeg is not installed or not found.") return False, logs # FFMpeg will write to a temporary file, which we will rename upon success temp_output_filepath = final_output_filepath + ".tmp" base_filename = os.path.basename(input_filepath) output_filename = os.path.basename(final_output_filepath) ffmpeg_cmd = ["ffmpeg", "-nostdin", "-i", input_filepath, "-map_metadata", "0"] map_operations = [] output_audio_stream_ffmpeg_idx = 0 # For -c:a:0, -c:a:1 etc. # Map Video Streams map_operations.extend(["-map", "0:v?", "-c:v", "copy"]) # Map Subtitle Streams map_operations.extend(["-map", "0:s?", "-c:s", "copy"]) # Map Audio Streams based on operations for op_details in audio_processing_ops: map_operations.extend(["-map", f"0:{op_details['index']}"]) if op_details['op'] == 'transcode': map_operations.extend([f"-c:a:{output_audio_stream_ffmpeg_idx}", "eac3", f"-b:a:{output_audio_stream_ffmpeg_idx}", audio_bitrate, f"-ac:a:{output_audio_stream_ffmpeg_idx}", "6", f"-metadata:s:a:{output_audio_stream_ffmpeg_idx}", f"language={op_details['lang']}"]) elif op_details['op'] == 'copy': map_operations.extend([f"-c:a:{output_audio_stream_ffmpeg_idx}", "copy"]) output_audio_stream_ffmpeg_idx += 1 ffmpeg_cmd.extend(map_operations) if final_output_filepath.lower().endswith('.mkv'): ffmpeg_cmd.extend(['-f', 'matroska']) elif final_output_filepath.lower().endswith('.mp4'): ffmpeg_cmd.extend(['-f', 'mp4']) ffmpeg_cmd.extend(["-y", "-v", "quiet", "-stats_period", "1", "-progress", "pipe:1", temp_output_filepath]) logs.append(f" ⚙️ Processing: '{base_filename}' -> '{output_filename}'") process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, creationflags=subprocess.CREATE_NO_WINDOW if os.name == 'nt' else 0) file_pbar = None if duration > 0: file_pbar = tqdm(total=int(duration), desc=f"└─'{base_filename[:30]}…'", position=pbar_position, unit='s', leave=False, ncols=100) for line in process.stdout: if "out_time_ms" in line: try: time_us = int(line.strip().split("=")[1]) elapsed_seconds = time_us / 1_000_000 update_amount = max(0, elapsed_seconds - file_pbar.n) if update_amount > 0: file_pbar.update(update_amount) except (ValueError, IndexError): continue process.wait() file_pbar.close() if process.returncode == 0: if os.path.exists(temp_output_filepath) and os.path.getsize(temp_output_filepath) > 0: os.rename(temp_output_filepath, final_output_filepath) logs.append(f" ✅ Success: '{output_filename}' saved.") return True, logs else: logs.append(f" ⚠️ Warning: ffmpeg reported success, but temp file is missing or empty.") return False, logs else: logs.append(f" 🚨 Error during ffmpeg processing for '{base_filename}'. RC: {process.returncode}") stderr_output = process.stderr.read() if stderr_output: logs.append(f" ffmpeg stderr:\n{stderr_output.strip()}") return False, logs def process_single_file(filepath: str, pbar_position: int, args: argparse.Namespace, input_path_abs: str) -> str: """ Analyzes and processes a single file, managing temporary files for graceful exit. """ file_specific_logs = [] final_status = "failed" # Determine a display name relative to the initial input path for cleaner logs display_name = os.path.relpath(filepath, input_path_abs) if os.path.isdir(input_path_abs) else os.path.basename(filepath) file_specific_logs.append(f"▶️ Checked: '{display_name}'") target_languages = [lang.strip().lower() for lang in args.languages.split(',') if lang.strip()] audio_streams_details, get_info_logs = get_stream_info(filepath, "audio") file_specific_logs.extend(get_info_logs) audio_ops_for_ffmpeg = [] if not audio_streams_details: file_specific_logs.append(" ℹ️ No audio streams found in this file.") else: for stream in audio_streams_details: lang = stream['language'] op_to_perform = None channels_info = f"{stream.get('channels')}ch" if stream.get('channels') is not None else "N/Ach" codec_name = stream.get('codec_name', 'unknown') if lang in target_languages: is_5_1 = stream.get('channels') == 6 is_not_ac3_eac3 = codec_name not in ['ac3', 'eac3'] if is_5_1 and is_not_ac3_eac3: op_to_perform = 'transcode' file_specific_logs.append(f" 🔈 Will transcode: Audio stream #{stream['index']} ({lang}, {channels_info}, {codec_name})") else: op_to_perform = 'copy' reason_parts = [f"already {codec_name}" if codec_name in ['ac3', 'eac3'] else None, f"not 5.1 ({channels_info})" if stream.get('channels') != 6 else None] reason = ", ".join(filter(None, reason_parts)) or "meets other criteria for copying" file_specific_logs.append(f" 🔈 Will copy: Audio stream #{stream['index']} ({lang}, {channels_info}, {codec_name}) - Reason: {reason}") else: file_specific_logs.append(f" 🔈 Will drop: Audio stream #{stream['index']} ({lang}, {channels_info}, {codec_name}) - Not a target language.") if op_to_perform: audio_ops_for_ffmpeg.append({'index': stream['index'], 'op': op_to_perform, 'lang': lang}) # First, check if there are any operations at all for target languages if not audio_ops_for_ffmpeg: file_specific_logs.append(f" ⏭️ Skipping '{display_name}': No target audio streams to process (copy/transcode).") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) final_status = "skipped_no_ops" return final_status needs_transcode = any(op['op'] == 'transcode' for op in audio_ops_for_ffmpeg) if not needs_transcode: file_specific_logs.append(f" ⏭️ Skipping '{display_name}': No transcoding required.") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) final_status = "skipped_no_transcode" return final_status # Determine final output path name, ext = os.path.splitext(os.path.basename(filepath)) output_filename = f"{name}_eac3{ext}" output_dir_for_this_file = os.path.dirname(filepath) # Default to same directory if args.output_directory_base: # Input was a folder if os.path.isdir(input_path_abs): relative_dir = os.path.relpath(os.path.dirname(filepath), start=input_path_abs) output_dir_for_this_file = os.path.join(args.output_directory_base, relative_dir) if relative_dir != "." else args.output_directory_base else: # Input was a single file output_dir_for_this_file = args.output_directory_base final_output_filepath = os.path.join(output_dir_for_this_file, output_filename) # Check if the output file already exists and we are NOT forcing reprocessing. if os.path.exists(final_output_filepath) and not args.force_reprocess: file_specific_logs.append(f" ⏭️ Skipping: Output file already exists. Use --force-reprocess to override.") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) final_status = "skipped_existing" return final_status # Check for identical paths before starting if os.path.abspath(filepath) == os.path.abspath(final_output_filepath): file_specific_logs.append(f" ⚠️ Warning: Input and output paths are identical. Skipping.") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) final_status = "skipped_identical_path" return final_status if args.dry_run: file_specific_logs.append(f" DRY RUN: Would process '{display_name}'. No changes will be made.") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) # We return 'processed' to indicate it *would* have been processed final_status = "processed" return final_status # Ensure output directory exists before processing if not os.path.isdir(output_dir_for_this_file): try: os.makedirs(output_dir_for_this_file, exist_ok=True) except OSError as e: file_specific_logs.append(f" 🚨 Error creating output directory '{output_dir_for_this_file}': {e}") with tqdm_lock: for log_msg in file_specific_logs: tqdm.write(log_msg) return "failed" duration = get_video_duration(filepath) if duration == 0: file_specific_logs.append(f" ⚠️ Could not determine duration for '{display_name}'. Per-file progress will not be shown.") temp_filepath = final_output_filepath + ".tmp" try: success, ffmpeg_logs = process_file_with_ffmpeg(filepath, final_output_filepath, args.audio_bitrate, audio_ops_for_ffmpeg, duration, pbar_position) file_specific_logs.extend(ffmpeg_logs) final_status = "processed" if success else "failed" finally: # This block will run whether the try block succeeded, failed, or was interrupted. if os.path.exists(temp_filepath): try: os.remove(temp_filepath) except OSError as e: file_specific_logs.append(f" 🚨 Error cleaning up temp file '{temp_filepath}': {e}") with tqdm_lock: # Print all logs for this file at the very end of its processing for log_msg in file_specific_logs: tqdm.write(log_msg) return final_status # Worker initializer to assign a unique position to each worker's progress bar def worker_init(worker_id_queue): threading.current_thread().worker_id = worker_id_queue.get() def main(): # Initial check for ffmpeg and ffprobe if not shutil.which("ffmpeg") or not shutil.which("ffprobe"): missing_tools = [] if not shutil.which("ffmpeg"): missing_tools.append("ffmpeg") if not shutil.which("ffprobe"): missing_tools.append("ffprobe") print(f"🚨 Error: {', '.join(missing_tools)} is not installed or not found in your system's PATH. Please install ffmpeg.") return parser = argparse.ArgumentParser( description="Advanced video transcoder: E-AC3 for specific audio, language filtering, folder processing.", formatter_class=argparse.RawTextHelpFormatter ) parser.add_argument( "-i", "--input", required=True, help="Path to the input video file or folder.", dest="input_path" ) parser.add_argument( "-o", "--outdir", help="Optional. Base directory to save processed files.\n" "If input is a folder, source structure is replicated under this directory.\n" "If not set, processed files are saved alongside originals.", dest="output_directory_base", default=None ) parser.add_argument( "-br", "--bitrate", help="Audio bitrate for E-AC3 (e.g., '640k', '1536k'). Defaults to '1536k'.", dest="audio_bitrate", default="1536k" ) parser.add_argument( "-l", "--langs", help="Comma-separated list of 3-letter audio languages to keep (e.g., 'eng,jpn').\nDefaults to 'eng,jpn'.", dest="languages", default="eng,jpn" ) parser.add_argument( "-j", "--jobs", type=int, default=os.cpu_count(), # Default to the number of CPU cores help=f"Number of files to process in parallel. Defaults to the number of CPU cores on your system ({os.cpu_count()})." ) parser.add_argument( "--dry-run", action="store_true", # Makes it a flag, e.g., --dry-run help="Analyze files and report actions without executing ffmpeg." ) parser.add_argument( "--force-reprocess", action="store_true", help="Force reprocessing of all files, even if an output file with the target name already exists." ) # --- Configuration File Logic --- config = {} user_config_dir_path = user_config_dir(APP_NAME, APP_AUTHOR) user_config_file_path = os.path.join(user_config_dir_path, CONFIG_FILENAME) if not os.path.exists(user_config_file_path): try: defaults = {action.dest: action.default for action in parser._actions if action.dest != "help" and not action.required} os.makedirs(user_config_dir_path, exist_ok=True) with open(user_config_file_path, 'w') as f: json.dump(defaults, f, indent=4) print(f"✅ Created default configuration at: {user_config_file_path}") except Exception as e: print(f"⚠️ Warning: Could not create default config at '{user_config_file_path}': {e}") potential_paths = [os.path.join(os.getcwd(), CONFIG_FILENAME), user_config_file_path] loaded_config_path = None for path in potential_paths: if os.path.exists(path): try: with open(path, 'r') as f: config = json.load(f) loaded_config_path = path break except (json.JSONDecodeError, IOError) as e: print(f"⚠️ Warning: Could not read or parse config at '{path}': {e}") break parser.set_defaults(**config) args = parser.parse_args() if loaded_config_path: print(f"✅ Loaded configuration from: {loaded_config_path}") if args.dry_run: print("--- DRY RUN MODE ENABLED: No files will be modified. ---") # --- File Discovery --- input_path_abs = os.path.abspath(args.input_path) files_to_process_paths = [] if os.path.isdir(input_path_abs): print(f"📁 Scanning folder: {input_path_abs}") for root, _, filenames in os.walk(input_path_abs): for filename in filenames: if filename.lower().endswith(SUPPORTED_EXTENSIONS): files_to_process_paths.append(os.path.join(root, filename)) if not files_to_process_paths: print(" No .mkv or .mp4 files found in the specified folder.") elif os.path.isfile(input_path_abs): if input_path_abs.lower().endswith((".mkv", ".mp4")): files_to_process_paths.append(input_path_abs) else: print(f"⚠️ Provided file '{args.input_path}' is not an .mkv or .mp4 file. Skipping this input.") else: print(f"🚨 Error: Input path '{args.input_path}' is not a valid file or directory.") return if not files_to_process_paths: print("No files to process.") return print(f"\nFound {len(files_to_process_paths)} file(s) to potentially process...") # Initialize stats counters stats = { "processed": 0, "skipped_no_ops": 0, "skipped_no_transcode": 0, "skipped_identical_path": 0, "skipped_existing": 0, "failed": 0 } worker_id_queue = queue.Queue() for i in range(args.jobs): worker_id_queue.put(i + 1) try: with tqdm(total=len(files_to_process_paths), desc="Overall Progress", unit="file", ncols=100, smoothing=0.1, position=0, leave=True) as pbar: with concurrent.futures.ThreadPoolExecutor(max_workers=args.jobs, initializer=worker_init, initargs=(worker_id_queue,)) as executor: def submit_task(filepath): worker_id = threading.current_thread().worker_id return process_single_file(filepath, worker_id, args, input_path_abs) future_to_path = {executor.submit(submit_task, path): path for path in files_to_process_paths} for future in concurrent.futures.as_completed(future_to_path): path = future_to_path[future] try: status = future.result() if status in stats: stats[status] += 1 else: stats["failed"] += 1 with tqdm_lock: tqdm.write(f"🚨 UNKNOWN STATUS '{status}' for '{os.path.basename(path)}'.") except Exception as exc: with tqdm_lock: tqdm.write(f"🚨 CRITICAL ERROR during task for '{os.path.basename(path)}': {exc}") stats["failed"] += 1 finally: pbar.update(1) except KeyboardInterrupt: print("\n\n🚨 Process interrupted by user. Shutting down gracefully... Any in-progress files have been cleaned up.") # The 'finally' blocks in each thread will handle cleanup. # Exiting here. return # Print summary of operations summary_title = "--- Dry Run Summary ---" if args.dry_run else "--- Processing Summary ---" processed_label = "Would be processed" if args.dry_run else "Successfully processed" print() print(f"\n{summary_title}") print(f"Total files checked: {len(files_to_process_paths)}") print(f"✅ {processed_label}: {stats['processed']}") total_skipped = stats['skipped_no_ops'] + stats['skipped_no_transcode'] + stats['skipped_identical_path'] + stats['skipped_existing'] print(f"⏭️ Total Skipped: {total_skipped}") if total_skipped > 0: print(f" - No target audio operations: {stats['skipped_no_ops']}") print(f" - No transcoding required (all copy): {stats['skipped_no_transcode']}") print(f" - Identical input/output path: {stats['skipped_identical_path']}") print(f" - Output file already exists: {stats['skipped_existing']}") print(f"🚨 Failed to process: {stats['failed']}") print("--------------------------")