From 6e93abf7fbb8de4a10cfc4a921021ae62e6a4427 Mon Sep 17 00:00:00 2001 From: laowang Date: Thu, 20 Nov 2025 14:28:23 +0800 Subject: [PATCH] first --- README.md | 38 ++++++++++ translate_folders.py | 128 +++++++++++++++++++++++++++++++++ translate_sfx.py | 168 +++++++++++++++++++++++++++++++++++++++++++ wav_to_mp3.py | 156 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 490 insertions(+) create mode 100644 README.md create mode 100644 translate_folders.py create mode 100644 translate_sfx.py create mode 100644 wav_to_mp3.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..3c15645 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# SFX-Library-Automation 🎧 +**基于 Ollama 本地大模型与 Python 的音效库整理工具集:自动汉化文件名、目录名,及多线程压缩瘦身。** + +## ✨ 功能特性 (Features) + +这是我为了整理几百 GB 英文音效库而编写的三个 Python 脚本,旨在解决“英文文件名难检索”和“WAV文件占用空间大”的痛点。 + +1. **🤖 智能汉化文件名 (`translate_sfx.py`)** + * 使用本地 Ollama API (推荐 Gemma/Llama3) 进行翻译。 + * 理解音效术语 (如 `Whoosh` -> `嗖嗖声`, `Impact` -> `撞击`)。 + * **格式**:`【中文翻译】原始英文名.wav`,保留原文以便对照。 + * 支持断点续传(跳过已汉化文件)。 + +2. **📂 智能汉化文件夹 (`translate_folders.py`)** + * 采用 Bottom-Up (倒序) 逻辑,安全处理多层级嵌套目录。 + * 智能识别并跳过已包含中文的目录,保护根目录结构。 + +3. **⚡ 多线程音频压缩 (`wav_to_mp3.py`)** + * 利用 `concurrent.futures` 跑满 CPU 核心。 + * 实测 16 线程下,处理 500+ 文件仅需 20多秒。 + * 推荐 **320kbps MP3**,在缩减 75% 体积的同时保留高频细节。 + * **安全模式**:只有在 MP3 生成且校验成功后,才询问是否删除原 WAV。 + +## 🚀 快速开始 (Quick Start) + +### 依赖 +* Python 3.x +* `pip install requests` +* [Ollama](https://ollama.com/) (运行中, 默认端口 11434) +* [FFmpeg](https://ffmpeg.org/) (需添加到环境变量或放置在脚本同级目录) + +### 使用 +1. 启动 Ollama: `ollama run gemma3:12b` (或其他模型) +2. 运行脚本: `python translate_sfx.py` +3. 
"""Prefix sub-folder names with a Chinese translation from a local Ollama model.

Every directory below a user-supplied root is renamed from ``name`` to
``【translation】name``.  Directories are visited bottom-up so that renaming a
parent never invalidates the path of a child that is still to be processed.
Directories whose name already contains a CJK ideograph are left untouched.
"""
import os
import sys

# ================= Configuration =================
MODEL_NAME = "gemma3:12b"
OLLAMA_API_URL = "http://localhost:11434/api/chat"

# In-memory cache: original folder name -> sanitized translation.
translation_cache = {}

# Characters removed from the model's answer before it is used in a path:
# the characters Windows forbids in names, quotes the model tends to add, the
# Chinese full stop, and every ASCII control character (a multi-line answer
# must never put a newline into a folder name).
_UNSAFE_CHARS = '<>:"/\\|?*\'“”。' + "".join(map(chr, range(32))) + "\x7f"
_UNSAFE_TABLE = str.maketrans("", "", _UNSAFE_CHARS)
# =================================================


def sanitize_name(text):
    """Return *text* with path-unsafe characters removed and edges trimmed."""
    return text.translate(_UNSAFE_TABLE).strip()


def get_translation(text, model):
    """Translate a folder name into Simplified Chinese via the Ollama chat API.

    Args:
        text: The original folder name.
        model: Name of the Ollama model to query.

    Returns:
        The sanitized translation, or ``None`` when the request fails, the
        server answers with a non-200 status, the reply has an unexpected
        shape, or nothing is left after sanitizing.  Successful results are
        stored in ``translation_cache``.
    """
    if text in translation_cache:
        return translation_cache[text]

    # Imported here so the pure helpers of this module stay importable when
    # the HTTP dependency is not installed.
    import requests

    system_prompt = (
        "You are a professional translator for Audio/Video asset libraries. "
        "Translate the category/folder name into concise Simplified Chinese. "
        "Rules: 1. Only output the Chinese translation. 2. Do not explain. "
        "3. Keep terms like 'SFX', 'BGM', 'Foley' accurate."
    )

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f'Translate folder name: "{text}"'},
        ],
        "stream": False,
        "options": {"temperature": 0.1},
    }

    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=60)
        if response.status_code != 200:
            return None
        raw_text = response.json()['message']['content']
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Best effort: a failed translation only means this folder is skipped.
        # Ctrl-C is deliberately NOT swallowed here.
        return None

    if not isinstance(raw_text, str):
        return None

    translated_text = sanitize_name(raw_text)
    if not translated_text:
        return None

    translation_cache[text] = translated_text
    return translated_text


def is_already_chinese(text):
    """Return True when *text* contains at least one CJK unified ideograph."""
    return any('\u4e00' <= char <= '\u9fff' for char in text)


def process_folder_renaming():
    """Ask for a root directory and a mode, then translate its sub-folders.

    In dry-run mode (the default) the planned renames are only printed.
    """
    while True:
        target_dir = input("\n请拖入(或粘贴)要处理文件夹的【根目录路径】: ").strip().strip('"').strip("'")
        # Must be a directory: os.walk() on a plain file silently yields nothing.
        if os.path.isdir(target_dir):
            break
        print("❌ 路径不存在,请重试。")

    print("\n请选择模式:")
    print("1. 仅模拟 (推荐先看一眼)")
    print("2. 直接执行")
    is_dry_run = input("请输入数字 (默认1): ").strip() != '2'

    if not is_dry_run:
        print("\n>>> ⚠️ 警告:即将修改文件夹名称! <<<")

    count = 0
    success_count = 0
    skipped_chinese_count = 0
    padding = " " * 40  # overwrites the remains of the "\r" progress line

    print("\n正在扫描并分析目录结构 (倒序模式)...")

    # topdown=False: the deepest folders are handled first.
    for root, dirs, _files in os.walk(target_dir, topdown=False):
        for dirname in dirs:
            # Protection: anything that already carries Chinese text is
            # considered translated and is never renamed again.
            if is_already_chinese(dirname):
                skipped_chinese_count += 1
                continue

            count += 1
            print(f"[{count}] 正在思考: {dirname} ...", end="\r")

            cn_name = get_translation(dirname, MODEL_NAME)
            if not cn_name:
                continue  # translation failed: skip silently

            new_dirname = f"【{cn_name}】{dirname}"
            old_path = os.path.join(root, dirname)
            new_path = os.path.join(root, new_dirname)

            if is_dry_run:
                print(f"[模拟] {dirname} -> 【{cn_name}】{dirname}{padding}")
                continue

            # On POSIX os.rename() silently replaces an existing empty
            # directory, so refuse to touch an existing target.
            if os.path.exists(new_path):
                print(f"[失败] {dirname}: 目标已存在{padding}")
                continue

            try:
                os.rename(old_path, new_path)
            except OSError as e:
                print(f"[失败] {dirname}: {e}{padding}")
            else:
                print(f"[成功] {new_dirname}{padding}")
                success_count += 1

    print("\n" + "-" * 30)
    print("✅ 完成!")
    print(f"   - 翻译并重命名: {success_count} 个文件夹")
    print(f"   - 智能避开已汉化目录: {skipped_chinese_count} 个")


def main():
    """Run the interactive loop until the user declines to continue."""
    print("=== 子目录智能汉化工具 (保护根目录版) ===")
    print(f"当前模型: {MODEL_NAME}")

    while True:
        process_folder_renaming()
        if input("\n🔄 是否继续处理其他位置?(y/n): ").strip().lower() != 'y':
            break


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n用户强制停止,程序退出。")
"""Prefix audio file names with a Chinese translation from a local Ollama model.

Every audio file below a user-supplied directory is renamed from
``name.ext`` to ``【translation】name.ext``.  Files whose name already contains
a CJK ideograph are treated as done, which makes interrupted runs resumable.
"""
import os
import json
import sys

# ================= Configuration =================
# Name of the Ollama model to query.
MODEL_NAME = "gemma3:12b"

# Ollama chat endpoint.
OLLAMA_API_URL = "http://localhost:11434/api/chat"

# File extensions that are processed (compared case-insensitively).
EXTENSIONS = ('.wav', '.mp3', '.flac', '.aiff', '.ogg', '.m4a')

# In-memory cache: original base name -> sanitized translation.  It survives
# across folders within one session, so repeated names are not re-queried.
translation_cache = {}

# Characters removed from the model's answer before it is used in a path:
# the characters Windows forbids in names, quotes the model tends to add, the
# Chinese full stop, and every ASCII control character (a multi-line answer
# must never put a newline into a file name).
_UNSAFE_CHARS = '<>:"/\\|?*\'“”。' + "".join(map(chr, range(32))) + "\x7f"
_UNSAFE_TABLE = str.maketrans("", "", _UNSAFE_CHARS)
# =================================================


def sanitize_name(text):
    """Return *text* with path-unsafe characters removed and edges trimmed."""
    return text.translate(_UNSAFE_TABLE).strip()


def get_translation_via_requests(filename, model):
    """Translate a file's base name into Simplified Chinese via Ollama.

    Args:
        filename: The base name (without extension) to translate.
        model: Name of the Ollama model to query.

    Returns:
        The sanitized translation, or ``None`` when the request fails, the
        server reports an HTTP error, the reply has an unexpected shape, or
        nothing is left after sanitizing.  Successful results are stored in
        ``translation_cache``.
    """
    if filename in translation_cache:
        return translation_cache[filename]

    # Imported here so the pure helpers of this module stay importable when
    # the HTTP dependency is not installed.
    import requests

    system_prompt = (
        "You are a professional translator for Audio Sound Effects (SFX) libraries. "
        "Your task is to translate English filenames into concise Simplified Chinese. "
        "Rules: 1. Only output the Chinese translation. 2. Do not explain. 3. Keep technical terms accurate (e.g., 'Whoosh' -> '嗖嗖声')."
    )

    user_prompt = f'Translate this filename: "{filename}"'

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
        "options": {
            "temperature": 0.1,
        },
    }

    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=60)
        response.raise_for_status()
        raw_text = response.json()['message']['content']
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Network details are intentionally not printed to avoid flooding the
        # console; the caller simply skips this file.
        return None

    if not isinstance(raw_text, str):
        return None

    translated_text = sanitize_name(raw_text)
    if not translated_text:
        return None

    translation_cache[filename] = translated_text
    return translated_text


def is_already_chinese(text):
    """Return True when *text* contains at least one CJK unified ideograph."""
    return any('\u4e00' <= char <= '\u9fff' for char in text)


def process_folder():
    """Ask for a directory and a mode, then translate the audio files in it.

    In dry-run mode (the default) the planned renames are only printed.
    """
    # 1. Ask for the path until an existing directory is given.
    while True:
        target_dir = input("\n请拖入(或粘贴)你要处理的【文件夹路径】: ").strip()
        target_dir = target_dir.strip('"').strip("'")  # drop drag-and-drop quotes

        # Must be a directory: os.walk() on a plain file silently yields nothing.
        if os.path.isdir(target_dir):
            break
        print("❌ 错误:路径不存在,请重新输入。")

    # 2. Choose the mode.
    print("\n请选择模式:")
    print("1. 仅模拟 (只打印不修改)")
    print("2. 直接执行 (修改文件名)")
    is_dry_run = input("请输入数字 (默认1): ").strip() != '2'

    if is_dry_run:
        print("\n>>> 🛡️ 模拟模式:不会修改任何文件 <<<")
    else:
        print("\n>>> ⚠️ 警告:即将开始【真实修改】文件名! <<<")

    # 3. Scan and process.
    count = 0
    success_count = 0
    padding = " " * 30  # overwrites the remains of the "\r" progress line
    print(f"\n正在扫描目录: {target_dir} ...\n")

    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if not file.lower().endswith(EXTENSIONS):
                continue

            name_part, _ext_part = os.path.splitext(file)
            if is_already_chinese(name_part):
                continue  # already translated in an earlier run

            count += 1
            print(f"[{count}] 正在思考: {name_part} ...", end="\r")

            cn_name = get_translation_via_requests(name_part, MODEL_NAME)
            if not cn_name:
                continue  # translation failed: skip silently

            # Format: 【Chinese】original.ext
            new_name_with_ext = f"【{cn_name}】{file}"
            old_path = os.path.join(root, file)
            new_path = os.path.join(root, new_name_with_ext)

            if is_dry_run:
                print(f"[模拟] {name_part} -> 【{cn_name}】{padding}")
                continue

            # On POSIX os.rename() silently overwrites an existing file, so
            # refuse to touch an existing target.
            if os.path.exists(new_path):
                print(f"[失败] 目标已存在: {new_name_with_ext}{padding}")
                continue

            try:
                os.rename(old_path, new_path)
            except OSError as e:
                print(f"[失败] {e}{padding}")
            else:
                print(f"[成功] {new_name_with_ext}{padding}")
                success_count += 1

    print("\n" + "-" * 30)
    print(f"✅ 本次任务完成!扫描: {count} 个,成功重命名: {success_count} 个。")


def main():
    """Run the interactive loop until the user declines to continue."""
    print("=============================================")
    print("   音效文件名智能翻译工具 (Gemma + Ollama)")
    print("=============================================")
    print(f"当前模型: {MODEL_NAME}")

    while True:
        process_folder()

        print("\n" + "=" * 30)
        choice = input("🔄 是否继续处理其他目录?(y/n): ").strip().lower()

        if choice != 'y':
            print("\n👋 再见!")
            break
        print("\n" * 2)  # visual separation between two runs


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n用户强制停止,程序退出。")
"""Batch-convert WAV files to MP3 with ffmpeg, using a thread pool.

After the conversion the user may delete the source WAV files.  Only WAVs
whose MP3 was produced (or already present) in this run are ever offered for
deletion.
"""
import os
import subprocess
import concurrent.futures
import time
import sys

# ================= Configuration =================
# Target bitrate: '192k' or '320k' (320k recommended).
BITRATE = '320k'

# Number of worker threads.  os.cpu_count() may return None when the count
# cannot be determined, hence the fallback.
MAX_WORKERS = os.cpu_count() or 4
# =================================================


def check_ffmpeg():
    """Return True when an ``ffmpeg`` executable can be started."""
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        return False
    return True


def convert_single_file(file_info):
    """Convert one WAV file to MP3.

    Args:
        file_info: A ``(wav_path, mp3_path)`` tuple.

    Returns:
        ``"skipped"`` when a non-empty MP3 already exists, ``"success"`` when
        the conversion succeeded, otherwise a string starting with
        ``"error: "`` (ffmpeg failed) or ``"exception: "`` (anything else).
    """
    wav_path, mp3_path = file_info

    # ffmpeg writes to a temporary file that is moved into place only after
    # a successful run.  A failed or interrupted conversion therefore never
    # leaves a partial, non-empty .mp3 behind that a later run would mistake
    # for a finished one (and then allow the WAV to be deleted).
    part_path = mp3_path + ".part"

    try:
        if os.path.exists(mp3_path) and os.path.getsize(mp3_path) > 0:
            return "skipped"

        # -i             input file
        # -b:a           audio bitrate
        # -map_metadata  keep the metadata of input 0
        # -y             overwrite the output
        # -v error       quiet, errors only
        # -f mp3         explicit format: ".part" gives ffmpeg nothing to guess from
        cmd = [
            "ffmpeg", "-i", wav_path,
            "-codec:a", "libmp3lame",
            "-b:a", BITRATE,
            "-map_metadata", "0",
            "-y", "-v", "error",
            "-f", "mp3",
            part_path,
        ]

        # stdin is detached so parallel ffmpeg processes cannot fight over
        # the console input.
        result = subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            return f"error: {result.stderr.decode('utf-8', errors='ignore')}"
        if not os.path.exists(part_path) or os.path.getsize(part_path) == 0:
            return "error: ffmpeg produced no output"

        os.replace(part_path, mp3_path)
        return "success"
    except Exception as e:
        # Worker boundary: report the problem as a result instead of raising.
        return f"exception: {str(e)}"
    finally:
        # Remove any leftover temporary file (none exists after a success).
        try:
            os.remove(part_path)
        except OSError:
            pass


def main():
    """Interactive entry point: scan, convert in parallel, optionally clean up."""
    print("=============================================")
    print("   WAV 转 MP3 批量压缩工具 (多线程版)")
    print(f"   目标码率: {BITRATE}")
    print("=============================================")

    if not check_ffmpeg():
        print("❌ 错误:找不到 ffmpeg。")
        print("请下载 ffmpeg.exe 并将其放到本脚本同一目录下,或者添加到系统环境变量。")
        return

    target_dir = input("\n请拖入要压缩的文件夹路径: ").strip().strip('"').strip("'")

    # Must be a directory: os.walk() on a plain file silently yields nothing.
    if not os.path.isdir(target_dir):
        print("路径不存在。")
        return

    # 1. Collect every WAV file together with its target MP3 path
    #    (same directory, extension replaced by .mp3).
    print("\n正在扫描 WAV 文件...")
    tasks = []
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                wav_path = os.path.join(root, file)
                mp3_path = os.path.splitext(wav_path)[0] + ".mp3"
                tasks.append((wav_path, mp3_path))

    total_files = len(tasks)
    if total_files == 0:
        print("没有找到 WAV 文件。")
        return

    print(f"找到 {total_files} 个 WAV 文件,准备开始转换...")
    print(f"火力全开,使用 {MAX_WORKERS} 个线程并发处理...")

    start_time = time.time()

    # 2. Convert in parallel.
    success_count = 0
    skipped_count = 0
    error_count = 0
    # WAVs whose MP3 is known to be complete; only these may be deleted later.
    deletable_wavs = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_file = {executor.submit(convert_single_file, task): task for task in tasks}

        finished = 0
        for future in concurrent.futures.as_completed(future_to_file):
            wav_p = future_to_file[future][0]
            result = future.result()
            finished += 1

            if result == "success":
                success_count += 1
                deletable_wavs.append(wav_p)
            elif result == "skipped":
                skipped_count += 1
                deletable_wavs.append(wav_p)
            else:
                error_count += 1
                print(f"\n[失败] {os.path.basename(wav_p)} -> {result}")

            print(f"进度: {finished}/{total_files} | 成功: {success_count} | 跳过: {skipped_count} | 失败: {error_count}", end="\r")

    duration = time.time() - start_time
    print(f"\n\n转换完成!耗时: {duration:.2f} 秒")

    # 3. Offer to delete the source files.
    if not deletable_wavs:
        return

    print("=" * 40)
    print("【清理阶段】")
    print("转换已完成。你现在的文件夹里同时存在 .wav 和 .mp3 文件。")
    choice = input(f"⚠️ 是否删除原有的 {len(deletable_wavs)} 个 WAV 文件以释放空间?(输入 'DELETE' 确认): ").strip()

    if choice != 'DELETE':
        print("已保留 WAV 原文件。")
        return

    print("正在删除 WAV 源文件...")
    deleted_num = 0
    for wav_p in deletable_wavs:
        try:
            # Check once more that the MP3 really exists before deleting.
            mp3_p = os.path.splitext(wav_p)[0] + ".mp3"
            if os.path.exists(mp3_p) and os.path.getsize(mp3_p) > 0:
                os.remove(wav_p)
                deleted_num += 1
            else:
                print(f"[跳过删除] 对应的 MP3 不存在或大小异常: {wav_p}")
        except OSError as e:
            print(f"[删除失败] {wav_p}: {e}")
    print(f"清理完毕!共删除了 {deleted_num} 个 WAV 文件。")


if __name__ == "__main__":
    main()