This commit is contained in:
2025-11-20 14:28:23 +08:00
commit 6e93abf7fb
4 changed files with 490 additions and 0 deletions

38
README.md Normal file
View File

@@ -0,0 +1,38 @@
# SFX-Library-Automation 🎧
**基于 Ollama 本地大模型与 Python 的音效库整理工具集:自动汉化文件名、目录名,及多线程压缩瘦身。**
## ✨ 功能特性 (Features)
这是我为了整理几百 GB 英文音效库而编写的三个 Python 脚本,旨在解决“英文文件名难检索”和“WAV 文件占用空间大”的痛点。
1. **🤖 智能汉化文件名 (`translate_sfx.py`)**
* 使用本地 Ollama API (推荐 Gemma/Llama3) 进行翻译。
* 理解音效术语 (如 `Whoosh` -> `嗖嗖声`, `Impact` -> `撞击`)。
* **格式**:`【中文翻译】原始英文名.wav`,保留原文以便对照。
* 支持断点续传(跳过已汉化文件)。
2. **📂 智能汉化文件夹 (`translate_folders.py`)**
* 采用 Bottom-Up (倒序) 逻辑,安全处理多层级嵌套目录。
* 智能识别并跳过已包含中文的目录,保护根目录结构。
3. **⚡ 多线程音频压缩 (`wav_to_mp3.py`)**
* 利用 `concurrent.futures` 跑满 CPU 核心。
* 实测 16 线程下,处理 500+ 文件仅需 20 多秒。
* 推荐 **320kbps MP3**,在缩减 75% 体积的同时保留高频细节。
* **安全模式**:只有在 MP3 生成且校验成功后,才询问是否删除原 WAV。
## 🚀 快速开始 (Quick Start)
### 依赖
* Python 3.x
* `pip install requests`
* [Ollama](https://ollama.com/) (运行中, 默认端口 11434)
* [FFmpeg](https://ffmpeg.org/) (需添加到环境变量或放置在脚本同级目录)
### 使用
1. 启动 Ollama: `ollama run gemma3:12b` (或其他模型)
2. 运行脚本: `python translate_sfx.py`
3. 拖入文件夹路径,按提示操作即可。建议先选择 **模式1 (模拟/Dry Run)** 查看效果。
---
*Made with ❤️ for Sound Designers & Editors.*

128
translate_folders.py Normal file
View File

@@ -0,0 +1,128 @@
import os
import requests
import sys
# ================= Configuration =================
# Name of the Ollama model that is sent in the "model" field of each request.
MODEL_NAME = "gemma3:12b"
# URL of the Ollama chat endpoint the translation requests are posted to.
OLLAMA_API_URL = "http://localhost:11434/api/chat"
# In-memory translation cache: original folder name -> cleaned translation.
translation_cache = {}
# =================================================
def get_translation(text, model):
    """Translate a folder name into Simplified Chinese via the Ollama chat API.

    Successful translations are stored in the module-level
    ``translation_cache`` so a repeated name does not trigger a second
    request.

    Args:
        text: Folder name to translate.
        model: Name of the Ollama model to query.

    Returns:
        The cleaned translation, or ``None`` when the request fails, the
        server answers with a non-200 status, the response lacks the expected
        fields, or nothing is left after cleaning.
    """
    if text in translation_cache:
        return translation_cache[text]

    system_prompt = (
        "You are a professional translator for Audio/Video asset libraries. "
        "Translate the category/folder name into concise Simplified Chinese. "
        "Rules: 1. Only output the Chinese translation. 2. Do not explain. "
        "3. Keep terms like 'SFX', 'BGM', 'Foley' accurate."
    )
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f'Translate folder name: "{text}"'},
        ],
        "stream": False,
        "options": {"temperature": 0.1},
    }

    # Characters removed from the model output: the set that is reserved in
    # Windows file names, ASCII and typographic quotes, the CJK full stop, and
    # ASCII control characters (a model may answer on several lines, and a
    # newline must never end up inside a directory name).
    forbidden = '<>:"/\\|?*\'“”。' + "".join(map(chr, range(32)))
    strip_table = str.maketrans("", "", forbidden)

    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=60)
        if response.status_code != 200:
            return None
        translated_text = response.json()['message']['content'].strip()
        translated_text = translated_text.translate(strip_table).strip()
    except Exception:
        # Deliberate best effort: a failed lookup just means "skip this
        # folder". Catching Exception (not a bare except) keeps Ctrl+C and
        # SystemExit working while a request is in flight.
        return None

    if not translated_text:
        return None
    translation_cache[text] = translated_text
    return translated_text
def is_already_chinese(text):
    """Return True if ``text`` holds at least one character in U+4E00..U+9FFF.

    A single character in that range is enough for the name to count as
    already named in Chinese.
    """
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)
def process_folder_renaming():
    """Interactively rename the sub-folders of a chosen root directory.

    Prompts for a root directory and a mode (dry run by default), then walks
    the tree bottom-up and renames every sub-folder whose name contains no
    Chinese character to ``【translation】original name``. The root directory
    itself is never part of a ``dirs`` list and is therefore left untouched.
    Folders whose translation fails are skipped silently.
    """
    # Keep asking until an existing directory is given; surrounding quotes
    # are stripped from the pasted path.
    while True:
        target_dir = input("\n请拖入(或粘贴)要处理文件夹的【根目录路径】: ").strip().strip('"').strip("'")
        if os.path.isdir(target_dir):
            break
        print("❌ 路径不存在,请重试。")

    print("\n请选择模式:")
    print("1. 仅模拟 (推荐先看一眼)")
    print("2. 直接执行")
    # Anything other than an explicit "2" keeps the safe dry-run mode.
    is_dry_run = input("请输入数字 (默认1): ").strip() != '2'
    if not is_dry_run:
        print("\n>>> ⚠️ 警告:即将修改文件夹名称! <<<")

    count = 0
    success_count = 0
    skipped_chinese_count = 0
    # Trailing spaces overwrite what is left of the "\r" progress line.
    padding = " " * 40

    print("\n正在扫描并分析目录结构 (倒序模式)...")
    # topdown=False yields the innermost folders first, so renaming a folder
    # never invalidates a path that is still waiting to be visited.
    for root, dirs, _files in os.walk(target_dir, topdown=False):
        for dirname in dirs:
            # Protection: a name that already contains Chinese is left alone.
            if is_already_chinese(dirname):
                skipped_chinese_count += 1
                continue

            count += 1
            print(f"[{count}] 正在思考: {dirname} ...", end="\r")
            cn_name = get_translation(dirname, MODEL_NAME)
            if not cn_name:
                continue

            # Documented format: 【translation】original name. The preview and
            # the real rename share this one string so they cannot diverge.
            new_dirname = f"【{cn_name}】{dirname}"
            old_path = os.path.join(root, dirname)
            new_path = os.path.join(root, new_dirname)

            if is_dry_run:
                print(f"[模拟] {dirname} -> {new_dirname}{padding}")
                continue

            # On POSIX os.rename may replace an existing empty directory;
            # refuse instead of touching an existing entry.
            if os.path.exists(new_path):
                print(f"[失败] {dirname}: 目标已存在 {new_dirname}{padding}")
                continue

            try:
                os.rename(old_path, new_path)
            except Exception as e:
                print(f"[失败] {dirname}: {e}{padding}")
            else:
                print(f"[成功] {new_dirname}{padding}")
                success_count += 1

    print("\n" + "-"*30)
    print(f"✅ 完成!")
    print(f" - 翻译并重命名: {success_count} 个文件夹")
    print(f" - 智能避开已汉化目录: {skipped_chinese_count}")
def main():
    """Run the folder-renaming workflow repeatedly until the user declines.

    Prints the banner and the configured model, then calls
    ``process_folder_renaming()`` in a loop. The loop continues only when the
    answer, after trimming whitespace and lower-casing, is exactly ``y``.
    """
    print("=== 子目录智能汉化工具 (保护根目录版) ===")
    print(f"当前模型: {MODEL_NAME}")
    while True:
        process_folder_renaming()
        # Strip before comparing so that "y " or " Y" still means yes.
        answer = input("\n🔄 是否继续处理其他位置?(y/n): ").strip().lower()
        if answer != 'y':
            break


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to abort a long batch; exit with a short
        # message instead of a traceback.
        print("\n\n用户强制停止,程序退出。")

168
translate_sfx.py Normal file
View File

@@ -0,0 +1,168 @@
import os
import requests
import json
import sys
# ================= Configuration =================
# Name of your Ollama model.
MODEL_NAME = "gemma3:12b"
# Ollama API address.
OLLAMA_API_URL = "http://localhost:11434/api/chat"
# File extensions to process (matched against the lower-cased file name).
EXTENSIONS = ('.wav', '.mp3', '.flac', '.aiff', '.ogg', '.m4a')
# Translation cache (kept in memory, so names that repeat in the next folder
# are answered without another request).
translation_cache = {}
# =================================================
def get_translation_via_requests(filename, model):
    """Translate a file name (without extension) through the Ollama chat API.

    Successful translations are stored in the module-level
    ``translation_cache`` and served from there on later calls.

    Args:
        filename: English file name to translate.
        model: Name of the Ollama model to query.

    Returns:
        The cleaned translation, or ``None`` when the request fails, the
        response lacks the expected fields, or nothing is left after
        cleaning.
    """
    if filename in translation_cache:
        return translation_cache[filename]

    system_prompt = (
        "You are a professional translator for Audio Sound Effects (SFX) libraries. "
        "Your task is to translate English filenames into concise Simplified Chinese. "
        "Rules: 1. Only output the Chinese translation. 2. Do not explain. 3. Keep technical terms accurate (e.g., 'Whoosh' -> '嗖嗖声')."
    )
    user_prompt = f'Translate this filename: "{filename}"'
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
        "options": {
            "temperature": 0.1,
        },
    }

    # Characters removed from the model output: the set that is reserved in
    # Windows file names, ASCII and typographic quotes, the CJK full stop, and
    # ASCII control characters (a model may answer on several lines, and a
    # newline must never end up inside a file name). The backslash is written
    # as "\\" because "\|" is not a valid escape sequence.
    invalid_chars = '<>:"/\\|?*\'“”。' + "".join(map(chr, range(32)))
    strip_table = str.maketrans("", "", invalid_chars)

    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=60)
        response.raise_for_status()
        result_json = response.json()
        translated_text = result_json['message']['content'].strip()
        translated_text = translated_text.translate(strip_table).strip()
    except Exception:
        # Deliberately quiet: network or format errors are not printed so the
        # console is not flooded; the caller skips the file on None.
        return None

    if not translated_text:
        return None
    translation_cache[filename] = translated_text
    return translated_text
def is_already_chinese(text):
    """Report whether ``text`` contains any character in U+4E00..U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)
def process_folder():
    """Interactively translate and rename the audio files under one folder.

    Prompts for a folder and a mode (dry run by default), walks the tree and,
    for every file whose extension is in ``EXTENSIONS`` and whose name holds
    no Chinese character yet, asks the model for a translation and renames
    the file to ``【translation】original name.ext``. Files whose translation
    fails are skipped silently.
    """
    # 1. Ask for the path until an existing directory is given.
    while True:
        target_dir = input("\n请拖入(或粘贴)你要处理的【文件夹路径】: ").strip()
        target_dir = target_dir.strip('"').strip("'")  # drop surrounding quotes
        if os.path.isdir(target_dir):
            break
        print("❌ 错误:路径不存在,请重新输入。")

    # 2. Choose the mode; anything other than "2" keeps the safe dry run.
    print("\n请选择模式:")
    print("1. 仅模拟 (只打印不修改)")
    print("2. 直接执行 (修改文件名)")
    is_dry_run = input("请输入数字 (默认1): ").strip() != '2'
    if is_dry_run:
        print("\n>>> 🛡️ 模拟模式:不会修改任何文件 <<<")
    else:
        print("\n>>> ⚠️ 警告:即将开始【真实修改】文件名! <<<")

    # 3. Scan and process.
    count = 0
    success_count = 0
    # Trailing spaces overwrite what is left of the "\r" progress line.
    padding = " " * 30

    print(f"\n正在扫描目录: {target_dir} ...\n")
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if not file.lower().endswith(EXTENSIONS):
                continue
            name_part, _ext_part = os.path.splitext(file)
            # Names that already contain Chinese are skipped, which is what
            # makes an interrupted run resumable.
            if is_already_chinese(name_part):
                continue

            count += 1
            print(f"[{count}] 正在思考: {name_part} ...", end="\r")
            cn_name = get_translation_via_requests(name_part, MODEL_NAME)
            if not cn_name:
                # Translation failed: skip quietly to keep the console clean.
                continue

            # Documented format: 【Chinese】English.wav
            new_name_with_ext = f"【{cn_name}】{file}"
            old_path = os.path.join(root, file)
            new_path = os.path.join(root, new_name_with_ext)

            if is_dry_run:
                print(f"[模拟] {name_part} -> 【{cn_name}】{padding}")
                continue

            # os.rename silently replaces an existing file on POSIX; never
            # overwrite another asset.
            if os.path.exists(new_path):
                print(f"[失败] 目标已存在: {new_name_with_ext}{padding}")
                continue

            try:
                os.rename(old_path, new_path)
            except Exception as e:
                print(f"[失败] {e}{padding}")
            else:
                print(f"[成功] {new_name_with_ext}{padding}")
                success_count += 1

    print("\n" + "-"*30)
    print(f"✅ 本次任务完成!扫描: {count} 个,成功重命名: {success_count} 个。")
def main():
    """Print the banner, then process folders until the user stops.

    After each ``process_folder()`` run the user is asked whether to go on;
    only an answer of ``y`` (trimmed, case-insensitive) starts another round.
    Any other answer prints the goodbye line and returns.
    """
    banner_rule = "============================================="
    for banner_line in (banner_rule, " 音效文件名智能翻译工具 (Gemma + Ollama)", banner_rule):
        print(banner_line)
    print(f"当前模型: {MODEL_NAME}")

    keep_going = True
    while keep_going:
        process_folder()
        # Ask after every run whether another directory should follow.
        print("\n" + "=" * 30)
        answer = input("🔄 是否继续处理其他目录?(y/n): ").strip().lower()
        keep_going = answer == 'y'
        if keep_going:
            print("\n" * 2)  # two blank lines as a visual separator
    print("\n👋 再见!")


if __name__ == "__main__":
    # Ctrl+C ends the program with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n用户强制停止,程序退出。")

156
wav_to_mp3.py Normal file
View File

@@ -0,0 +1,156 @@
import os
import subprocess
import concurrent.futures
import time
import sys
# ================= Configuration =================
# Target bitrate handed to ffmpeg's -b:a option: '192k' or '320k'
# (320k recommended).
BITRATE = '320k'
# Number of concurrent worker threads. Defaults to the CPU count and can be
# replaced by a fixed number such as 4. os.cpu_count() may return None when
# the count cannot be determined, hence the fallback.
MAX_WORKERS = os.cpu_count() or 4
# =================================================
def check_ffmpeg():
    """Return True if ``ffmpeg -version`` runs and exits with status 0.

    Returns:
        ``False`` when the executable cannot be started (not found, not
        executable, ...) or exits with a non-zero status, ``True`` otherwise.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.DEVNULL,  # the version banner is not needed
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # for a file named ffmpeg that cannot be executed.
        return False
    return True
def convert_single_file(file_info):
    """Convert one WAV file to MP3 with ffmpeg.

    Args:
        file_info: ``(wav_path, mp3_path)`` tuple.

    Returns:
        ``"skipped"`` when a non-empty MP3 already exists at ``mp3_path``,
        ``"success"`` after a completed conversion, otherwise a string that
        starts with ``"error: "`` (ffmpeg failed) or ``"exception: "``
        (ffmpeg could not be run or the result could not be moved in place).
    """
    wav_path, mp3_path = file_info

    # A non-empty MP3 that is already present counts as done.
    if os.path.exists(mp3_path) and os.path.getsize(mp3_path) > 0:
        return "skipped"

    # Encode into a sibling temporary file and move it into place only after
    # ffmpeg succeeded. Writing straight to mp3_path would leave a truncated,
    # non-empty .mp3 behind after a crash or Ctrl+C; a later run would report
    # it as "skipped" and the cleanup step could then delete the WAV.
    tmp_path = mp3_path + ".part"

    # -i            input file
    # -b:a          audio bitrate
    # -map_metadata 0   keep the metadata of the input
    # -y            overwrite the output
    # -v error      quiet mode, errors only
    # -f mp3        explicit container, since ".part" hides the extension
    cmd = [
        "ffmpeg", "-i", wav_path,
        "-codec:a", "libmp3lame",
        "-b:a", BITRATE,
        "-map_metadata", "0",
        "-y", "-v", "error",
        "-f", "mp3",
        tmp_path,
    ]
    try:
        result = subprocess.run(
            cmd,
            # Do not let the child read from the console that the main
            # thread later uses for the confirmation prompt.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            return f"error: {result.stderr.decode('utf-8', errors='ignore')}"
        if os.path.getsize(tmp_path) == 0:
            return "error: ffmpeg produced an empty file"
        os.replace(tmp_path, mp3_path)
        return "success"
    except Exception as e:
        return f"exception: {str(e)}"
    finally:
        # Remove a leftover partial file. After a successful os.replace the
        # temporary path no longer exists, so a failure here is expected and
        # carries no information.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def main():
    """Batch-convert every WAV under a chosen folder to MP3, then offer cleanup.

    Steps:
      1. Check that ffmpeg can be run and ask for the target folder.
      2. Collect all ``*.wav`` files (case-insensitive) and convert them in a
         thread pool via ``convert_single_file``.
      3. Offer to delete the source WAVs. Only files whose conversion
         reported "success" or "skipped" in this run are candidates, and each
         one is deleted only if its MP3 still exists and is non-empty.
    """
    print("=============================================")
    print(f" WAV 转 MP3 批量压缩工具 (多线程版)")
    print(f" 目标码率: {BITRATE}")
    print("=============================================")
    if not check_ffmpeg():
        print("❌ 错误:找不到 ffmpeg。")
        print("请下载 ffmpeg.exe 并将其放到本脚本同一目录下,或者添加到系统环境变量。")
        return

    target_dir = input("\n请拖入要压缩的文件夹路径: ").strip().strip('"').strip("'")
    if not os.path.isdir(target_dir):
        print("路径不存在。")
        return

    # 1. Scan for WAV files; the MP3 goes next to the WAV with the extension
    #    swapped.
    print(f"\n正在扫描 WAV 文件...")
    tasks = []
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                wav_path = os.path.join(root, file)
                mp3_path = os.path.splitext(wav_path)[0] + ".mp3"
                tasks.append((wav_path, mp3_path))

    total_files = len(tasks)
    if total_files == 0:
        print("没有找到 WAV 文件。")
        return

    print(f"找到 {total_files} 个 WAV 文件,准备开始转换...")
    print(f"火力全开,使用 {MAX_WORKERS} 个线程并发处理...")
    start_time = time.time()

    # 2. Convert concurrently.
    success_count = 0
    skipped_count = 0
    error_count = 0
    # (wav_path, mp3_path) pairs that are safe to clean up afterwards. A WAV
    # whose conversion failed must never become a deletion candidate.
    deletable = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_file = {executor.submit(convert_single_file, task): task for task in tasks}
        finished = 0
        for future in concurrent.futures.as_completed(future_to_file):
            task = future_to_file[future]
            try:
                result = future.result()
            except Exception as e:
                # One unexpected worker failure must not abort the batch.
                result = f"exception: {str(e)}"
            finished += 1
            if result == "success":
                success_count += 1
                deletable.append(task)
            elif result == "skipped":
                skipped_count += 1
                deletable.append(task)
            else:
                error_count += 1
                # Show which file failed and why.
                print(f"\n[失败] {os.path.basename(task[0])} -> {result}")
            # Simple single-line progress display.
            print(f"进度: {finished}/{total_files} | 成功: {success_count} | 跳过: {skipped_count} | 失败: {error_count}", end="\r")

    end_time = time.time()
    duration = end_time - start_time
    print(f"\n\n转换完成!耗时: {duration:.2f}")

    # 3. Optional cleanup of the source files.
    if deletable:
        print("="*40)
        print("【清理阶段】")
        print(f"转换已完成。你现在的文件夹里同时存在 .wav 和 .mp3 文件。")
        choice = input(f"⚠️ 是否删除原有的 {len(deletable)} 个 WAV 文件以释放空间?(输入 'DELETE' 确认): ").strip()
        if choice == 'DELETE':
            print("正在删除 WAV 源文件...")
            deleted_num = 0
            for wav_p, mp3_p in deletable:
                try:
                    # Re-check right before deleting that the MP3 really is
                    # there, to avoid removing the only copy.
                    if os.path.exists(mp3_p) and os.path.getsize(mp3_p) > 0:
                        os.remove(wav_p)
                        deleted_num += 1
                    else:
                        print(f"[跳过删除] 对应的 MP3 不存在或大小异常: {wav_p}")
                except Exception as e:
                    print(f"[删除失败] {wav_p}: {e}")
            print(f"清理完毕!共删除了 {deleted_num} 个 WAV 文件。")
        else:
            print("已保留 WAV 原文件。")


if __name__ == "__main__":
    main()