上传文件至「/」
This commit is contained in:
254
srt_optimizer_v2.py
Normal file
254
srt_optimizer_v2.py
Normal file
@@ -0,0 +1,254 @@
|
||||
# filename: srt_optimizer_v3.4.py
|
||||
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog, ttk, messagebox
|
||||
import sv_ttk
|
||||
import re
|
||||
from datetime import timedelta
|
||||
import os
|
||||
import jieba
|
||||
|
||||
# --- V3.4 configuration: the "golden" speaking rate takes priority ---
# Golden speaking rate derived from the reference cue "这是一个静止画面"
# (7 characters spoken in 1.86 seconds).
TARGET_SPEED_STRICT = 3.8

# Length, in seconds, of a natural and reasonable pause.
NATURAL_PAUSE_DURATION = 0.5

# Other trigger conditions.
FAST_SPEED_THRESHOLD = 8.0
SPLIT_CHARS_THRESHOLD = 25

# Safety thresholds.
MIN_DURATION_THRESHOLD = 0.4
MIN_CHARS_THRESHOLD = 2
|
||||
|
||||
# --- Subtitle data model (SrtEntry) and SRT parsing (parse_srt) ---
|
||||
class SrtEntry:
    """A single subtitle cue: sequence number, start/end offsets and text.

    Attributes:
        index: Sequence number written as the first line of the SRT block.
        start_td: Cue start as a ``timedelta`` offset.
        end_td: Cue end as a ``timedelta`` offset.
        text: Cue text with leading/trailing whitespace removed.
        is_new: Flag initialised to ``False``; callers set it on cues they
            generate themselves.
    """

    # Whitespace and the punctuation marks that do not count as characters.
    _NON_COUNTED_RE = re.compile(r'[\s,.?!。,、?!]')

    def __init__(self, index, start_td, end_td, text):
        self.index = index
        self.start_td = start_td
        self.end_td = end_td
        self.text = text.strip()
        self.is_new = False

    @property
    def duration(self):
        """Cue length in seconds (``end_td - start_td``) as a float."""
        return (self.end_td - self.start_td).total_seconds()

    @property
    def char_count(self):
        """Number of characters in the text, ignoring whitespace and punctuation."""
        return len(self._NON_COUNTED_RE.sub('', self.text))

    @property
    def speed(self):
        """Counted characters per second; 0 when the duration or count is not positive."""
        count = self.char_count
        seconds = self.duration
        return count / seconds if seconds > 0 and count > 0 else 0

    @property
    def start_str(self):
        """Start offset formatted as an SRT timestamp."""
        return self._td_to_str(self.start_td)

    @property
    def end_str(self):
        """End offset formatted as an SRT timestamp."""
        return self._td_to_str(self.end_td)

    @staticmethod
    def _td_to_str(td):
        """Format a non-negative ``timedelta`` as ``HH:MM:SS,mmm``.

        Integer arithmetic is used on purpose: deriving the milliseconds from
        ``total_seconds()`` loses one millisecond on values such as 1.001 s
        because of binary floating-point rounding. Sub-millisecond residue is
        truncated.
        """
        total_ms = td // timedelta(milliseconds=1)
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    def to_srt_block(self):
        """Return the cue as an SRT block terminated by a blank line."""
        return f"{self.index}\n{self.start_str} --> {self.end_str}\n{self.text}\n\n"
|
||||
|
||||
def parse_srt(content):
    """Parse SRT text into a list of ``SrtEntry`` objects.

    Args:
        content: Full text of an SRT file.

    Returns:
        One ``SrtEntry`` per block matched, in file order. Text that does not
        match the ``index / start --> end / text`` layout is skipped.
    """
    # The pattern below only understands "\n" line endings and separators that
    # are exactly "\n\n". Normalise CRLF/CR endings and drop trailing blanks
    # (including lines that contain only spaces or tabs) so such input still
    # splits into individual cues instead of yielding nothing or merging cues.
    content = content.replace('\r\n', '\n').replace('\r', '\n')
    content = re.sub(r'[ \t]+$', '', content, flags=re.MULTILINE)

    pattern = re.compile(
        r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})\n([\s\S]*?)(?=\n\n|\Z)',
        re.MULTILINE,
    )

    def to_td(time_str):
        # "HH:MM:SS,mmm" -> timedelta
        h, m, s, ms = map(int, re.split('[:,]', time_str))
        return timedelta(hours=h, minutes=m, seconds=s, milliseconds=ms)

    return [
        SrtEntry(
            int(match.group(1)),
            to_td(match.group(2)),
            to_td(match.group(3)),
            match.group(4),
        )
        for match in pattern.finditer(content)
    ]
|
||||
|
||||
class App:
    """Tkinter window that loads an SRT file, re-paces its cues and saves the result.

    The left table shows the cues as loaded, the right table the optimised
    cues. Optimisation runs in two passes: ``_optimize_pacing`` (shorten slow
    short cues, split fast or long ones) followed by ``_perform_suturing``
    (merge runs of very short cues).
    """

    # Whitespace and punctuation that are ignored when counting characters.
    _NON_COUNTED_RE = re.compile(r'[\s,.?!。,、?!]')
    # Clause delimiters; the capture group keeps them in the split result.
    _CLAUSE_DELIM_RE = re.compile(r'([。,、?!,?!])')

    def __init__(self, root):
        """Build the UI inside ``root`` and apply the dark sv_ttk theme."""
        self.root = root
        self.root.title("字幕听感优化器 V3.4 - 黄金语速版 (最终决定版)")
        self.root.geometry("1200x800")
        self.srt_path = ""
        self.original_entries = []
        self.optimized_entries = []
        self.build_ui()
        sv_ttk.set_theme("dark")

    # ------------------------------------------------------------------ GUI

    def build_ui(self):
        """Create the control column (buttons, info label) and the two cue tables."""
        main_pane = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_pane.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        control_frame = ttk.Frame(main_pane, width=250)
        main_pane.add(control_frame, weight=0)

        load_btn = ttk.Button(control_frame, text="1. 加载SRT文件", command=self.load_srt, style="Accent.TButton")
        load_btn.pack(fill=tk.X, pady=5)

        # Optimise/save stay disabled until a file is loaded / optimised.
        self.optimize_btn = ttk.Button(control_frame, text="2. 优化节奏", command=self.run_optimization, state="disabled")
        self.optimize_btn.pack(fill=tk.X, pady=5)
        self.save_btn = ttk.Button(control_frame, text="3. 另存为...", command=self.save_srt, state="disabled")
        self.save_btn.pack(fill=tk.X, pady=5)

        separator = ttk.Separator(control_frame, orient=tk.HORIZONTAL)
        separator.pack(fill=tk.X, pady=15)

        self.info_label = ttk.Label(control_frame, text="请先加载SRT文件", anchor="w", wraplength=230, justify="left")
        self.info_label.pack(fill=tk.X, pady=5)

        table_container = ttk.Frame(main_pane)
        main_pane.add(table_container, weight=1)

        left_frame = ttk.Labelframe(table_container, text="原始字幕")
        left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 5))
        right_frame = ttk.Labelframe(table_container, text="优化后 (绿色为新生成)")
        right_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(5, 0))

        self.tree_orig = self.create_treeview(left_frame)
        self.tree_optim = self.create_treeview(right_frame)
        # Rows tagged "new" (generated cues) get a green background.
        self.tree_optim.tag_configure("new", background="#3a6b3a")

    def create_treeview(self, parent):
        """Create, pack and return a cue table with a vertical scrollbar.

        NOTE(review): ``show="headings"`` asks Tk to display only the heading
        row, so the ``#0`` column configured here (and the index passed as
        ``text=`` in ``populate_tree``) is presumably not visible - confirm
        whether that is intended.
        """
        cols = ("#0", "开始时间", "结束时间", "时长", "字数", "语速", "文本")
        tree = ttk.Treeview(parent, columns=cols[1:], show="headings")
        for col in cols:
            tree.heading(col, text=col)

        tree.column("#0", width=40, anchor="center")
        tree.column("开始时间", width=90)
        tree.column("结束时间", width=90)
        tree.column("时长", width=50, anchor="e")
        tree.column("字数", width=40, anchor="e")
        tree.column("语速", width=50, anchor="e")
        tree.column("文本", width=300)

        vsb = ttk.Scrollbar(parent, orient="vertical", command=tree.yview)
        tree.configure(yscrollcommand=vsb.set)
        tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        return tree

    def load_srt(self):
        """Ask for an SRT file, parse it and show its cues in the left table."""
        path = filedialog.askopenfilename(filetypes=[("SRT Subtitles", "*.srt")])
        if not path:
            return
        self.srt_path = path
        try:
            # utf-8-sig transparently drops a leading BOM if present.
            with open(path, 'r', encoding='utf-8-sig') as f:
                content = f.read()
            self.original_entries = parse_srt(content)
            self.populate_tree(self.tree_orig, self.original_entries)
            self.tree_optim.delete(*self.tree_optim.get_children())
            self.info_label.config(text=f"已加载: {os.path.basename(path)}\n共 {len(self.original_entries)} 条字幕。")
            self.optimize_btn.config(state="normal")
            self.save_btn.config(state="disabled")
        except Exception as e:
            # UI boundary: report any read/parse failure to the user.
            messagebox.showerror("加载失败", f"无法加载或解析文件: {e}")

    def populate_tree(self, tree, entries, highlight_new=False):
        """Replace the rows of ``tree`` with one row per entry.

        When ``highlight_new`` is true, entries whose ``is_new`` flag is set
        receive the ``"new"`` tag.
        """
        tree.delete(*tree.get_children())
        for entry in entries:
            values = (
                entry.start_str,
                entry.end_str,
                f"{entry.duration:.2f}s",
                entry.char_count,
                f"{entry.speed:.2f}",
                entry.text,
            )
            tags = ("new",) if highlight_new and entry.is_new else ()
            tree.insert("", "end", text=str(entry.index), values=values, tags=tags)

    def save_srt(self):
        """Ask for a target path and write the optimised cues as UTF-8 SRT."""
        if not self.optimized_entries:
            return
        original_basename = os.path.splitext(os.path.basename(self.srt_path))[0]
        save_path = filedialog.asksaveasfilename(
            defaultextension=".srt",
            initialfile=f"{original_basename}_optimized.srt",
            filetypes=[("SRT Subtitles", "*.srt")],
        )
        if not save_path:
            return
        try:
            with open(save_path, 'w', encoding='utf-8') as f:
                for entry in self.optimized_entries:
                    f.write(entry.to_srt_block())
            messagebox.showinfo("保存成功", f"优化后的SRT文件已保存至:\n{save_path}")
        except Exception as e:
            # UI boundary: report any write failure to the user.
            messagebox.showerror("保存失败", f"无法保存文件: {e}")

    # --------------------------------------------------------- optimisation

    def run_optimization(self):
        """Run both optimisation passes, renumber the cues and refresh the right table."""
        if not self.original_entries:
            return
        paced_entries = self._optimize_pacing(self.original_entries)
        self.optimized_entries = self._perform_suturing(paced_entries)
        for i, entry in enumerate(self.optimized_entries):
            entry.index = i + 1
        self.populate_tree(self.tree_optim, self.optimized_entries, highlight_new=True)
        self.save_btn.config(state="normal")
        self.info_label.config(text=f"优化完成!\n原 {len(self.original_entries)} 句 -> 新 {len(self.optimized_entries)} 句")
        messagebox.showinfo("优化完成", "字幕节奏已优化!请在右侧检查结果。")

    def _optimize_pacing(self, entries):
        """Return a new cue list with slow cues shortened and fast/long cues split.

        * A cue slower than ``TARGET_SPEED_STRICT`` with 1-19 counted
          characters keeps its start and gets the duration implied by the
          target speed (at least ``MIN_DURATION_THRESHOLD``).
        * A cue faster than ``FAST_SPEED_THRESHOLD`` or longer than
          ``SPLIT_CHARS_THRESHOLD`` characters is split at punctuation (or,
          failing that, near the middle at a jieba word boundary) when its
          duration leaves enough speaking time after inserting pauses.
        * Everything else is passed through.

        The input entries are not modified; pass-through cues are copies with
        newlines in the text replaced by spaces.
        """
        paced = []
        for entry in entries:
            text = entry.text.replace('\n', ' ').strip()
            # Copy so the caller's list keeps its original text and indices.
            current = SrtEntry(entry.index, entry.start_td, entry.end_td, text)
            count = current.char_count
            speed = current.speed

            if speed < TARGET_SPEED_STRICT and 0 < count < 20:
                ideal_sec = max(count / TARGET_SPEED_STRICT, MIN_DURATION_THRESHOLD)
                shortened = SrtEntry(0, current.start_td, current.start_td + timedelta(seconds=ideal_sec), text)
                shortened.is_new = True
                paced.append(shortened)
                continue

            if speed > FAST_SPEED_THRESHOLD or count > SPLIT_CHARS_THRESHOLD:
                pieces = self._split_into_clauses(text)
                if len(pieces) == 1 and count > 12:
                    pieces = self._split_by_words(pieces[0])
                split_cues = self._distribute_time(current, pieces)
                if split_cues:
                    paced.extend(split_cues)
                    continue

            paced.append(current)
        return paced

    @classmethod
    def _spoken_len(cls, text):
        """Count the characters of ``text`` that are not whitespace or punctuation."""
        return len(cls._NON_COUNTED_RE.sub('', text))

    @classmethod
    def _split_into_clauses(cls, text):
        """Split ``text`` after each clause delimiter, keeping the delimiter.

        A delimiter that directly follows another one (e.g. "?!") is attached
        to the preceding clause, and a leading delimiter to the following
        text, so no clause consists of punctuation only unless the whole text
        does.
        """
        clauses = []
        pending = ""
        # re.split with a capture group alternates text (even) and delimiter (odd).
        for i, part in enumerate(cls._CLAUSE_DELIM_RE.split(text)):
            part = part.strip()
            if not part:
                continue
            if i % 2 == 0:
                pending += part
            elif pending:
                clauses.append(pending + part)
                pending = ""
            elif clauses:
                clauses[-1] += part
            else:
                pending = part
        if pending:
            clauses.append(pending)
        return clauses

    @staticmethod
    def _split_by_words(text):
        """Split ``text`` in two at the jieba word boundary closest to its middle.

        Returns ``[text]`` unchanged when there is only one word or when one
        half would be empty.
        """
        words = list(jieba.cut(text))
        if len(words) < 2:
            return [text]
        midpoint = len(text) / 2
        best_index = -1
        smallest_gap = float('inf')
        consumed = 0
        # The last word is excluded so the second half is never empty of words.
        for i, word in enumerate(words[:-1]):
            consumed += len(word)
            gap = abs(consumed - midpoint)
            if gap < smallest_gap:
                smallest_gap = gap
                best_index = i + 1
        head = "".join(words[:best_index]).strip()
        tail = "".join(words[best_index:]).strip()
        if not head or not tail:
            return [text]
        return [head, tail]

    def _distribute_time(self, entry, pieces):
        """Turn ``pieces`` into consecutive cues covering ``entry``'s time span.

        ``NATURAL_PAUSE_DURATION`` is reserved between neighbouring pieces and
        the remaining time is shared in proportion to each piece's counted
        characters. Returns an empty list when there are fewer than two
        pieces, nothing to count, or the remaining speaking time would force a
        rate above ``FAST_SPEED_THRESHOLD * 1.2``.
        """
        if len(pieces) < 2:
            return []
        counts = [self._spoken_len(piece) for piece in pieces]
        total_chars = sum(counts)
        if total_chars == 0:
            return []

        num_gaps = len(pieces) - 1
        speech_time_sec = entry.duration - num_gaps * NATURAL_PAUSE_DURATION
        if not speech_time_sec > total_chars / (FAST_SPEED_THRESHOLD * 1.2):
            return []

        pause_td = timedelta(seconds=NATURAL_PAUSE_DURATION)
        cursor = entry.start_td
        cues = []
        for i, (piece, piece_chars) in enumerate(zip(pieces, counts)):
            span = timedelta(seconds=(piece_chars / total_chars) * speech_time_sec)
            cue = SrtEntry(0, cursor, cursor + span, piece)
            cue.is_new = True
            cues.append(cue)
            cursor += span
            if i < num_gaps:
                cursor += pause_td
        return cues

    def _perform_suturing(self, entries):
        """Merge each run of consecutive too-short cues into a single cue.

        A cue is "too short" when its duration is below
        ``MIN_DURATION_THRESHOLD`` or it has fewer than ``MIN_CHARS_THRESHOLD``
        counted characters. A run of one is kept as is; longer runs become one
        new cue spanning the run, with the texts joined by spaces.
        """
        if not entries:
            return []
        final_entries = []
        merge_buffer = []

        def flush_buffer():
            if not merge_buffer:
                return
            if len(merge_buffer) == 1:
                final_entries.append(merge_buffer[0])
            else:
                merged_entry = SrtEntry(
                    0,
                    merge_buffer[0].start_td,
                    merge_buffer[-1].end_td,
                    " ".join(e.text for e in merge_buffer),
                )
                merged_entry.is_new = True
                final_entries.append(merged_entry)
            merge_buffer.clear()

        for entry in entries:
            if entry.duration < MIN_DURATION_THRESHOLD or entry.char_count < MIN_CHARS_THRESHOLD:
                merge_buffer.append(entry)
            else:
                flush_buffer()
                final_entries.append(entry)
        flush_buffer()
        return final_entries
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the App UI inside
    # it and hand control to the Tk event loop.
    root = tk.Tk()
    app = App(root)
    root.mainloop()
|
||||
Reference in New Issue
Block a user