# 语音转文本.py — speech-to-text transcription script (faster-whisper)
  1. import os
  2. from faster_whisper import WhisperModel
  3. def format_timestamp(seconds: float):
  4. """
  5. 将秒数转换为 SRT 时间戳格式 (HH:MM:SS,mmm)
  6. """
  7. whole_seconds = int(seconds)
  8. # milliseconds = int((seconds - whole_seconds) * 1000)
  9. hours = whole_seconds // 3600
  10. minutes = (whole_seconds % 3600) // 60
  11. secs = whole_seconds % 60
  12. return f"{hours:02d}:{minutes:02d}:{secs:02d}"
  13. def transcribe_mp3_to_srt(mp3_path, model_size="large-v3", device="cuda", compute_type="int8_float16",
  14. language=None):
  15. print(f"正在加载模型: {model_size} ({compute_type})...")
  16. # 1. 初始化模型
  17. # 这里是核心:使用 cuda 和 int8_float16 达到速度与精度的平衡
  18. model = WhisperModel(model_size, device=device, compute_type=compute_type)
  19. print(f"正在转录音频: {mp3_path} ...")
  20. # 2. 开始转录
  21. # beam_size=5 是官方推荐的精度设置
  22. # vad_filter=True 会自动过滤静音片段,极大提升长音频的处理速度
  23. segments, info = model.transcribe(
  24. mp3_path,
  25. beam_size=5,
  26. vad_filter=True,
  27. language=language
  28. )
  29. print(f"检测到语言: {info.language}, 置信度: {info.language_probability:.2f}")
  30. # 3. 输出文件名
  31. srt_filename = os.path.splitext(mp3_path)[0] + ".txt"
  32. # 4. 写入 SRT 文件
  33. # 注意:segments 是一个生成器,只有在遍历时才会真正开始计算(流式处理)
  34. with open(srt_filename, "w", encoding="utf-8") as f:
  35. for i, segment in enumerate(segments, start=1):
  36. start_time = format_timestamp(segment.start)
  37. end_time = format_timestamp(segment.end)
  38. text = segment.text.strip()
  39. # 写入 SRT 格式
  40. # f.write(f"{i}\n")
  41. # f.write(f"{start_time} --> {end_time}\n")
  42. # f.write(f"{text}\n\n")
  43. # txt 格式
  44. f.write(f"{start_time}\n")
  45. f.write(f"{text}\n\n")
  46. # 可选:实时打印进度
  47. print(f"[{start_time} -> {end_time}] {text}")
  48. print(f"\n✅ 提取完成!字幕已保存为: {srt_filename}")
  49. if __name__ == "__main__":
  50. # 替换为你的 mp3 文件路径
  51. audio_file = "/home/martin/ML/RemoteProject/untitled10/Audio/temp/audio/backyardhits2.mp3"
  52. if os.path.exists(audio_file):
  53. transcribe_mp3_to_srt(audio_file, compute_type="default")
  54. else:
  55. print(f"找不到文件: {audio_file}")
  56. print('==')