fix(video2srt): remove VAD
This commit is contained in:
@@ -19,9 +19,6 @@ DEFAULT_MODEL = Path(
|
|||||||
DEFAULT_BIN = Path(
|
DEFAULT_BIN = Path(
|
||||||
os.environ.get("WHISPER_BIN", WHISPER_DIR / "build/bin/whisper-cli")
|
os.environ.get("WHISPER_BIN", WHISPER_DIR / "build/bin/whisper-cli")
|
||||||
)
|
)
|
||||||
DEFAULT_VAD = Path(
|
|
||||||
os.environ.get("VAD_MODEL", WHISPER_DIR / "models/ggml-silero-v6.2.0.bin")
|
|
||||||
)
|
|
||||||
|
|
||||||
MIN_WORDS_TO_WRAP = 2
|
MIN_WORDS_TO_WRAP = 2
|
||||||
SRT_HEADER_LINES = 2
|
SRT_HEADER_LINES = 2
|
||||||
@@ -125,20 +122,6 @@ def process(video: Path, args: argparse.Namespace) -> None:
|
|||||||
cmd.append("--translate")
|
cmd.append("--translate")
|
||||||
if args.max_len > 0:
|
if args.max_len > 0:
|
||||||
cmd += ["--max-len", str(args.max_len), "--split-on-word"]
|
cmd += ["--max-len", str(args.max_len), "--split-on-word"]
|
||||||
if args.vad:
|
|
||||||
if args.vad_model.exists():
|
|
||||||
cmd += ["--vad", "--vad-model", str(args.vad_model)]
|
|
||||||
if args.vad_max_speech > 0:
|
|
||||||
cmd += ["-vmsd", str(args.vad_max_speech)]
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
f"warn: VAD model not found at {args.vad_model}, running without VAD",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f" download with: sh {WHISPER_DIR}/models/download-vad-model.sh silero-v6.2.0",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
|
|
||||||
subprocess.run(cmd, check=True)
|
subprocess.run(cmd, check=True)
|
||||||
|
|
||||||
@@ -182,18 +165,6 @@ def main() -> int:
|
|||||||
p.add_argument(
|
p.add_argument(
|
||||||
"-f", "--force", action="store_true", help="Overwrite existing .srt"
|
"-f", "--force", action="store_true", help="Overwrite existing .srt"
|
||||||
)
|
)
|
||||||
p.add_argument(
|
|
||||||
"--no-vad",
|
|
||||||
dest="vad",
|
|
||||||
action="store_false",
|
|
||||||
help="Disable Silero VAD pre-filtering (VAD reduces hallucination loops)",
|
|
||||||
)
|
|
||||||
p.add_argument(
|
|
||||||
"--vad-max-speech",
|
|
||||||
type=float,
|
|
||||||
default=15.0,
|
|
||||||
help="Max seconds of speech per VAD chunk; shorter values give tighter timestamps (default: 15)",
|
|
||||||
)
|
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
"--max-len",
|
"--max-len",
|
||||||
type=int,
|
type=int,
|
||||||
@@ -209,9 +180,6 @@ def main() -> int:
|
|||||||
p.add_argument(
|
p.add_argument(
|
||||||
"--bin", type=Path, default=DEFAULT_BIN, help=argparse.SUPPRESS
|
"--bin", type=Path, default=DEFAULT_BIN, help=argparse.SUPPRESS
|
||||||
)
|
)
|
||||||
p.add_argument(
|
|
||||||
"--vad-model", type=Path, default=DEFAULT_VAD, help=argparse.SUPPRESS
|
|
||||||
)
|
|
||||||
|
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user