fix(video2srt): format to two lines
This commit is contained in:
@@ -9,6 +9,8 @@ TRANSLATE="${TRANSLATE:-1}"
|
|||||||
SRC_LANG="${SRC_LANG:-auto}"
|
SRC_LANG="${SRC_LANG:-auto}"
|
||||||
FORCE="${FORCE:-0}"
|
FORCE="${FORCE:-0}"
|
||||||
USE_VAD="${USE_VAD:-1}"
|
USE_VAD="${USE_VAD:-1}"
|
||||||
|
MAX_LEN="${MAX_LEN:-84}"
|
||||||
|
LINE_LEN="${LINE_LEN:-42}"
|
||||||
OUTPUT=""
|
OUTPUT=""
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
@@ -24,6 +26,8 @@ Options:
|
|||||||
-o, --output PATH Output .srt path (single input only; default: <video>.srt)
|
-o, --output PATH Output .srt path (single input only; default: <video>.srt)
|
||||||
-f, --force Overwrite existing .srt
|
-f, --force Overwrite existing .srt
|
||||||
--no-vad Disable Silero VAD pre-filtering (VAD reduces hallucination loops)
|
--no-vad Disable Silero VAD pre-filtering (VAD reduces hallucination loops)
|
||||||
|
--max-len N Max characters per SRT entry, 0 to disable (default: $MAX_LEN)
|
||||||
|
--line-len N Max characters per visible line; longer entries wrap to 2 lines, 0 to disable (default: $LINE_LEN)
|
||||||
-h, --help Show this help
|
-h, --help Show this help
|
||||||
|
|
||||||
Env overrides: WHISPER_DIR, WHISPER_MODEL, WHISPER_BIN
|
Env overrides: WHISPER_DIR, WHISPER_MODEL, WHISPER_BIN
|
||||||
@@ -39,6 +43,8 @@ while [[ $# -gt 0 ]]; do
|
|||||||
-o|--output) OUTPUT="$2"; shift 2 ;;
|
-o|--output) OUTPUT="$2"; shift 2 ;;
|
||||||
-f|--force) FORCE=1; shift ;;
|
-f|--force) FORCE=1; shift ;;
|
||||||
--no-vad) USE_VAD=0; shift ;;
|
--no-vad) USE_VAD=0; shift ;;
|
||||||
|
--max-len) MAX_LEN="$2"; shift 2 ;;
|
||||||
|
--line-len) LINE_LEN="$2"; shift 2 ;;
|
||||||
-h|--help) usage; exit 0 ;;
|
-h|--help) usage; exit 0 ;;
|
||||||
--) shift; args+=("$@"); break ;;
|
--) shift; args+=("$@"); break ;;
|
||||||
-*) echo "Unknown option: $1" >&2; usage >&2; exit 2 ;;
|
-*) echo "Unknown option: $1" >&2; usage >&2; exit 2 ;;
|
||||||
@@ -96,6 +102,9 @@ for video in "${args[@]}"; do
|
|||||||
-mc 0
|
-mc 0
|
||||||
)
|
)
|
||||||
[[ "$TRANSLATE" == "1" ]] && whisper_args+=(--translate)
|
[[ "$TRANSLATE" == "1" ]] && whisper_args+=(--translate)
|
||||||
|
if [[ "$MAX_LEN" -gt 0 ]]; then
|
||||||
|
whisper_args+=(--max-len "$MAX_LEN" --split-on-word)
|
||||||
|
fi
|
||||||
if [[ "$USE_VAD" == "1" ]]; then
|
if [[ "$USE_VAD" == "1" ]]; then
|
||||||
if [[ -f "$VAD_MODEL" ]]; then
|
if [[ -f "$VAD_MODEL" ]]; then
|
||||||
whisper_args+=(--vad --vad-model "$VAD_MODEL")
|
whisper_args+=(--vad --vad-model "$VAD_MODEL")
|
||||||
@@ -107,6 +116,50 @@ for video in "${args[@]}"; do
|
|||||||
|
|
||||||
"$WHISPER_BIN" "${whisper_args[@]}"
|
"$WHISPER_BIN" "${whisper_args[@]}"
|
||||||
|
|
||||||
|
if [[ "$LINE_LEN" -gt 0 ]]; then
|
||||||
|
python3 - "$srt" "$LINE_LEN" <<'PYEOF'
|
||||||
|
import re, sys, pathlib
|
||||||
|
|
||||||
|
# Positional arguments passed after "python3 -": the SRT file to rewrite in
# place, then the maximum number of characters allowed per visible line.
srt_arg, limit_arg = sys.argv[1], sys.argv[2]
path = pathlib.Path(srt_arg)
max_line = int(limit_arg)
|
||||||
|
|
||||||
|
def wrap(text, limit):
    """Collapse whitespace in *text* and split it into at most two lines.

    The text is returned on a single line when it already fits within
    *limit* characters, when it is a single word, or when *limit* is not
    positive (wrapping disabled). Otherwise exactly one space is replaced
    by a newline, at the position with the lowest score.

    Scoring, lower is better:
      * the absolute length difference between the two lines;
      * plus 100 for every character by which a line exceeds *limit*;
      * minus 8 when the first line ends at clause punctuation, also when
        that punctuation is followed by a closing quote or bracket.

    Args:
        text: Subtitle text; any run of whitespace counts as one space.
        limit: Maximum characters per visible line; <= 0 disables wrapping.

    Returns:
        The whitespace-normalised text containing at most one newline.
    """
    text = " ".join(text.split())
    # A non-positive limit means "do not wrap"; without this guard every
    # multi-word text would be force-split.
    if limit <= 0 or len(text) <= limit:
        return text
    words = text.split(" ")
    if len(words) < 2:
        return text

    # Closing quotes/brackets may trail the punctuation that ends a clause
    # (e.g. '"Okay,"'), so they are ignored when looking for it.
    closers = "\"')]}\u201d\u2019"
    clause_marks = tuple(",.!?:;")

    total = len(text)
    best = 0
    best_score = None
    top = 0  # length of the first line if the break follows words[i]
    for i, word in enumerate(words[:-1]):
        top += len(word) + (1 if i else 0)
        bottom = total - top - 1  # the replaced space is not counted
        score = abs(bottom - top)
        score += 100 * max(0, top - limit)
        score += 100 * max(0, bottom - limit)
        if word.rstrip(closers).endswith(clause_marks):
            score -= 8
        # Strict comparison: on a tie the earliest break wins.
        if best_score is None or score < best_score:
            best_score, best = score, i
    return " ".join(words[:best + 1]) + "\n" + " ".join(words[best + 1:])
|
||||||
|
|
||||||
|
# Rewrite the SRT file in place. Cues are separated by one or more blank
# lines; within a cue the first two lines are kept untouched and all
# remaining lines are joined and re-wrapped. Cues with fewer than three
# lines are copied through unchanged.
srt_text = path.read_text(encoding="utf-8").strip()
rewritten = []
for cue in re.split(r"\n\n+", srt_text):
    cue_lines = cue.split("\n")
    if len(cue_lines) >= 3:
        caption = wrap(" ".join(cue_lines[2:]), max_line)
        cue = "\n".join([*cue_lines[:2], caption])
    rewritten.append(cue)
path.write_text("\n\n".join(rewritten) + "\n", encoding="utf-8")
|
||||||
|
PYEOF
|
||||||
|
fi
|
||||||
|
|
||||||
rm -f "$tmpwav"
|
rm -f "$tmpwav"
|
||||||
trap - EXIT
|
trap - EXIT
|
||||||
echo "<< $srt"
|
echo "<< $srt"
|
||||||
|
|||||||
Reference in New Issue
Block a user