Allows setting max lines and max line length, enables DeepL, adds user setup bridge

This commit is contained in:
Guillaume Tâche
2024-08-20 21:31:10 +02:00
parent 273a6e996f
commit 44c317f207
49 changed files with 752 additions and 298 deletions

View File

@@ -55,10 +55,8 @@ public class WhisperXSubtitleExtractor extends AbstractWhisperSubtitleExtractor
args.add("True");
args.add("--compute_type");
args.add("int8");
args.add("--max_line_count");
args.add("2");
args.add("--max_line_width");
args.add("30");
args.add("--threads");
args.add(String.valueOf(Runtime.getRuntime().availableProcessors()));
if (language != Language.AUTO) {
args.add("--language");
args.add(language.iso2());

View File

@@ -1,6 +1,8 @@
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.modules.impl.MaxLineLength;
import com.github.gtache.autosubtitle.modules.impl.MaxLines;
import com.github.gtache.autosubtitle.subtitle.Subtitle;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
@@ -12,10 +14,14 @@ import com.google.gson.Gson;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.prefs.Preferences;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* {@link SubtitleConverter} implementation for JSON files
@@ -23,11 +29,21 @@ import java.util.stream.Collectors;
@Singleton
public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
private static final String MAX_LINE_LENGTH = "maxLineLength";
private static final String MAX_LINES = "maxLines";
private static final Pattern SPLIT_PATTERN = Pattern.compile("[ \n]+");
private final Gson gson;
private final Preferences preferences;
private final int defaultMaxLineLength;
private final int defaultMaxLines;
@Inject
JSONSubtitleConverter(final Gson gson) {
JSONSubtitleConverter(final Gson gson, final Preferences preferences, @MaxLineLength final int defaultMaxLineLength, @MaxLines final int defaultMaxLines) {
this.gson = Objects.requireNonNull(gson);
this.preferences = Objects.requireNonNull(preferences);
this.defaultMaxLineLength = defaultMaxLineLength;
this.defaultMaxLines = defaultMaxLines;
}
@Override
@@ -42,10 +58,15 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
public SubtitleCollectionImpl<SubtitleImpl> parse(final String content) throws ParseException {
try {
final var json = gson.fromJson(content, JSONSubtitles.class);
final var subtitles = json.segments().stream().map(s -> {
final var start = (long) s.start() * 1000L;
final var end = (long) s.end() * 1000L;
return new SubtitleImpl(s.text(), start, end, null, null);
final var subtitles = json.segments().stream().flatMap(s -> {
final var start = (long) (s.start() * 1000L);
final var end = (long) (s.end() * 1000L);
if (s.words().isEmpty()) {
return Stream.of(new SubtitleImpl(s.text(), start, end, null, null));
} else {
return splitSubtitle(s);
}
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
final var language = Language.getLanguage(json.language());
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
@@ -55,6 +76,79 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
}
}
/**
 * Dispatches a segment to the splitting strategy matching the length of its text.
 * <ul>
 * <li>Text no longer than the maximum line length: a single one-line subtitle (newlines become spaces)</li>
 * <li>Text no longer than {@code maxLines * maxLineLength}: a single subtitle re-wrapped by {@link #splitSubtitleLines}</li>
 * <li>Anything longer: several subtitles built by {@link #splitSubtitleWords}</li>
 * </ul>
 * Both limits are read from the preferences, falling back to the injected defaults.
 *
 * @param segment The segment to convert
 * @return The resulting subtitle(s)
 */
private Stream<SubtitleImpl> splitSubtitle(final JSONSubtitleSegment segment) {
    final int lineLimit = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
    final int lineCount = preferences.getInt(MAX_LINES, defaultMaxLines);
    final String content = segment.text();
    final int contentLength = content.length();

    // The single-line check must stay first: it wins even when lineCount * lineLimit is smaller than lineLimit
    if (contentLength <= lineLimit) {
        final long startMillis = (long) (segment.start() * 1000L);
        final long endMillis = (long) (segment.end() * 1000L);
        final String singleLine = content.replace("\n", " ");
        return Stream.of(new SubtitleImpl(singleLine, startMillis, endMillis, null, null));
    }
    return contentLength <= lineCount * lineLimit
            ? splitSubtitleLines(segment)
            : splitSubtitleWords(segment);
}
/**
 * Re-wraps the text of a segment into a single multi-line subtitle.
 * <p>
 * The text is tokenized on runs of spaces/newlines and re-joined, inserting a line break
 * whenever {@link #areDifferentLines} reports that appending the next token crosses a line
 * boundary, and a space otherwise. The subtitle keeps the timing of the whole segment.
 * <p>
 * The first token is written without any separator, and empty tokens are skipped, so the
 * resulting content never starts with a stray space or newline.
 *
 * @param segment The segment to convert
 * @return A stream containing the single re-wrapped subtitle
 */
private Stream<SubtitleImpl> splitSubtitleLines(final JSONSubtitleSegment segment) {
    final var maxLineLength = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
    final var text = segment.text();
    final var builder = new StringBuilder(text.length());
    for (final var s : SPLIT_PATTERN.split(text)) {
        if (s.isEmpty()) {
            // Pattern.split yields an empty leading token when the text starts with whitespace
            continue;
        }
        if (builder.isEmpty()) {
            // First token: nothing to separate it from
            builder.append(s);
            continue;
        }
        final var newLength = builder.length() + s.length();
        if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
            builder.append("\n").append(s);
        } else {
            builder.append(" ").append(s);
        }
    }
    // Segment times are multiplied by 1000 before truncation to long
    final var start = (long) (segment.start() * 1000L);
    final var end = (long) (segment.end() * 1000L);
    return Stream.of(new SubtitleImpl(builder.toString(), start, end, null, null));
}
/**
 * Tells whether growing the accumulated text from {@code currentLength} to {@code newLength}
 * characters moves past a line boundary, lines being counted in slots of
 * {@code maxLength + 1} characters.
 *
 * @param currentLength The length of the text accumulated so far
 * @param newLength     The length the text would reach once the next token is appended
 * @param maxLength     The maximum line length
 * @return True if the two lengths fall in different line slots
 */
private static boolean areDifferentLines(final int currentLength, final int newLength, final int maxLength) {
    final int slotSize = maxLength + 1;
    final int currentSlot = currentLength / slotSize;
    final int slotAfterAppend = newLength / slotSize;
    return slotAfterAppend > currentSlot;
}
/**
 * Splits a segment that is too long for a single subtitle into several subtitles, using the
 * per-word timestamps of the segment.
 * <p>
 * Words are accumulated until adding the next one would exceed
 * {@code maxLines * maxLineLength} characters; the accumulated text is then emitted as a
 * subtitle and a new one is started with that word. Inside a subtitle, words are separated by
 * a newline when {@link #areDifferentLines} reports a line boundary, by a space otherwise.
 * <p>
 * A subtitle is only emitted when some text has been accumulated, so a first word that is
 * longer than the whole budget does not produce an empty subtitle, and the first word of a
 * subtitle is never prefixed with a separator.
 * <p>
 * NOTE(review): a word start/end of 0 is handled as "no timestamp" (the previous known end is
 * reused instead) - presumably because some words come without timing; confirm against the
 * JSON produced by the extractor.
 *
 * @param segment The segment to split
 * @return The subtitles, in word order
 */
private Stream<SubtitleImpl> splitSubtitleWords(final JSONSubtitleSegment segment) {
    final var maxLineLength = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
    final var maxLines = preferences.getInt(MAX_LINES, defaultMaxLines);
    final var maxLength = maxLineLength * maxLines;
    // Capacity hints only: guard them so that a zero/negative preference cannot make the sizing itself throw
    final var expectedCount = maxLength > 0 ? segment.text().length() / maxLength + 1 : 0;
    final var ret = new ArrayList<SubtitleImpl>(expectedCount);
    final var builder = new StringBuilder(Math.max(0, maxLength));
    final var words = segment.words();
    var currentStart = -1L;
    var currentEnd = -1L;
    for (final var word : words) {
        final var text = word.word();
        final var start = (long) (word.start() * 1000L);
        final var end = (long) (word.end() * 1000L);
        if (currentStart < 0) {
            currentStart = start;
        }
        final var newLength = builder.length() + text.length();
        if (builder.isEmpty()) {
            // First word of a subtitle: no separator, and nothing to flush even if it is over budget
            builder.append(text);
        } else if (newLength > maxLength) {
            // Budget exceeded: emit what has been accumulated and start a new subtitle with this word
            ret.add(new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null));
            builder.setLength(0);
            builder.append(text);
            currentStart = start == 0 ? currentEnd : start;
        } else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
            builder.append("\n").append(text);
        } else {
            builder.append(" ").append(text);
        }
        currentEnd = end == 0 ? currentEnd : end;
    }
    if (!builder.isEmpty()) {
        // Emit the remaining words
        ret.add(new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null));
    }
    return ret.stream();
}
@Override
public boolean canParse(final Path file) {
return file.getFileName().toString().endsWith(".json");

View File

@@ -4,6 +4,7 @@
module com.github.gtache.autosubtitle.whisperx {
requires transitive com.github.gtache.autosubtitle.whisper.common;
requires org.apache.logging.log4j;
requires java.prefs;
exports com.github.gtache.autosubtitle.whisperx;
exports com.github.gtache.autosubtitle.setup.whisperx;