|
|
|
|
@@ -1,6 +1,8 @@
|
|
|
|
|
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
|
|
|
|
|
|
|
|
|
|
import com.github.gtache.autosubtitle.Language;
|
|
|
|
|
import com.github.gtache.autosubtitle.modules.impl.MaxLineLength;
|
|
|
|
|
import com.github.gtache.autosubtitle.modules.impl.MaxLines;
|
|
|
|
|
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
|
|
|
|
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
|
|
|
|
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
|
|
|
|
|
@@ -12,10 +14,14 @@ import com.google.gson.Gson;
|
|
|
|
|
import javax.inject.Inject;
|
|
|
|
|
import javax.inject.Singleton;
|
|
|
|
|
import java.nio.file.Path;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Comparator;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Objects;
|
|
|
|
|
import java.util.prefs.Preferences;
|
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* {@link SubtitleConverter} implementation for JSON files
|
|
|
|
|
@@ -23,11 +29,21 @@ import java.util.stream.Collectors;
|
|
|
|
|
@Singleton
|
|
|
|
|
public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|
|
|
|
|
|
|
|
|
private static final String MAX_LINE_LENGTH = "maxLineLength";
|
|
|
|
|
private static final String MAX_LINES = "maxLines";
|
|
|
|
|
private static final Pattern SPLIT_PATTERN = Pattern.compile("[ \n]+");
|
|
|
|
|
|
|
|
|
|
private final Gson gson;
|
|
|
|
|
private final Preferences preferences;
|
|
|
|
|
private final int defaultMaxLineLength;
|
|
|
|
|
private final int defaultMaxLines;
|
|
|
|
|
|
|
|
|
|
/**
 * Creates the converter.
 *
 * @param gson                 the Gson instance used to deserialize the JSON content, must not be null
 * @param preferences          the preferences queried for the "maxLineLength" and "maxLines" settings, must not be null
 * @param defaultMaxLineLength the line length used when the preferences hold no "maxLineLength" value
 * @param defaultMaxLines      the line count used when the preferences hold no "maxLines" value
 * @throws NullPointerException if {@code gson} or {@code preferences} is null
 */
@Inject
JSONSubtitleConverter(final Gson gson, final Preferences preferences, @MaxLineLength final int defaultMaxLineLength, @MaxLines final int defaultMaxLines) {
    this.gson = Objects.requireNonNull(gson);
    this.preferences = Objects.requireNonNull(preferences);
    this.defaultMaxLineLength = defaultMaxLineLength;
    this.defaultMaxLines = defaultMaxLines;
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
@@ -42,10 +58,15 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|
|
|
|
public SubtitleCollectionImpl<SubtitleImpl> parse(final String content) throws ParseException {
|
|
|
|
|
try {
|
|
|
|
|
final var json = gson.fromJson(content, JSONSubtitles.class);
|
|
|
|
|
final var subtitles = json.segments().stream().map(s -> {
|
|
|
|
|
final var start = (long) s.start() * 1000L;
|
|
|
|
|
final var end = (long) s.end() * 1000L;
|
|
|
|
|
return new SubtitleImpl(s.text(), start, end, null, null);
|
|
|
|
|
final var subtitles = json.segments().stream().flatMap(s -> {
|
|
|
|
|
final var start = (long) (s.start() * 1000L);
|
|
|
|
|
final var end = (long) (s.end() * 1000L);
|
|
|
|
|
if (s.words().isEmpty()) {
|
|
|
|
|
return Stream.of(new SubtitleImpl(s.text(), start, end, null, null));
|
|
|
|
|
} else {
|
|
|
|
|
return splitSubtitle(s);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
|
|
|
|
final var language = Language.getLanguage(json.language());
|
|
|
|
|
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
|
|
|
|
|
@@ -55,6 +76,79 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private Stream<SubtitleImpl> splitSubtitle(final JSONSubtitleSegment segment) {
|
|
|
|
|
final var maxLineLength = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
|
|
|
|
|
final var maxLines = preferences.getInt(MAX_LINES, defaultMaxLines);
|
|
|
|
|
final var text = segment.text();
|
|
|
|
|
if (text.length() <= maxLineLength) {
|
|
|
|
|
final var start = (long) (segment.start() * 1000L);
|
|
|
|
|
final var end = (long) (segment.end() * 1000L);
|
|
|
|
|
return Stream.of(new SubtitleImpl(text.replace("\n", " "), start, end, null, null));
|
|
|
|
|
} else if (text.length() <= maxLines * maxLineLength) {
|
|
|
|
|
return splitSubtitleLines(segment);
|
|
|
|
|
} else {
|
|
|
|
|
return splitSubtitleWords(segment);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private Stream<SubtitleImpl> splitSubtitleLines(final JSONSubtitleSegment segment) {
|
|
|
|
|
final var maxLineLength = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
|
|
|
|
|
final var text = segment.text();
|
|
|
|
|
final var split = SPLIT_PATTERN.split(text);
|
|
|
|
|
final var builder = new StringBuilder(text.length());
|
|
|
|
|
for (final var s : split) {
|
|
|
|
|
final var newLength = builder.length() + s.length();
|
|
|
|
|
if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
|
|
|
|
builder.append("\n").append(s);
|
|
|
|
|
} else {
|
|
|
|
|
builder.append(" ").append(s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
final var start = (long) (segment.start() * 1000L);
|
|
|
|
|
final var end = (long) (segment.end() * 1000L);
|
|
|
|
|
return Stream.of(new SubtitleImpl(builder.toString(), start, end, null, null));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static boolean areDifferentLines(final int currentLength, final int newLength, final int maxLength) {
|
|
|
|
|
return currentLength / (maxLength + 1) < newLength / (maxLength + 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private Stream<SubtitleImpl> splitSubtitleWords(final JSONSubtitleSegment segment) {
|
|
|
|
|
final var maxLineLength = preferences.getInt(MAX_LINE_LENGTH, defaultMaxLineLength);
|
|
|
|
|
final var maxLines = preferences.getInt(MAX_LINES, defaultMaxLines);
|
|
|
|
|
final var ret = new ArrayList<SubtitleImpl>(segment.text().length() / (maxLines * maxLineLength));
|
|
|
|
|
final var builder = new StringBuilder(maxLines * maxLineLength);
|
|
|
|
|
final var words = segment.words();
|
|
|
|
|
var currentStart = -1L;
|
|
|
|
|
var currentEnd = -1L;
|
|
|
|
|
for (final var word : words) {
|
|
|
|
|
final var text = word.word();
|
|
|
|
|
final var start = (long) (word.start() * 1000L);
|
|
|
|
|
final var end = (long) (word.end() * 1000L);
|
|
|
|
|
if (currentStart < 0) {
|
|
|
|
|
currentStart = start;
|
|
|
|
|
}
|
|
|
|
|
final var newLength = builder.length() + text.length();
|
|
|
|
|
if (newLength > maxLineLength * maxLines) {
|
|
|
|
|
final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
|
|
|
|
|
ret.add(newSubtitle);
|
|
|
|
|
builder.delete(0, builder.length());
|
|
|
|
|
builder.append(text);
|
|
|
|
|
currentStart = start == 0 ? currentEnd : start;
|
|
|
|
|
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
|
|
|
|
builder.append("\n").append(text);
|
|
|
|
|
} else {
|
|
|
|
|
builder.append(" ").append(text);
|
|
|
|
|
}
|
|
|
|
|
currentEnd = end == 0 ? currentEnd : end;
|
|
|
|
|
}
|
|
|
|
|
if (!builder.isEmpty()) {
|
|
|
|
|
final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
|
|
|
|
|
ret.add(newSubtitle);
|
|
|
|
|
}
|
|
|
|
|
return ret.stream();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public boolean canParse(final Path file) {
|
|
|
|
|
return file.getFileName().toString().endsWith(".json");
|
|
|
|
|
|