Review notes (free text ahead of the first "diff --git" header, which patch tools skip):
- The line structure of the patch is restored; indentation is assumed to be 4 spaces per level - TODO confirm against the repository.
- Tag literals, regex group names and type arguments in ADDED lines look stripped by extraction and are rebuilt from how the code uses them - TODO confirm.
- Context and removed lines are kept token-for-token as found.
- Added code changes: the source language is no longer forced to "en-GB", no empty chunk is queued for translation,
  and START_END_PATTERN uses DOTALL so that content containing line breaks still matches.
- The new-side length of the third DeepLTranslator hunk is 70 (five comment lines added); index hashes are those of the original commit.

diff --git a/deepl/src/main/java/com/github/gtache/autosubtitle/translation/deepl/DeepLTranslator.java b/deepl/src/main/java/com/github/gtache/autosubtitle/translation/deepl/DeepLTranslator.java
index 7ce4bea..e491b13 100644
--- a/deepl/src/main/java/com/github/gtache/autosubtitle/translation/deepl/DeepLTranslator.java
+++ b/deepl/src/main/java/com/github/gtache/autosubtitle/translation/deepl/DeepLTranslator.java
@@ -1,6 +1,7 @@
 package com.github.gtache.autosubtitle.translation.deepl;
 
 import com.deepl.api.DeepLException;
+import com.deepl.api.TextTranslationOptions;
 import com.github.gtache.autosubtitle.Language;
 import com.github.gtache.autosubtitle.subtitle.Subtitle;
 import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
@@ -12,14 +13,17 @@ import com.github.pemistahl.lingua.api.LanguageDetector;
 
 import javax.inject.Inject;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Objects;
 import java.util.prefs.Preferences;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 /**
  * DeepL implementation of {@link Translator}
  */
 public class DeepLTranslator implements Translator {
+    private static final Pattern START_END_PATTERN = Pattern.compile("<start>(?<start>\\d+)</start>(?<content>.+?)<end>(?<end>\\d+)</end>", Pattern.DOTALL);
 
     private final Preferences preferences;
     private final LanguageDetector languageDetector;
@@ -61,12 +65,70 @@ public class DeepLTranslator implements Translator {
 
     @Override
     public SubtitleCollection translate(final SubtitleCollection collection, final Language from, final Language to) throws TranslationException {
-        final var subtitles = new ArrayList(collection.subtitles().size());
-        for (final var subtitle : collection.subtitles()) {
-            subtitles.add(translate(subtitle, from, to));
+        final var toTranslate = getToTranslate(collection);
+        try {
+            final var result = translate(toTranslate, from, to);
+            final var text = result.stream().map(Subtitle::content).collect(Collectors.joining(""));
+            return new SubtitleCollectionImpl<>(text, result, to);
+        } catch (final DeepLException e) {
+            throw new TranslationException(e);
+        } catch (final InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new TranslationException(e);
         }
-        final var text = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
-        return new SubtitleCollectionImpl<>(text, subtitles, to);
+    }
+
+    /** Sends every chunk to DeepL and parses each tagged response back into subtitles. */
+    private List<Subtitle> translate(final Iterable<String> toTranslate, final Language from, final Language to) throws DeepLException, InterruptedException, TranslationException {
+        final var result = new ArrayList<Subtitle>();
+        final var options = new TextTranslationOptions().setIgnoreTags(List.of("start", "end")).setPreserveFormatting(true).setTagHandling("xml");
+        final var currentTranslator = getTranslator();
+        if (currentTranslator == null) {
+            throw new TranslationException("DeepL API key is not set");
+        } else {
+            // Regional variants apply to the DeepL target language only; both codes are loop-invariant, so compute them once
+            final var from2 = from.iso2();
+            final var to2 = to == Language.EN ? "en-GB" : to.iso2();
+            for (final var text : toTranslate) {
+                final var translated = currentTranslator.translateText(text, from2, to2, options);
+                final var parsed = parseTranslated(translated.getText());
+                result.addAll(parsed);
+            }
+            return result;
+        }
+    }
+
+    /** Packs the subtitles, each wrapped in start/end tags, into chunks that are cut before they would exceed 32000 characters. */
+    private static List<String> getToTranslate(final SubtitleCollection<?> collection) {
+        final var ret = new ArrayList<String>();
+        final var builder = new StringBuilder(32000);
+        for (final var subtitle : collection.subtitles()) {
+            final var transformed = "<start>" + subtitle.start() + "</start>" + subtitle.content() + "<end>" + subtitle.end() + "</end>";
+            // Flush only a non-empty chunk: a first subtitle that is oversized on its own must not queue an empty request
+            if (!builder.isEmpty() && builder.length() + transformed.length() > 32000) {
+                ret.add(builder.toString());
+                builder.setLength(0);
+            }
+            builder.append(transformed);
+        }
+        if (!builder.isEmpty()) {
+            ret.add(builder.toString());
+        }
+        return ret;
+    }
+
+    /** Rebuilds subtitles from the start/end tags found in a translated chunk; text matching no tag pair is skipped. */
+    private static List<Subtitle> parseTranslated(final CharSequence text) {
+        final var matcher = START_END_PATTERN.matcher(text);
+        final var subtitles = new ArrayList<Subtitle>();
+        while (matcher.find()) {
+            final var start = Long.parseLong(matcher.group("start"));
+            final var end = Long.parseLong(matcher.group("end"));
+            final var content = matcher.group("content");
+            final var subtitle = new SubtitleImpl(content, start, end, null, null);
+            subtitles.add(subtitle);
+        }
+        return subtitles;
     }
 
     private com.deepl.api.Translator getTranslator() {
diff --git a/ffmpeg/src/main/java/com/github/gtache/autosubtitle/ffmpeg/FFmpegVideoConverter.java b/ffmpeg/src/main/java/com/github/gtache/autosubtitle/ffmpeg/FFmpegVideoConverter.java
index 79ee40b..2bb08d5 100644
--- a/ffmpeg/src/main/java/com/github/gtache/autosubtitle/ffmpeg/FFmpegVideoConverter.java
+++ b/ffmpeg/src/main/java/com/github/gtache/autosubtitle/ffmpeg/FFmpegVideoConverter.java
@@ -128,7 +128,7 @@ public class FFmpegVideoConverter extends AbstractProcessRunner implements Video
     }
 
     private SubtitleConverter getSubtitleConverter() {
-        return converterProvider.getConverter(preferences.get("outputFormat", "ass"));
+        return converterProvider.getConverter(preferences.get("outputFormat", "srt"));
     }
 
     private static String escapeVF(final String path) {
diff --git a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java
index 1d2d01c..7703bcf 100644
--- a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java
+++ b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java
@@ -133,7 +133,7 @@ public class JSONSubtitleConverter implements SubtitleConverter {
                 if (newLength > maxLineLength * maxLines) {
                     final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
                     ret.add(newSubtitle);
-                    builder.delete(0, builder.length());
+                    builder.setLength(0);
                     builder.append(text);
                     currentStart = start == 0 ? currentEnd : start;
                 } else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {