DeepL translation works

This commit is contained in:
Guillaume Tâche
2024-08-25 19:48:55 +02:00
parent 728b563d8b
commit e93a47f4e2
3 changed files with 64 additions and 7 deletions

View File

@@ -1,6 +1,7 @@
package com.github.gtache.autosubtitle.translation.deepl;
import com.deepl.api.DeepLException;
import com.deepl.api.TextTranslationOptions;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.subtitle.Subtitle;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
@@ -12,14 +13,17 @@ import com.github.pemistahl.lingua.api.LanguageDetector;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.prefs.Preferences;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* DeepL implementation of {@link Translator}
*/
public class DeepLTranslator implements Translator<Subtitle> {
// Matches one serialised subtitle: <start>S</start>content<end>E</end>.
// DOTALL is required because subtitle content may span several lines, and "." does not match line terminators otherwise.
// The lazy ".*?" (rather than ".+?") lets an empty content match on its own instead of swallowing the following subtitle.
private static final Pattern START_END_PATTERN = Pattern.compile("<start>(?<start>\\d+)</start>(?<content>.*?)<end>(?<end>\\d+)</end>", Pattern.DOTALL);
private final Preferences preferences;
private final LanguageDetector languageDetector;
@@ -61,12 +65,65 @@ public class DeepLTranslator implements Translator<Subtitle> {
@Override
public SubtitleCollection<Subtitle> translate(final SubtitleCollection<?> collection, final Language from, final Language to) throws TranslationException {
final var subtitles = new ArrayList<Subtitle>(collection.subtitles().size());
for (final var subtitle : collection.subtitles()) {
subtitles.add(translate(subtitle, from, to));
final var toTranslate = getToTranslate(collection);
try {
final var result = translate(toTranslate, from, to);
final var text = result.stream().map(Subtitle::content).collect(Collectors.joining(""));
return new SubtitleCollectionImpl<>(text, result, to);
} catch (final DeepLException e) {
throw new TranslationException(e);
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
throw new TranslationException(e);
}
final var text = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
return new SubtitleCollectionImpl<>(text, subtitles, to);
}
/**
 * Translates pre-serialised subtitle chunks with DeepL and parses the answers back into subtitles.
 * <p>
 * Each chunk is sent as XML-tagged text; the {@code start} and {@code end} tags are declared as ignored
 * so that their content is not translated.
 *
 * @param toTranslate the chunks to translate, in subtitle order
 * @param from        the source language
 * @param to          the target language
 * @return the translated subtitles, in the order they were parsed from the answers
 * @throws DeepLException       if a DeepL call fails
 * @throws InterruptedException if the thread is interrupted during a DeepL call
 * @throws TranslationException if no DeepL translator is available (reported as a missing API key)
 */
private List<Subtitle> translate(final Iterable<String> toTranslate, final Language from, final Language to) throws DeepLException, InterruptedException, TranslationException {
    final var currentTranslator = getTranslator();
    if (currentTranslator == null) {
        throw new TranslationException("DeepL API key is not set");
    }
    final var options = new TextTranslationOptions().setIgnoreTags(List.of("start", "end")).setPreserveFormatting(true).setTagHandling("xml");
    // DeepL documents regional variants (en-GB, en-US, ...) for the target language only,
    // so the source language is always sent as its plain code.
    final var sourceCode = from.iso2();
    // A regional variant is sent for an English target; the plain code is presumably "en", which DeepL deprecates as a target.
    final var targetCode = to == Language.EN ? "en-GB" : to.iso2();
    final var result = new ArrayList<Subtitle>();
    for (final var text : toTranslate) {
        final var translated = currentTranslator.translateText(text, sourceCode, targetCode, options);
        result.addAll(parseTranslated(translated.getText()));
    }
    return result;
}
/**
 * Groups the subtitles of a collection into text chunks to be sent for translation.
 * <p>
 * Each subtitle is serialised as {@code <start>S</start>content<end>E</end>} and the serialised subtitles
 * are concatenated, in order, into chunks of at most 32000 characters. A subtitle is never split across
 * two chunks; one whose serialised form alone exceeds the limit ends up in a chunk of its own, which is
 * then longer than the limit.
 *
 * @param collection the subtitles to serialise
 * @return the chunks in subtitle order; never contains an empty string
 */
private static List<String> getToTranslate(final SubtitleCollection<?> collection) {
    final var maxChunkLength = 32000;
    final var ret = new ArrayList<String>();
    final var builder = new StringBuilder(maxChunkLength);
    for (final var subtitle : collection.subtitles()) {
        final var transformed = "<start>" + subtitle.start() + "</start>" + subtitle.content() + "<end>" + subtitle.end() + "</end>";
        // Flush only when something is buffered: an oversized subtitle arriving on an empty buffer
        // would otherwise emit an empty chunk.
        if (!builder.isEmpty() && builder.length() + transformed.length() > maxChunkLength) {
            ret.add(builder.toString());
            builder.setLength(0);
        }
        builder.append(transformed);
    }
    if (!builder.isEmpty()) {
        ret.add(builder.toString());
    }
    return ret;
}
/**
 * Rebuilds subtitles from a translated chunk.
 * <p>
 * Every match of {@code START_END_PATTERN} yields one subtitle built from its {@code content},
 * {@code start} and {@code end} groups; text outside of the matches is ignored.
 *
 * @param text the translated chunk
 * @return the subtitles found in the chunk, in order of appearance (possibly empty)
 */
private static List<Subtitle> parseTranslated(final CharSequence text) {
    final List<Subtitle> parsed = new ArrayList<>();
    for (final var m = START_END_PATTERN.matcher(text); m.find(); ) {
        final var begin = Long.parseLong(m.group("start"));
        final var finish = Long.parseLong(m.group("end"));
        parsed.add(new SubtitleImpl(m.group("content"), begin, finish, null, null));
    }
    return parsed;
}
private com.deepl.api.Translator getTranslator() {

View File

@@ -128,7 +128,7 @@ public class FFmpegVideoConverter extends AbstractProcessRunner implements Video
}
private SubtitleConverter<?> getSubtitleConverter() {
return converterProvider.getConverter(preferences.get("outputFormat", "ass"));
return converterProvider.getConverter(preferences.get("outputFormat", "srt"));
}
private static String escapeVF(final String path) {

View File

@@ -133,7 +133,7 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
if (newLength > maxLineLength * maxLines) {
final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
ret.add(newSubtitle);
builder.delete(0, builder.length());
builder.setLength(0);
builder.append(text);
currentStart = start == 0 ? currentEnd : start;
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {