DeepL translation works
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
package com.github.gtache.autosubtitle.translation.deepl;
|
package com.github.gtache.autosubtitle.translation.deepl;
|
||||||
|
|
||||||
import com.deepl.api.DeepLException;
|
import com.deepl.api.DeepLException;
|
||||||
|
import com.deepl.api.TextTranslationOptions;
|
||||||
import com.github.gtache.autosubtitle.Language;
|
import com.github.gtache.autosubtitle.Language;
|
||||||
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||||
@@ -12,14 +13,17 @@ import com.github.pemistahl.lingua.api.LanguageDetector;
|
|||||||
|
|
||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.prefs.Preferences;
|
import java.util.prefs.Preferences;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DeepL implementation of {@link Translator}
|
* DeepL implementation of {@link Translator}
|
||||||
*/
|
*/
|
||||||
public class DeepLTranslator implements Translator<Subtitle> {
|
public class DeepLTranslator implements Translator<Subtitle> {
|
||||||
|
/**
 * Matches one serialized subtitle of the form {@code <start>number</start>content<end>number</end>}.
 * DOTALL lets the lazy content group span line terminators, so a subtitle whose
 * content contains a line break is matched instead of being skipped.
 */
private static final Pattern START_END_PATTERN = Pattern.compile("<start>(?<start>\\d+)</start>(?<content>.+?)<end>(?<end>\\d+)</end>", Pattern.DOTALL);
|
||||||
|
|
||||||
private final Preferences preferences;
|
private final Preferences preferences;
|
||||||
private final LanguageDetector languageDetector;
|
private final LanguageDetector languageDetector;
|
||||||
@@ -61,12 +65,65 @@ public class DeepLTranslator implements Translator<Subtitle> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SubtitleCollection<Subtitle> translate(final SubtitleCollection<?> collection, final Language from, final Language to) throws TranslationException {
|
public SubtitleCollection<Subtitle> translate(final SubtitleCollection<?> collection, final Language from, final Language to) throws TranslationException {
|
||||||
final var subtitles = new ArrayList<Subtitle>(collection.subtitles().size());
|
final var toTranslate = getToTranslate(collection);
|
||||||
for (final var subtitle : collection.subtitles()) {
|
try {
|
||||||
subtitles.add(translate(subtitle, from, to));
|
final var result = translate(toTranslate, from, to);
|
||||||
|
final var text = result.stream().map(Subtitle::content).collect(Collectors.joining(""));
|
||||||
|
return new SubtitleCollectionImpl<>(text, result, to);
|
||||||
|
} catch (final DeepLException e) {
|
||||||
|
throw new TranslationException(e);
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new TranslationException(e);
|
||||||
}
|
}
|
||||||
final var text = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
|
}
|
||||||
return new SubtitleCollectionImpl<>(text, subtitles, to);
|
|
||||||
|
private List<Subtitle> translate(final Iterable<String> toTranslate, final Language from, final Language to) throws DeepLException, InterruptedException, TranslationException {
|
||||||
|
final var result = new ArrayList<Subtitle>();
|
||||||
|
final var options = new TextTranslationOptions().setIgnoreTags(List.of("start", "end")).setPreserveFormatting(true).setTagHandling("xml");
|
||||||
|
final var currentTranslator = getTranslator();
|
||||||
|
if (currentTranslator == null) {
|
||||||
|
throw new TranslationException("DeepL API key is not set");
|
||||||
|
} else {
|
||||||
|
for (final var text : toTranslate) {
|
||||||
|
final var from2 = from == Language.EN ? "en-GB" : from.iso2();
|
||||||
|
final var to2 = to == Language.EN ? "en-GB" : to.iso2();
|
||||||
|
final var translated = currentTranslator.translateText(text, from2, to2, options);
|
||||||
|
final var parsed = parseTranslated(translated.getText());
|
||||||
|
result.addAll(parsed);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> getToTranslate(final SubtitleCollection<?> collection) {
|
||||||
|
final var ret = new ArrayList<String>();
|
||||||
|
final var builder = new StringBuilder(32000);
|
||||||
|
for (final var subtitle : collection.subtitles()) {
|
||||||
|
final var transformed = "<start>" + subtitle.start() + "</start>" + subtitle.content() + "<end>" + subtitle.end() + "</end>";
|
||||||
|
if (builder.length() + transformed.length() > 32000) {
|
||||||
|
ret.add(builder.toString());
|
||||||
|
builder.setLength(0);
|
||||||
|
}
|
||||||
|
builder.append(transformed);
|
||||||
|
}
|
||||||
|
if (!builder.isEmpty()) {
|
||||||
|
ret.add(builder.toString());
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Subtitle> parseTranslated(final CharSequence text) {
|
||||||
|
final var matcher = START_END_PATTERN.matcher(text);
|
||||||
|
final var subtitles = new ArrayList<Subtitle>();
|
||||||
|
while (matcher.find()) {
|
||||||
|
final var start = Long.parseLong(matcher.group("start"));
|
||||||
|
final var end = Long.parseLong(matcher.group("end"));
|
||||||
|
final var content = matcher.group("content");
|
||||||
|
final var subtitle = new SubtitleImpl(content, start, end, null, null);
|
||||||
|
subtitles.add(subtitle);
|
||||||
|
}
|
||||||
|
return subtitles;
|
||||||
}
|
}
|
||||||
|
|
||||||
private com.deepl.api.Translator getTranslator() {
|
private com.deepl.api.Translator getTranslator() {
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ public class FFmpegVideoConverter extends AbstractProcessRunner implements Video
|
|||||||
}
|
}
|
||||||
|
|
||||||
private SubtitleConverter<?> getSubtitleConverter() {
|
private SubtitleConverter<?> getSubtitleConverter() {
|
||||||
return converterProvider.getConverter(preferences.get("outputFormat", "ass"));
|
return converterProvider.getConverter(preferences.get("outputFormat", "srt"));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String escapeVF(final String path) {
|
private static String escapeVF(final String path) {
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|||||||
if (newLength > maxLineLength * maxLines) {
|
if (newLength > maxLineLength * maxLines) {
|
||||||
final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
|
final var newSubtitle = new SubtitleImpl(builder.toString(), currentStart, currentEnd, null, null);
|
||||||
ret.add(newSubtitle);
|
ret.add(newSubtitle);
|
||||||
builder.delete(0, builder.length());
|
builder.setLength(0);
|
||||||
builder.append(text);
|
builder.append(text);
|
||||||
currentStart = start == 0 ? currentEnd : start;
|
currentStart = start == 0 ? currentEnd : start;
|
||||||
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user