Adds WhisperX, reworks UI (still needs some work), theoretically usable

This commit is contained in:
Guillaume Tâche
2024-08-17 22:05:04 +02:00
parent 7bddf53bab
commit 3fa51eb95b
204 changed files with 4787 additions and 1321 deletions

View File

@@ -0,0 +1,72 @@
package com.github.gtache.autosubtitle.archive.impl;
import com.github.gtache.autosubtitle.archive.Archiver;
import javax.inject.Inject;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Zip implementation of {@link Archiver}.
 * <p>
 * Only decompression is implemented; {@link #compress(List, Path)} always throws
 * {@link UnsupportedOperationException}.
 */
public class ZipDecompresser implements Archiver {

    @Inject
    ZipDecompresser() {
    }

    /**
     * Not supported by this implementation.
     *
     * @param files       the files to compress (unused)
     * @param destination the archive to create (unused)
     * @throws UnsupportedOperationException always
     */
    @Override
    public void compress(final List<Path> files, final Path destination) throws IOException {
        throw new UnsupportedOperationException("Not supported");
    }

    /**
     * Extracts every entry of the given zip archive below the destination directory.
     *
     * @param archive     the zip archive to read
     * @param destination the directory receiving the extracted entries
     * @throws IllegalArgumentException if {@code isPathSupported(archive)} is false
     * @throws IOException              if reading or writing fails, or if an entry would be
     *                                  written outside of {@code destination}
     */
    @Override
    public void decompress(final Path archive, final Path destination) throws IOException {
        if (!isPathSupported(archive)) {
            throw new IllegalArgumentException("Unsupported path : " + archive);
        }
        try (final var zis = new ZipInputStream(Files.newInputStream(archive))) {
            for (var zipEntry = zis.getNextEntry(); zipEntry != null; zipEntry = zis.getNextEntry()) {
                final var target = newFile(destination, zipEntry);
                if (zipEntry.isDirectory()) {
                    Files.createDirectories(target);
                } else {
                    // The archive may lack an explicit entry for the parent directory
                    // (original note: fix for Windows-created archives)
                    final var parent = target.getParent();
                    if (parent != null) {
                        Files.createDirectories(parent);
                    }
                    // write file content
                    try (final var fos = Files.newOutputStream(target)) {
                        zis.transferTo(fos);
                    }
                }
            }
        }
    }

    /**
     * Resolves the extraction target of an entry and rejects entries escaping the destination.
     * <p>
     * Both paths are normalized before being compared: without normalization an entry named
     * {@code ../evil.txt} resolves to {@code <destination>/../evil.txt}, whose textual form still
     * starts with the destination and would therefore bypass the check.
     *
     * @param destinationDir the directory receiving the extracted entries
     * @param zipEntry       the entry to extract
     * @return the absolute, normalized path the entry must be written to
     * @throws IOException if the entry resolves outside of {@code destinationDir}
     */
    private static Path newFile(final Path destinationDir, final ZipEntry zipEntry) throws IOException {
        final var root = destinationDir.toAbsolutePath().normalize();
        final var target = root.resolve(zipEntry.getName()).normalize();
        // Path.startsWith compares whole name elements, so "/out2" does not start with "/out"
        if (!target.startsWith(root)) {
            throw new IOException("Entry is outside of the target dir: " + zipEntry.getName());
        }
        return target;
    }

    /**
     * @return the extension handled by this archiver, {@code "zip"}
     */
    @Override
    public String archiveExtension() {
        return "zip";
    }
}

View File

@@ -1,23 +1,33 @@
package com.github.gtache.autosubtitle.modules.impl;
import com.github.gtache.autosubtitle.archive.Archiver;
import com.github.gtache.autosubtitle.archive.impl.ZipDecompresser;
import com.github.gtache.autosubtitle.impl.Architecture;
import com.github.gtache.autosubtitle.impl.DaggerException;
import com.github.gtache.autosubtitle.impl.OS;
import com.github.gtache.autosubtitle.modules.setup.impl.SetupModule;
import com.github.gtache.autosubtitle.modules.subtitle.impl.SubtitleModule;
import dagger.Binds;
import dagger.Module;
import dagger.Provides;
import dagger.multibindings.IntoMap;
import dagger.multibindings.StringKey;
/**
* Dagger module for Core
*/
@Module(includes = {SetupModule.class, SubtitleModule.class})
public final class CoreModule {
public abstract class CoreModule {
private CoreModule() {
}
/**
 * Binds {@link ZipDecompresser} as the {@link Archiver} contributed to the injected map under the "zip" key
 *
 * @param decompresser the zip decompresser
 * @return the decompresser, seen as an {@link Archiver}
 */
@Binds
@StringKey("zip")
@IntoMap
abstract Archiver bindsZipDecompresser(final ZipDecompresser decompresser);
@Provides
static OS providesOS() {
final var os = OS.getOS();

View File

@@ -0,0 +1,16 @@
package com.github.gtache.autosubtitle.modules.setup.impl;
import javax.inject.Qualifier;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
/**
 * Injection qualifier identifying the cache root.
 * <p>
 * Retained at runtime and applicable to parameters, methods and fields.
 */
@Qualifier
@Documented
@Retention(RUNTIME)
@Target({ElementType.PARAMETER, ElementType.METHOD, ElementType.FIELD})
public @interface CacheRoot {
}

View File

@@ -4,6 +4,8 @@ import dagger.Module;
import dagger.Provides;
import java.net.http.HttpClient;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
* Dagger core module for setup
@@ -14,6 +16,18 @@ public final class SetupModule {
private SetupModule() {
}
/**
 * Provides the path qualified with {@link CacheRoot}
 *
 * @return the relative path {@code cache}
 */
@Provides
@CacheRoot
static Path providesCacheRoot() {
    final var cacheRoot = Path.of("cache");
    return cacheRoot;
}
/**
 * Provides the path qualified with {@link ToolsRoot}
 *
 * @return the relative path {@code tools}
 */
@Provides
@ToolsRoot
static Path providesToolsRoot() {
    final var toolsRoot = Path.of("tools");
    return toolsRoot;
}
@Provides
static HttpClient providesHttpClient() {
return HttpClient.newHttpClient();

View File

@@ -0,0 +1,16 @@
package com.github.gtache.autosubtitle.modules.setup.impl;
import javax.inject.Qualifier;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
/**
 * Injection qualifier identifying the tools root.
 * <p>
 * Retained at runtime and applicable to parameters, methods and fields.
 */
@Qualifier
@Documented
@Retention(RUNTIME)
@Target({ElementType.PARAMETER, ElementType.METHOD, ElementType.FIELD})
public @interface ToolsRoot {
}

View File

@@ -1,7 +1,9 @@
package com.github.gtache.autosubtitle.modules.subtitle.impl;
import com.github.gtache.autosubtitle.subtitle.SubtitleImporterExporter;
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
import com.github.gtache.autosubtitle.subtitle.converter.impl.SRTSubtitleConverter;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImporterExporterImpl;
import dagger.Binds;
import dagger.Module;
import dagger.multibindings.IntoMap;
@@ -20,4 +22,7 @@ public abstract class SubtitleModule {
@IntoMap
@StringKey("srt")
abstract SubtitleConverter bindsSubtitleConverter(final SRTSubtitleConverter converter);
/**
 * Binds {@link SubtitleImporterExporterImpl} as the {@link SubtitleImporterExporter} implementation
 *
 * @param impl the implementation
 * @return the implementation, seen as a {@link SubtitleImporterExporter}
 */
@Binds
abstract SubtitleImporterExporter bindsSubtitleImporterExporter(final SubtitleImporterExporterImpl impl);
}

View File

@@ -9,7 +9,6 @@ import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.time.Duration;
import java.util.List;
import java.util.concurrent.CompletableFuture;
/**
* Base implementation of {@link ProcessRunner}
@@ -19,19 +18,9 @@ public abstract class AbstractProcessRunner implements ProcessRunner {
private static final Logger logger = LogManager.getLogger(AbstractProcessRunner.class);
@Override
public ProcessResult run(final List<String> args) throws IOException {
public ProcessResult run(final List<String> args, final Duration duration) throws IOException {
final var listener = startListen(args);
CompletableFuture.runAsync(() -> {
try {
var line = listener.readLine();
while (line != null) {
line = listener.readLine();
}
} catch (final IOException e) {
logger.error("Error listening to process output of {}", args, e);
}
});
return listener.join(Duration.ofHours(1));
return listener.join(duration);
}
@Override
@@ -51,11 +40,12 @@ public abstract class AbstractProcessRunner implements ProcessRunner {
/**
* Runs a process and writes the output to the log
*
* @param args the command
* @param args the command
* @param duration The maximum duration to wait for
* @return the result
* @throws IOException if an error occurs
*/
protected ProcessResult runListen(final List<String> args) throws IOException {
protected ProcessResult runListen(final List<String> args, final Duration duration) throws IOException {
final var listener = startListen(args);
var line = listener.readLine();
final var processName = args.getFirst();
@@ -63,6 +53,6 @@ public abstract class AbstractProcessRunner implements ProcessRunner {
logger.info("[{}]: {}", processName, line);
line = listener.readLine();
}
return listener.join(Duration.ofHours(1));
return listener.join(duration);
}
}

View File

@@ -2,22 +2,25 @@ package com.github.gtache.autosubtitle.process.impl;
import com.github.gtache.autosubtitle.process.ProcessListener;
import com.github.gtache.autosubtitle.process.ProcessResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
/**
* Implementation of {@link ProcessListener}
*/
public class ProcessListenerImpl implements ProcessListener {
private static final Logger logger = LogManager.getLogger(ProcessListenerImpl.class);
private final Process process;
private final BufferedReader reader;
private final List<String> output;
@@ -30,7 +33,7 @@ public class ProcessListenerImpl implements ProcessListener {
public ProcessListenerImpl(final Process process) {
this.process = Objects.requireNonNull(process);
this.reader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8));
this.output = new ArrayList<>();
this.output = new CopyOnWriteArrayList<>();
}
@Override
@@ -48,7 +51,24 @@ public class ProcessListenerImpl implements ProcessListener {
}
@Override
public ProcessResult join(final Duration duration) throws IOException {
public ProcessResult join(final Duration duration) {
//Read to ensure process doesn't get stuck
CompletableFuture.runAsync(() -> {
try {
var line = readLine();
while (line != null) {
line = readLine();
}
} catch (final IOException e) {
logger.error("Error listening to process output of {}", process, e);
} finally {
try {
reader.close();
} catch (final IOException e) {
logger.warn("Error closing reader of {}", process, e);
}
}
});
try {
process.waitFor(duration.getSeconds(), TimeUnit.SECONDS);
} catch (final InterruptedException e) {
@@ -58,11 +78,6 @@ public class ProcessListenerImpl implements ProcessListener {
if (process.isAlive()) {
process.destroyForcibly();
}
//Reads lines to output
while (readLine() != null) {
//Do nothing
}
reader.close();
return new ProcessResultImpl(process.exitValue(), output);
}
}

View File

@@ -9,7 +9,6 @@ import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.Arrays;
import java.util.Comparator;
import java.util.stream.Collectors;
@@ -20,17 +19,17 @@ import static java.util.Objects.requireNonNull;
/**
* Converts subtitles to SRT format
*/
@Singleton
public class SRTSubtitleConverter implements SubtitleConverter {
public class SRTSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
private final Translator translator;
private final Translator<?> translator;
@Inject
SRTSubtitleConverter(final Translator translator) {
this.translator = requireNonNull(translator);
}
public String format(final SubtitleCollection collection) {
@Override
public String format(final SubtitleCollection<?> collection) {
final var subtitles = collection.subtitles().stream().sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
return IntStream.range(0, subtitles.size()).mapToObj(i -> {
final var subtitle = subtitles.get(i);
@@ -51,7 +50,7 @@ public class SRTSubtitleConverter implements SubtitleConverter {
}
@Override
public SubtitleCollection parse(final String content) throws ParseException {
public SubtitleCollectionImpl<SubtitleImpl> parse(final String content) throws ParseException {
try {
final var elements = content.split("\n\n");
final var subtitles = Arrays.stream(elements).filter(element -> !element.isBlank()).map(element -> {
@@ -66,7 +65,7 @@ public class SRTSubtitleConverter implements SubtitleConverter {
return new SubtitleImpl(text, start, end, null, null);
}).toList();
final var text = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(" "));
return new SubtitleCollectionImpl(text, subtitles, translator.getLanguage(text));
return new SubtitleCollectionImpl<>(text, subtitles, translator.getLanguage(text));
} catch (final Exception e) {
throw new ParseException(e);
}

View File

@@ -13,8 +13,8 @@ import static java.util.Objects.requireNonNull;
/**
* Implementation of {@link SubtitleCollection}
*/
public record SubtitleCollectionImpl(String text, Collection<? extends Subtitle> subtitles,
Language language) implements SubtitleCollection {
public record SubtitleCollectionImpl<T extends Subtitle>(String text, Collection<T> subtitles,
Language language) implements SubtitleCollection<T> {
public SubtitleCollectionImpl {
Objects.requireNonNull(text);

View File

@@ -22,4 +22,8 @@ public record SubtitleImpl(String content, long start, long end, Font font, Boun
throw new IllegalArgumentException("start must be <= end : " + start + " > " + end);
}
}
/**
 * Copy constructor: creates a new instance from the content, start, end, font and bounds of the given subtitle
 *
 * @param subtitle the subtitle to copy
 */
public SubtitleImpl(final Subtitle subtitle) {
this(subtitle.content(), subtitle.start(), subtitle.end(), subtitle.font(), subtitle.bounds());
}
}

View File

@@ -0,0 +1,141 @@
package com.github.gtache.autosubtitle.subtitle.impl;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.archive.Archiver;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.SubtitleImporterExporter;
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import javax.inject.Inject;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Implementation of {@link SubtitleImporterExporter}.
 * <p>
 * Files are dispatched on their extension (the text after the last {@code '.'} of the file name):
 * extensions matching a registered {@link Archiver} are handled as archives containing one subtitle
 * file per language, all others are handled by the {@link SubtitleConverter} registered for them.
 */
public class SubtitleImporterExporterImpl implements SubtitleImporterExporter<SubtitleImpl> {

    private static final Logger logger = LogManager.getLogger(SubtitleImporterExporterImpl.class);
    private static final String TEMP_DIRECTORY_PREFIX = "autosubtitle";
    private static final String PREFERRED_ARCHIVE_FORMAT = "json";

    private final Map<String, Archiver> archiverMap;
    private final Map<String, SubtitleConverter<?>> converterMap;

    /**
     * @param archiverMap  the archivers, by archive extension
     * @param converterMap the converters, by file extension (raw type kept as declared;
     *                     presumably matches the injected map binding - confirm before changing)
     */
    @Inject
    SubtitleImporterExporterImpl(final Map<String, Archiver> archiverMap, final Map<String, SubtitleConverter> converterMap) {
        this.archiverMap = Map.copyOf(archiverMap);
        final Map<String, SubtitleConverter<?>> converters = converterMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        // Wrapped so that the key set handed out by supportedSingleFileExtensions() cannot alter the registry
        this.converterMap = Collections.unmodifiableMap(converters);
    }

    /**
     * Imports the subtitles contained in a single subtitle file or in an archive of subtitle files.
     *
     * @param file the file to import
     * @return the imported collections, by language
     * @throws IOException    if an I/O error occurs
     * @throws ParseException if a file cannot be parsed or no converter exists for it
     */
    @Override
    public Map<Language, SubtitleCollection<SubtitleImpl>> importSubtitles(final Path file) throws IOException, ParseException {
        if (archiverMap.containsKey(extensionOf(file))) {
            return loadArchive(file);
        } else {
            final var loaded = loadSingleFile(file);
            logger.info("Loaded {}", file);
            return Map.of(loaded.language(), loaded);
        }
    }

    /**
     * Parses one subtitle file with the converter registered for its extension.
     */
    private SubtitleCollection<SubtitleImpl> loadSingleFile(final Path file) throws ParseException {
        final var parser = converterMap.get(extensionOf(file));
        if (parser == null) {
            throw new ParseException("No converter found for " + file);
        } else {
            final var parsed = parser.parse(file);
            return new SubtitleCollectionImpl<>(parsed.text(), parsed.subtitles().stream().map(SubtitleImpl::new).toList(), parsed.language());
        }
    }

    /**
     * Decompresses the archive in a temporary directory and parses every file found at its top level.
     * The temporary directory is removed whether or not loading succeeds.
     */
    private Map<Language, SubtitleCollection<SubtitleImpl>> loadArchive(final Path file) throws IOException, ParseException {
        final var archiver = archiverMap.get(extensionOf(file));
        final var map = new EnumMap<Language, SubtitleCollection<SubtitleImpl>>(Language.class);
        final var tempDirectory = Files.createTempDirectory(TEMP_DIRECTORY_PREFIX);
        try {
            archiver.decompress(file, tempDirectory);
            final var files = new ArrayList<Path>();
            try (final var stream = Files.list(tempDirectory)) {
                stream.forEach(files::add);
            }
            for (final var path : files) {
                final var loaded = loadSingleFile(path);
                map.put(loaded.language(), loaded);
            }
        } finally {
            deleteRecursively(tempDirectory);
        }
        logger.info("Loaded {}", file);
        return map;
    }

    /**
     * Exports the given collections to a file.
     *
     * @param collections the collections to export
     * @param file        the target file; must be an archive unless exactly one collection is given
     * @throws IOException              if an I/O error occurs or no converter exists for the target format
     * @throws IllegalArgumentException if the target is not an archive and the number of collections is not one
     */
    @Override
    public void exportSubtitles(final Collection<? extends SubtitleCollection<?>> collections, final Path file) throws IOException {
        if (archiverMap.containsKey(extensionOf(file))) {
            saveArchive(file, collections);
        } else if (collections.size() == 1) {
            saveSingleFile(file, collections.iterator().next());
        } else {
            throw new IllegalArgumentException("Cannot export multiple collections to a non-archive file : " + file);
        }
    }

    /**
     * Writes one file per collection in a temporary directory, then compresses them to the given archive.
     * The "json" converter is used when registered, otherwise the first registered converter.
     * The temporary directory is removed whether or not saving succeeds.
     */
    private void saveArchive(final Path file, final Iterable<? extends SubtitleCollection<?>> collections) throws IOException {
        if (converterMap.isEmpty()) {
            // Explicit failure instead of the NoSuchElementException of iterator().next() below
            throw new IOException("No converter found for " + file);
        }
        final var archiver = archiverMap.get(extensionOf(file));
        final var singleExporter = converterMap.getOrDefault(PREFERRED_ARCHIVE_FORMAT, converterMap.values().iterator().next());
        final var tempDir = Files.createTempDirectory(TEMP_DIRECTORY_PREFIX);
        try {
            for (final var collection : collections) {
                final var subtitleFile = tempDir.resolve(collection.language() + "." + singleExporter.formatName());
                saveSingleFile(subtitleFile, collection);
            }
            final var files = new ArrayList<Path>();
            try (final var stream = Files.list(tempDir)) {
                stream.forEach(files::add);
            }
            archiver.compress(files, file);
        } finally {
            deleteRecursively(tempDir);
        }
        logger.info("Saved {}", file);
    }

    /**
     * Formats the collection with the converter registered for the file extension and writes it.
     */
    private void saveSingleFile(final Path file, final SubtitleCollection<?> collection) throws IOException {
        final var converter = converterMap.get(extensionOf(file));
        if (converter == null) {
            throw new IOException("No converter found for " + file);
        } else {
            final var string = converter.format(collection);
            Files.writeString(file, string);
            logger.info("Saved {}", file);
        }
    }

    /**
     * Returns the text following the last '.' of the file name (the whole name when there is no '.').
     */
    private static String extensionOf(final Path file) {
        final var fileName = file.getFileName().toString();
        return fileName.substring(fileName.lastIndexOf('.') + 1);
    }

    /**
     * Best-effort removal of a temporary directory and everything below it.
     * Failures are logged rather than thrown so that they never hide the outcome of the import/export itself.
     */
    private static void deleteRecursively(final Path directory) {
        try (final var stream = Files.walk(directory)) {
            final var paths = stream.toList();
            // Files.walk lists a directory before its content; deleting in reverse order empties it first
            for (var i = paths.size() - 1; i >= 0; i--) {
                Files.deleteIfExists(paths.get(i));
            }
        } catch (final IOException | UncheckedIOException e) {
            logger.warn("Error deleting temporary directory {}", directory, e);
        }
    }

    /**
     * @return the extensions of the supported archives (unmodifiable)
     */
    @Override
    public Collection<String> supportedArchiveExtensions() {
        return archiverMap.keySet();
    }

    /**
     * @return the extensions of the supported subtitle files (unmodifiable)
     */
    @Override
    public Collection<String> supportedSingleFileExtensions() {
        return converterMap.keySet();
    }
}

View File

@@ -9,6 +9,7 @@ module com.github.gtache.autosubtitle.core {
requires org.apache.logging.log4j;
exports com.github.gtache.autosubtitle.impl;
exports com.github.gtache.autosubtitle.archive.impl;
exports com.github.gtache.autosubtitle.process.impl;
exports com.github.gtache.autosubtitle.setup.impl;
exports com.github.gtache.autosubtitle.subtitle.impl;

View File

@@ -0,0 +1,56 @@
package com.github.gtache.autosubtitle.archive.impl;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link ZipDecompresser}
 */
class TestZipDecompresser {

    private final ZipDecompresser zipDecompresser;

    TestZipDecompresser() {
        this.zipDecompresser = new ZipDecompresser();
    }

    /**
     * Only paths ending with the exact "zip" extension are supported
     */
    @Test
    void testIsPathSupported() {
        assertTrue(zipDecompresser.isPathSupported(Path.of("test.zip")));
        assertFalse(zipDecompresser.isPathSupported(Path.of("test")));
        assertFalse(zipDecompresser.isPathSupported(Path.of("test.txt")));
        assertFalse(zipDecompresser.isPathSupported(Path.of("test.zip2")));
    }

    /**
     * Decompresses the in.zip test resource and checks the extracted tree and file contents
     */
    @Test
    void testDecompress(@TempDir final Path tempDir) throws IOException {
        final var file = tempDir.resolve("test.zip");
        try (final var in = getClass().getResourceAsStream("in.zip")) {
            // Fail with a clear message instead of a NullPointerException inside Files.copy
            assertNotNull(in, "Test resource in.zip not found");
            Files.copy(in, file);
        }
        zipDecompresser.decompress(file, tempDir);
        final var inTxt = tempDir.resolve("in.txt");
        final var bin = tempDir.resolve("bin");
        final var binTxt = bin.resolve("bin.txt");
        final var lib = tempDir.resolve("lib");
        final var libTxt = lib.resolve("lib.txt");
        assertTrue(Files.exists(inTxt));
        assertEquals("in", Files.readString(inTxt));
        assertTrue(Files.exists(bin));
        assertTrue(Files.exists(binTxt));
        assertEquals("bin", Files.readString(binTxt));
        assertTrue(Files.exists(lib));
        assertTrue(Files.exists(libTxt));
        assertEquals("lib", Files.readString(libTxt));
    }

    /**
     * A non-zip path must be rejected before any I/O is attempted
     */
    @Test
    void testIllegal() {
        assertThrows(IllegalArgumentException.class, () -> zipDecompresser.decompress(Path.of("file.txt"), Path.of("target")));
    }
}

View File

@@ -22,7 +22,7 @@ class TestAbstractProcessRunner {
@Test
void testRun() throws IOException {
final var expected = new ProcessResultImpl(0, List.of("1", "2", "3"));
final var actual = dummyProcessRunner.run(ARGS);
final var actual = dummyProcessRunner.run(ARGS, Duration.ofSeconds(5));
assertEquals(expected, actual);
}
@@ -50,7 +50,7 @@ class TestAbstractProcessRunner {
@Test
void testRunListen() throws IOException {
final var result = dummyProcessRunner.runListen(ARGS);
final var result = dummyProcessRunner.runListen(ARGS, Duration.ofSeconds(5));
assertEquals(0, result.exitCode());
assertEquals(List.of("1", "2", "3"), result.output());
}

View File

@@ -50,7 +50,7 @@ class TestSRTSubtitleConverter {
final var end2 = 12L * 60 * 60 * 1000 + 23 * 60 * 1000 + 34 * 1000 + 457;
final var subtitle1 = new SubtitleImpl("test5 test6\ntest7 test8", start1, end1, null, null);
final var subtitle2 = new SubtitleImpl("test1 test2\ntest3 test4", start2, end2, null, null);
final var subtitles = new SubtitleCollectionImpl(subtitle1.content() + " " + subtitle2.content(), Arrays.asList(subtitle1, subtitle2), language);
final var subtitles = new SubtitleCollectionImpl<>(subtitle1.content() + " " + subtitle2.content(), Arrays.asList(subtitle1, subtitle2), language);
final var converter = new SRTSubtitleConverter(translator);
assertEquals(subtitles, converter.parse(in));
assertEquals(in, converter.format(subtitles));

View File

@@ -3,6 +3,7 @@ package com.github.gtache.autosubtitle.subtitle.extractor.impl;
import com.github.gtache.autosubtitle.Audio;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.Video;
import com.github.gtache.autosubtitle.subtitle.Subtitle;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractEvent;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractException;
@@ -56,12 +57,12 @@ class TestAbstractSubtitleExtractor {
private static final class DummySubtitleExtractor extends AbstractSubtitleExtractor {
@Override
public SubtitleCollection extract(final Video video, final Language language, final ExtractionModel model) throws ExtractException {
public SubtitleCollection<Subtitle> extract(final Video video, final Language language, final ExtractionModel model) throws ExtractException {
throw new UnsupportedOperationException();
}
@Override
public SubtitleCollection extract(final Audio audio, final Language language, final ExtractionModel model) throws ExtractException {
public SubtitleCollection<Subtitle> extract(final Audio audio, final Language language, final ExtractionModel model) throws ExtractException {
throw new UnsupportedOperationException();
}
}

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration: a single console appender, used by the root logger at info level -->
<Configuration status="INFO">
<Appenders>
<!-- Writes to standard output as: time [thread] level logger - message -->
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<!-- Every logger without a more specific configuration logs to the console appender -->
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>