Adds WhisperX, reworks UI (still needs some work), theoretically usable
This commit is contained in:
19
whisper/base/pom.xml
Normal file
19
whisper/base/pom.xml
Normal file
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.github.gtache.autosubtitle</groupId>
|
||||
<artifactId>autosubtitle-whisper</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>autosubtitle-whisper-base</artifactId>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.github.gtache.autosubtitle</groupId>
|
||||
<artifactId>autosubtitle-whisper-common</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.github.gtache.autosubtitle.modules.setup.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.modules.setup.impl.SubtitleExtractorSetup;
|
||||
import com.github.gtache.autosubtitle.modules.setup.impl.ToolsRoot;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.PythonVersion;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperBundledRoot;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperCommonSetupModule;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperVenvPath;
|
||||
import com.github.gtache.autosubtitle.setup.SetupManager;
|
||||
import com.github.gtache.autosubtitle.setup.whisper.base.WhisperSetupManager;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
import dagger.Provides;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Setup module for Whisper
|
||||
*/
|
||||
@Module(includes = WhisperCommonSetupModule.class)
|
||||
public abstract class WhisperSetupModule {
|
||||
@Binds
|
||||
@SubtitleExtractorSetup
|
||||
abstract SetupManager bindsSubtitleExtractorSetupManager(final WhisperSetupManager manager);
|
||||
|
||||
@Provides
|
||||
@PythonVersion
|
||||
static String providesPythonVersion() {
|
||||
return "3.9.19";
|
||||
}
|
||||
|
||||
@Provides
|
||||
@WhisperBundledRoot
|
||||
static Path providesWhisperBundledRoot(@ToolsRoot final Path root) {
|
||||
return root.resolve("whisper");
|
||||
}
|
||||
|
||||
@Provides
|
||||
@WhisperVenvPath
|
||||
static Path providesWhisperVenvPath(@WhisperBundledRoot final Path root) {
|
||||
return root.resolve("whisper-env");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.whisper.base.WhisperSubtitleExtractor;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
|
||||
/**
|
||||
* Dagger module for Whisper
|
||||
*/
|
||||
@Module
|
||||
public abstract class WhisperExtractorModule {
|
||||
|
||||
private WhisperExtractorModule() {
|
||||
|
||||
}
|
||||
|
||||
@Binds
|
||||
abstract SubtitleExtractor bindsSubtitleExtractor(final WhisperSubtitleExtractor extractor);
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.parser.json.whisper.base.JSONSubtitleConverter;
|
||||
import com.google.gson.Gson;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
import dagger.Provides;
|
||||
import dagger.multibindings.IntoMap;
|
||||
import dagger.multibindings.StringKey;
|
||||
|
||||
import javax.inject.Singleton;
|
||||
|
||||
@Module
|
||||
public abstract class WhisperJsonModule {
|
||||
|
||||
private WhisperJsonModule() {
|
||||
|
||||
}
|
||||
|
||||
@Binds
|
||||
@IntoMap
|
||||
@StringKey("json")
|
||||
abstract SubtitleConverter bindsJSONSubtitleConverter(final JSONSubtitleConverter subtitleConverter);
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
static Gson providesGson() {
|
||||
return new Gson();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.github.gtache.autosubtitle.modules.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.base.WhisperSetupModule;
|
||||
import com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper.base.WhisperExtractorModule;
|
||||
import com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper.base.WhisperJsonModule;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModelProvider;
|
||||
import com.github.gtache.autosubtitle.whisper.WhisperExtractionModelProvider;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
|
||||
/**
|
||||
* Dagger module for Whisper
|
||||
*/
|
||||
@Module(includes = {WhisperSetupModule.class, WhisperJsonModule.class, WhisperExtractorModule.class})
|
||||
public abstract class WhisperModule {
|
||||
|
||||
private WhisperModule() {
|
||||
|
||||
}
|
||||
|
||||
@Binds
|
||||
abstract ExtractionModelProvider bindsExtractionModelProvider(final WhisperExtractionModelProvider provider);
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
package com.github.gtache.autosubtitle.setup.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.setup.SetupException;
|
||||
import com.github.gtache.autosubtitle.setup.conda.CondaSetupManager;
|
||||
import com.github.gtache.autosubtitle.setup.whisper.AbstractWhisperSetupManager;
|
||||
import com.github.gtache.autosubtitle.setup.whisper.WhisperSetupConfiguration;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.time.Duration;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* {@link com.github.gtache.autosubtitle.setup.SetupManager} for Whisper
|
||||
*/
|
||||
@Singleton
|
||||
public class WhisperSetupManager extends AbstractWhisperSetupManager {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(WhisperSetupManager.class);
|
||||
|
||||
@Inject
|
||||
WhisperSetupManager(final CondaSetupManager condaSetupManager, final WhisperSetupConfiguration configuration) {
|
||||
super(condaSetupManager, configuration);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "Whisper";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void installWhisper() throws SetupException {
|
||||
final var path = getPythonPath();
|
||||
try {
|
||||
logger.info("Installing whisper");
|
||||
final var result = run(List.of(path.toString(), "-m", "pip", "install", "-U", "openai-whisper", "numpy<2"), Duration.ofMinutes(15));
|
||||
if (result.exitCode() == 0) {
|
||||
logger.info("Whisper installed");
|
||||
} else {
|
||||
throw new SetupException("Error installing whisper: " + result.output());
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new SetupException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isWhisperInstalled() throws SetupException {
|
||||
final var path = getPythonPath();
|
||||
if (Files.exists(path)) {
|
||||
try {
|
||||
final var result = run(List.of(path.toString(), "-m", "pip", "show", "openai-whisper"), Duration.ofSeconds(5));
|
||||
return result.exitCode() == 0;
|
||||
} catch (final IOException e) {
|
||||
throw new SetupException(e);
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.github.gtache.autosubtitle.subtitle.extractor.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.Language;
|
||||
import com.github.gtache.autosubtitle.impl.OS;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperVenvPath;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.whisper.AbstractWhisperSubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.whisper.WhisperModels;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Whisper implementation of {@link SubtitleExtractor}
|
||||
*/
|
||||
@Singleton
|
||||
public class WhisperSubtitleExtractor extends AbstractWhisperSubtitleExtractor {
|
||||
|
||||
|
||||
@Inject
|
||||
WhisperSubtitleExtractor(@WhisperVenvPath final Path venvPath, final Map<String, SubtitleConverter> converters, final OS os) {
|
||||
super(venvPath, converters, os);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> createArgs(final Path path, final Language language, final ExtractionModel model, final Path outputDir) {
|
||||
final var args = new ArrayList<String>(14);
|
||||
args.add(getPythonPath().toString());
|
||||
args.add("-m");
|
||||
args.add("whisper");
|
||||
args.add("--verbose");
|
||||
args.add("False");
|
||||
args.add("--model");
|
||||
if (model != WhisperModels.LARGE && language == Language.EN) {
|
||||
args.add(model.name().toLowerCase() + ".en");
|
||||
} else {
|
||||
args.add(model.name().toLowerCase());
|
||||
}
|
||||
args.add("--task");
|
||||
args.add("transcribe");
|
||||
args.add("--output_dir");
|
||||
args.add(outputDir.toString());
|
||||
args.add("--output_format");
|
||||
args.add("json");
|
||||
if (language != Language.AUTO) {
|
||||
args.add("--language");
|
||||
args.add(language.iso2());
|
||||
}
|
||||
args.add(path.toString());
|
||||
return args;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper.base;
|
||||
|
||||
import com.github.gtache.autosubtitle.Language;
|
||||
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* {@link SubtitleConverter} implementation for JSON files
|
||||
*/
|
||||
@Singleton
|
||||
public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(JSONSubtitleConverter.class);
|
||||
private final Gson gson;
|
||||
|
||||
@Inject
|
||||
JSONSubtitleConverter(final Gson gson) {
|
||||
this.gson = Objects.requireNonNull(gson);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(final SubtitleCollection<?> collection) {
|
||||
final var id = new AtomicInteger(0);
|
||||
final var segments = collection.subtitles().stream().map(s -> new JSONSubtitleSegment(id.incrementAndGet(), 0, s.start() / (double) 1000,
|
||||
s.end() / (double) 1000, s.content(), List.of(), 0, 0, 0, 0)).toList();
|
||||
final var subtitles = new JSONSubtitles(collection.text(), segments, collection.language().iso2());
|
||||
return gson.toJson(subtitles);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SubtitleCollectionImpl<SubtitleImpl> parse(final String content) throws ParseException {
|
||||
try {
|
||||
final var json = gson.fromJson(content, JSONSubtitles.class);
|
||||
final var subtitles = json.segments().stream().map(s -> {
|
||||
final var start = (long) s.start() * 1000L;
|
||||
final var end = (long) s.end() * 1000L;
|
||||
return new SubtitleImpl(s.text(), start, end, null, null);
|
||||
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
||||
final var language = Language.getLanguage(json.language());
|
||||
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
|
||||
if (!Objects.equals(json.text(), subtitlesText)) {
|
||||
logger.warn("Not same text: {}\n\n{}", json.text(), subtitlesText);
|
||||
}
|
||||
return new SubtitleCollectionImpl<>(json.text(), subtitles, language);
|
||||
} catch (final Exception e) {
|
||||
throw new ParseException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canParse(final Path file) {
|
||||
return file.getFileName().toString().endsWith(".json");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatName() {
|
||||
return "json";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper.base;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * One entry of the {@code segments} list of a JSON subtitles document.
 * <p>
 * NOTE(review): the snake_case component names presumably mirror the JSON keys as read/written
 * by Gson — confirm before renaming any of them.
 *
 * @param id                the segment id
 * @param seek              the seek value
 * @param start             the segment start
 * @param end               the segment end
 * @param text              the segment text
 * @param tokens            the segment tokens
 * @param temperature       the temperature value
 * @param avg_logprob       the average log probability value
 * @param compression_ratio the compression ratio value
 * @param no_speech_prob    the no-speech probability value
 */
public record JSONSubtitleSegment(
        int id,
        int seek,
        double start,
        double end,
        String text,
        List<Integer> tokens,
        double temperature,
        double avg_logprob,
        double compression_ratio,
        double no_speech_prob
) {
}
|
||||
@@ -0,0 +1,6 @@
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper.base;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record JSONSubtitles(String text, List<JSONSubtitleSegment> segments, String language) {
|
||||
}
|
||||
15
whisper/base/src/main/java/module-info.java
Normal file
15
whisper/base/src/main/java/module-info.java
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* Base whisper implementation (openai-whisper)
|
||||
*/
|
||||
module com.github.gtache.autosubtitle.whisper.base {
|
||||
requires transitive com.github.gtache.autosubtitle.whisper.common;
|
||||
requires com.github.gtache.autosubtitle.core;
|
||||
requires org.apache.logging.log4j;
|
||||
requires com.google.gson;
|
||||
|
||||
exports com.github.gtache.autosubtitle.setup.whisper.base;
|
||||
exports com.github.gtache.autosubtitle.modules.whisper.base;
|
||||
exports com.github.gtache.autosubtitle.modules.setup.whisper.base;
|
||||
exports com.github.gtache.autosubtitle.subtitle.extractor.whisper.base;
|
||||
exports com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper.base;
|
||||
}
|
||||
Reference in New Issue
Block a user