Moves some modules and files, adds save subtitles

This commit is contained in:
Guillaume Tâche
2024-08-06 20:03:19 +02:00
parent 5efdaa6f63
commit c2efb71195
117 changed files with 989 additions and 353 deletions

View File

@@ -0,0 +1,16 @@
package com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper;
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
import com.github.gtache.autosubtitle.subtitle.extractor.whisper.WhisperSubtitleExtractor;
import dagger.Binds;
import dagger.Module;
/**
 * Dagger module for the Whisper subtitle extractor.
 * Binds {@link WhisperSubtitleExtractor} as the {@link SubtitleExtractor} implementation.
 */
@Module
public abstract class WhisperExtractorModule {
/**
 * Binds the Whisper extractor to the {@link SubtitleExtractor} interface.
 *
 * @param extractor the Whisper implementation
 * @return the extractor, exposed as a {@link SubtitleExtractor}
 */
@Binds
abstract SubtitleExtractor bindsSubtitleExtractor(final WhisperSubtitleExtractor extractor);
}

View File

@@ -0,0 +1,27 @@
package com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper;
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
import com.github.gtache.autosubtitle.subtitle.parser.json.whisper.JSONSubtitleConverter;
import com.google.gson.Gson;
import dagger.Binds;
import dagger.Module;
import dagger.Provides;
import dagger.multibindings.IntoMap;
import dagger.multibindings.StringKey;
import javax.inject.Singleton;
/**
 * Dagger module for the Whisper JSON subtitle format.
 * Contributes the JSON {@link SubtitleConverter} to the converter map and provides the {@link Gson} instance.
 */
@Module
public abstract class WhisperJsonModule {
/**
 * Registers the JSON converter in the multibound map of {@link SubtitleConverter}s under the key {@code "json"}.
 *
 * @param subtitleConverter the JSON converter implementation
 * @return the converter, exposed as a {@link SubtitleConverter}
 */
@Binds
@IntoMap
@StringKey("json")
abstract SubtitleConverter bindsJSONSubtitleConverter(final JSONSubtitleConverter subtitleConverter);
/**
 * Provides a singleton-scoped {@link Gson} created with the default configuration.
 *
 * @return a new {@link Gson}
 */
@Provides
@Singleton
static Gson providesGson() {
return new Gson();
}
}

View File

@@ -1,22 +1,18 @@
package com.github.gtache.autosubtitle.modules.whisper;
import com.github.gtache.autosubtitle.modules.whisper.json.JsonModule;
import com.github.gtache.autosubtitle.subtitle.ExtractionModelProvider;
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractor;
import com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper.WhisperExtractorModule;
import com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper.WhisperJsonModule;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModelProvider;
import com.github.gtache.autosubtitle.whisper.WhisperExtractionModelProvider;
import com.github.gtache.autosubtitle.whisper.WhisperSubtitleExtractor;
import dagger.Binds;
import dagger.Module;
/**
* Dagger module for Whisper
*/
@Module(includes = JsonModule.class)
@Module(includes = {WhisperJsonModule.class, WhisperExtractorModule.class})
public abstract class WhisperModule {
@Binds
abstract SubtitleExtractor bindsSubtitleExtractor(final WhisperSubtitleExtractor extractor);
@Binds
abstract ExtractionModelProvider bindsExtractionModelProvider(final WhisperExtractionModelProvider provider);
}

View File

@@ -1,23 +0,0 @@
package com.github.gtache.autosubtitle.modules.whisper.json;
import com.github.gtache.autosubtitle.whisper.SubtitleParser;
import com.github.gtache.autosubtitle.whisper.json.JsonSubtitleParser;
import com.google.gson.Gson;
import dagger.Binds;
import dagger.Module;
import dagger.Provides;
import javax.inject.Singleton;
/**
 * Dagger module for the Whisper JSON output.
 * Binds the JSON {@link SubtitleParser} and provides the {@link Gson} instance.
 */
@Module
public abstract class JsonModule {
/**
 * Binds {@link JsonSubtitleParser} as the {@link SubtitleParser} implementation.
 *
 * @param subtitleParser the JSON parser implementation
 * @return the parser, exposed as a {@link SubtitleParser}
 */
@Binds
abstract SubtitleParser bindsSubtitleParser(final JsonSubtitleParser subtitleParser);
/**
 * Provides a singleton-scoped {@link Gson} created with the default configuration.
 *
 * @return a new {@link Gson}
 */
@Provides
@Singleton
static Gson providesGson() {
return new Gson();
}
}

View File

@@ -1,4 +1,4 @@
package com.github.gtache.autosubtitle.whisper;
package com.github.gtache.autosubtitle.subtitle.extractor.whisper;
import com.github.gtache.autosubtitle.Audio;
import com.github.gtache.autosubtitle.File;
@@ -7,13 +7,18 @@ import com.github.gtache.autosubtitle.Video;
import com.github.gtache.autosubtitle.impl.OS;
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperVenvPath;
import com.github.gtache.autosubtitle.process.impl.AbstractProcessRunner;
import com.github.gtache.autosubtitle.subtitle.ExtractEvent;
import com.github.gtache.autosubtitle.subtitle.ExtractException;
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractor;
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractorListener;
import com.github.gtache.autosubtitle.subtitle.impl.ExtractEventImpl;
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractEvent;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractException;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractorListener;
import com.github.gtache.autosubtitle.subtitle.extractor.impl.ExtractEventImpl;
import com.github.gtache.autosubtitle.whisper.WhisperModels;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import javax.inject.Inject;
import javax.inject.Singleton;
@@ -23,6 +28,7 @@ import java.nio.file.Path;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
@@ -34,16 +40,17 @@ import static java.util.Objects.requireNonNull;
@Singleton
public class WhisperSubtitleExtractor extends AbstractProcessRunner implements SubtitleExtractor {
private static final Logger logger = LogManager.getLogger(WhisperSubtitleExtractor.class);
private static final Pattern LINE_PROGRESS_PATTERN = Pattern.compile("^\\[\\d{2}:\\d{2}\\.\\d{3} --> (?<minutes>\\d{2}):(?<seconds>\\d{2})\\.(?<millis>\\d{3})]");
private final Path venvPath;
private final SubtitleParser parser;
private final SubtitleConverter converter;
private final OS os;
private final Set<SubtitleExtractorListener> listeners;
@Inject
WhisperSubtitleExtractor(@WhisperVenvPath final Path venvPath, final SubtitleParser parser, final OS os) {
WhisperSubtitleExtractor(@WhisperVenvPath final Path venvPath, final Map<String, SubtitleConverter> converters, final OS os) {
this.venvPath = requireNonNull(venvPath);
this.parser = requireNonNull(parser);
this.converter = requireNonNull(converters.get("json"));
this.os = requireNonNull(os);
this.listeners = new HashSet<>();
}
@@ -113,6 +120,7 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
var line = processListener.readLine();
var oldProgress = -1.0;
while (line != null) {
logger.info("Whisper output : {}", line);
final var newProgress = computeProgress(line, duration, oldProgress);
notifyListeners(new ExtractEventImpl(line, newProgress));
oldProgress = newProgress;
@@ -123,7 +131,11 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
final var filename = path.getFileName().toString();
final var subtitleFilename = filename.substring(0, filename.lastIndexOf('.')) + ".json";
final var subtitleFile = outputDir.resolve(subtitleFilename);
return parser.parse(subtitleFile);
try {
return converter.parse(subtitleFile);
} catch (final ParseException e) {
throw new ExtractException(e);
}
} else {
throw new ExtractException("Error extracting subtitles: " + result.output());
}
@@ -132,7 +144,7 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
}
}
private double computeProgress(final CharSequence line, final long duration, final double oldProgress) {
private static double computeProgress(final CharSequence line, final long duration, final double oldProgress) {
final var matcher = LINE_PROGRESS_PATTERN.matcher(line);
if (matcher.find()) {
final var minutes = Integer.parseInt(matcher.group("minutes"));

View File

@@ -0,0 +1,75 @@
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.subtitle.Subtitle;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
import com.google.gson.Gson;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
/**
 * {@link SubtitleConverter} implementation for JSON files.
 * <p>
 * The JSON model ({@link JSONSubtitles} / {@link JSONSubtitleSegment}) stores timestamps in seconds as
 * floating-point values, whereas {@link Subtitle} uses milliseconds; this class converts between the two.
 */
@Singleton
public class JSONSubtitleConverter implements SubtitleConverter {

    private static final Logger logger = LogManager.getLogger(JSONSubtitleConverter.class);
    private static final double MILLIS_PER_SECOND = 1000.0;

    private final Gson gson;

    /**
     * Creates the converter.
     *
     * @param gson the Gson instance used for (de)serialization, must not be null
     * @throws NullPointerException if {@code gson} is null
     */
    @Inject
    JSONSubtitleConverter(final Gson gson) {
        this.gson = Objects.requireNonNull(gson);
    }

    /**
     * Serializes the given collection to JSON.
     * Segment ids are assigned sequentially starting at 1; the segment fields that have no
     * counterpart in {@link Subtitle} are filled with zero / empty placeholder values.
     *
     * @param collection the subtitles to format
     * @return the JSON representation
     */
    @Override
    public String format(final SubtitleCollection collection) {
        final var id = new AtomicInteger(0);
        final var segments = collection.subtitles().stream()
                .map(s -> new JSONSubtitleSegment(id.incrementAndGet(), 0, s.start() / MILLIS_PER_SECOND,
                        s.end() / MILLIS_PER_SECOND, s.content(), List.of(), 0, 0, 0, 0))
                .toList();
        final var subtitles = new JSONSubtitles(collection.text(), segments, collection.language().iso2());
        return gson.toJson(subtitles);
    }

    /**
     * Parses JSON content into a {@link SubtitleCollection}.
     * Segments are sorted by start then end time. A warning is logged when the global text of the
     * JSON document differs from the space-joined segment texts.
     *
     * @param content the JSON content
     * @return the parsed subtitles
     * @throws ParseException if anything goes wrong while reading or converting the content
     */
    @Override
    public SubtitleCollection parse(final String content) throws ParseException {
        try {
            final var json = gson.fromJson(content, JSONSubtitles.class);
            final var subtitles = json.segments().stream().map(s -> {
                // Scale to milliseconds BEFORE narrowing to long, otherwise the fractional
                // seconds are truncated (e.g. 1.5s would become 1000ms instead of 1500ms)
                final var start = toMillis(s.start());
                final var end = toMillis(s.end());
                return new SubtitleImpl(s.text(), start, end, null, null);
            }).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
            final var language = Language.getLanguage(json.language());
            final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(" "));
            if (!Objects.equals(json.text(), subtitlesText)) {
                logger.warn("Not same text: {}\n\n{}", json.text(), subtitlesText);
            }
            return new SubtitleCollectionImpl(json.text(), subtitles, language);
        } catch (final Exception e) {
            // Boundary catch: any failure (malformed JSON, null document, unknown language...) is reported as a ParseException
            throw new ParseException(e);
        }
    }

    /**
     * Checks whether the given file looks like a JSON file, based on its name.
     *
     * @param file the file to check
     * @return true if the file name ends with {@code .json}; false otherwise or if the path has no file name
     */
    @Override
    public boolean canParse(final Path file) {
        // getFileName() returns null for root / empty paths
        final var name = file.getFileName();
        return name != null && name.toString().endsWith(".json");
    }

    /**
     * @return the name of the handled format, {@code "json"}
     */
    @Override
    public String formatName() {
        return "json";
    }

    /**
     * Converts a timestamp in seconds to milliseconds, rounding to the nearest millisecond.
     *
     * @param seconds the timestamp in seconds
     * @return the timestamp in milliseconds
     */
    private static long toMillis(final double seconds) {
        return Math.round(seconds * MILLIS_PER_SECOND);
    }
}

View File

@@ -1,4 +1,4 @@
package com.github.gtache.autosubtitle.whisper.json;
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
import java.util.List;

View File

@@ -1,4 +1,4 @@
package com.github.gtache.autosubtitle.whisper.json;
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
import java.util.List;

View File

@@ -1,13 +0,0 @@
package com.github.gtache.autosubtitle.whisper;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Parses subtitles into a {@link SubtitleCollection}
 */
public interface SubtitleParser {
/**
 * Parses subtitles from the given text
 *
 * @param text the textual content to parse
 * @return the parsed subtitles
 */
SubtitleCollection parse(String text);
/**
 * Parses subtitles from the given file
 *
 * @param file the file to parse
 * @return the parsed subtitles
 * @throws IOException if an I/O error occurs
 */
SubtitleCollection parse(Path file) throws IOException;
}

View File

@@ -1,7 +1,7 @@
package com.github.gtache.autosubtitle.whisper;
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
import com.github.gtache.autosubtitle.subtitle.ExtractionModelProvider;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModelProvider;
import javax.inject.Inject;
import javax.inject.Singleton;

View File

@@ -1,6 +1,6 @@
package com.github.gtache.autosubtitle.whisper;
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
/**
* Whisper models

View File

@@ -1,54 +0,0 @@
package com.github.gtache.autosubtitle.whisper.json;
import com.github.gtache.autosubtitle.Language;
import com.github.gtache.autosubtitle.subtitle.Subtitle;
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
import com.github.gtache.autosubtitle.whisper.SubtitleParser;
import com.google.gson.Gson;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * {@link SubtitleParser} implementation reading the JSON subtitle format.
 * <p>
 * The JSON model stores timestamps in seconds as floating-point values; they are converted to
 * milliseconds for {@link Subtitle}.
 */
@Singleton
public class JsonSubtitleParser implements SubtitleParser {

    private static final Logger logger = LogManager.getLogger(JsonSubtitleParser.class);
    private static final double MILLIS_PER_SECOND = 1000.0;

    private final Gson gson;

    /**
     * Creates the parser.
     *
     * @param gson the Gson instance used for deserialization, must not be null
     * @throws NullPointerException if {@code gson} is null
     */
    @Inject
    JsonSubtitleParser(final Gson gson) {
        this.gson = Objects.requireNonNull(gson);
    }

    /**
     * Reads the whole file as a string and parses it.
     *
     * @param file the JSON file
     * @return the parsed subtitles
     * @throws IOException if the file cannot be read
     */
    @Override
    public SubtitleCollection parse(final Path file) throws IOException {
        final var content = Files.readString(file);
        return parse(content);
    }

    /**
     * Parses JSON text into a {@link SubtitleCollection}.
     * Segments are sorted by start then end time. A warning is logged when the global text of the
     * JSON document differs from the space-joined segment texts.
     *
     * @param text the JSON content
     * @return the parsed subtitles
     */
    @Override
    public SubtitleCollection parse(final String text) {
        final var json = gson.fromJson(text, JSONSubtitles.class);
        final var subtitles = json.segments().stream().map(s -> {
            // Scale to milliseconds BEFORE narrowing to long, otherwise the fractional
            // seconds are truncated (e.g. 1.5s would become 1000ms instead of 1500ms)
            final var start = toMillis(s.start());
            final var end = toMillis(s.end());
            return new SubtitleImpl(s.text(), start, end, null, null);
        }).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
        final var language = Language.getLanguage(json.language());
        final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(" "));
        if (!Objects.equals(json.text(), subtitlesText)) {
            logger.warn("Not same text: {} vs {}", json.text(), subtitlesText);
        }
        return new SubtitleCollectionImpl(json.text(), subtitles, language);
    }

    /**
     * Converts a timestamp in seconds to milliseconds, rounding to the nearest millisecond.
     *
     * @param seconds the timestamp in seconds
     * @return the timestamp in milliseconds
     */
    private static long toMillis(final double seconds) {
        return Math.round(seconds * MILLIS_PER_SECOND);
    }
}

View File

@@ -9,9 +9,11 @@ module com.github.gtache.autosubtitle.whisper {
requires transitive java.compiler; //Don't know why dagger generates @Generated here, need to debug
exports com.github.gtache.autosubtitle.whisper;
exports com.github.gtache.autosubtitle.whisper.json;
exports com.github.gtache.autosubtitle.setup.whisper;
exports com.github.gtache.autosubtitle.modules.whisper;
exports com.github.gtache.autosubtitle.modules.whisper.json;
exports com.github.gtache.autosubtitle.modules.setup.whisper;
exports com.github.gtache.autosubtitle.subtitle.extractor.whisper;
exports com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper;
exports com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper;
exports com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
}