Moves some modules and files, adds save subtitles
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
package com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.whisper.WhisperSubtitleExtractor;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
|
||||
/**
|
||||
* Dagger module for Whisper
|
||||
*/
|
||||
@Module
|
||||
public abstract class WhisperExtractorModule {
|
||||
|
||||
@Binds
|
||||
abstract SubtitleExtractor bindsSubtitleExtractor(final WhisperSubtitleExtractor extractor);
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.parser.json.whisper.JSONSubtitleConverter;
|
||||
import com.google.gson.Gson;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
import dagger.Provides;
|
||||
import dagger.multibindings.IntoMap;
|
||||
import dagger.multibindings.StringKey;
|
||||
|
||||
import javax.inject.Singleton;
|
||||
|
||||
@Module
|
||||
public abstract class WhisperJsonModule {
|
||||
|
||||
@Binds
|
||||
@IntoMap
|
||||
@StringKey("json")
|
||||
abstract SubtitleConverter bindsJSONSubtitleConverter(final JSONSubtitleConverter subtitleConverter);
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
static Gson providesGson() {
|
||||
return new Gson();
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,18 @@
|
||||
package com.github.gtache.autosubtitle.modules.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.modules.whisper.json.JsonModule;
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractionModelProvider;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper.WhisperExtractorModule;
|
||||
import com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper.WhisperJsonModule;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModelProvider;
|
||||
import com.github.gtache.autosubtitle.whisper.WhisperExtractionModelProvider;
|
||||
import com.github.gtache.autosubtitle.whisper.WhisperSubtitleExtractor;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
|
||||
/**
|
||||
* Dagger module for Whisper
|
||||
*/
|
||||
@Module(includes = JsonModule.class)
|
||||
@Module(includes = {WhisperJsonModule.class, WhisperExtractorModule.class})
|
||||
public abstract class WhisperModule {
|
||||
|
||||
@Binds
|
||||
abstract SubtitleExtractor bindsSubtitleExtractor(final WhisperSubtitleExtractor extractor);
|
||||
|
||||
@Binds
|
||||
abstract ExtractionModelProvider bindsExtractionModelProvider(final WhisperExtractionModelProvider provider);
|
||||
}
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
package com.github.gtache.autosubtitle.modules.whisper.json;
|
||||
|
||||
import com.github.gtache.autosubtitle.whisper.SubtitleParser;
|
||||
import com.github.gtache.autosubtitle.whisper.json.JsonSubtitleParser;
|
||||
import com.google.gson.Gson;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
import dagger.Provides;
|
||||
|
||||
import javax.inject.Singleton;
|
||||
|
||||
@Module
|
||||
public abstract class JsonModule {
|
||||
|
||||
@Binds
|
||||
abstract SubtitleParser bindsSubtitleParser(final JsonSubtitleParser subtitleParser);
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
static Gson providesGson() {
|
||||
return new Gson();
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.github.gtache.autosubtitle.whisper;
|
||||
package com.github.gtache.autosubtitle.subtitle.extractor.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.Audio;
|
||||
import com.github.gtache.autosubtitle.File;
|
||||
@@ -7,13 +7,18 @@ import com.github.gtache.autosubtitle.Video;
|
||||
import com.github.gtache.autosubtitle.impl.OS;
|
||||
import com.github.gtache.autosubtitle.modules.setup.whisper.WhisperVenvPath;
|
||||
import com.github.gtache.autosubtitle.process.impl.AbstractProcessRunner;
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractEvent;
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractException;
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleExtractorListener;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.ExtractEventImpl;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractEvent;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractException;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractor;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.SubtitleExtractorListener;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.impl.ExtractEventImpl;
|
||||
import com.github.gtache.autosubtitle.whisper.WhisperModels;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
@@ -23,6 +28,7 @@ import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@@ -34,16 +40,17 @@ import static java.util.Objects.requireNonNull;
|
||||
@Singleton
|
||||
public class WhisperSubtitleExtractor extends AbstractProcessRunner implements SubtitleExtractor {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(WhisperSubtitleExtractor.class);
|
||||
private static final Pattern LINE_PROGRESS_PATTERN = Pattern.compile("^\\[\\d{2}:\\d{2}\\.\\d{3} --> (?<minutes>\\d{2}):(?<seconds>\\d{2})\\.(?<millis>\\d{3})]");
|
||||
private final Path venvPath;
|
||||
private final SubtitleParser parser;
|
||||
private final SubtitleConverter converter;
|
||||
private final OS os;
|
||||
private final Set<SubtitleExtractorListener> listeners;
|
||||
|
||||
@Inject
|
||||
WhisperSubtitleExtractor(@WhisperVenvPath final Path venvPath, final SubtitleParser parser, final OS os) {
|
||||
WhisperSubtitleExtractor(@WhisperVenvPath final Path venvPath, final Map<String, SubtitleConverter> converters, final OS os) {
|
||||
this.venvPath = requireNonNull(venvPath);
|
||||
this.parser = requireNonNull(parser);
|
||||
this.converter = requireNonNull(converters.get("json"));
|
||||
this.os = requireNonNull(os);
|
||||
this.listeners = new HashSet<>();
|
||||
}
|
||||
@@ -113,6 +120,7 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
|
||||
var line = processListener.readLine();
|
||||
var oldProgress = -1.0;
|
||||
while (line != null) {
|
||||
logger.info("Whisper output : {}", line);
|
||||
final var newProgress = computeProgress(line, duration, oldProgress);
|
||||
notifyListeners(new ExtractEventImpl(line, newProgress));
|
||||
oldProgress = newProgress;
|
||||
@@ -123,7 +131,11 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
|
||||
final var filename = path.getFileName().toString();
|
||||
final var subtitleFilename = filename.substring(0, filename.lastIndexOf('.')) + ".json";
|
||||
final var subtitleFile = outputDir.resolve(subtitleFilename);
|
||||
return parser.parse(subtitleFile);
|
||||
try {
|
||||
return converter.parse(subtitleFile);
|
||||
} catch (final ParseException e) {
|
||||
throw new ExtractException(e);
|
||||
}
|
||||
} else {
|
||||
throw new ExtractException("Error extracting subtitles: " + result.output());
|
||||
}
|
||||
@@ -132,7 +144,7 @@ public class WhisperSubtitleExtractor extends AbstractProcessRunner implements S
|
||||
}
|
||||
}
|
||||
|
||||
private double computeProgress(final CharSequence line, final long duration, final double oldProgress) {
|
||||
private static double computeProgress(final CharSequence line, final long duration, final double oldProgress) {
|
||||
final var matcher = LINE_PROGRESS_PATTERN.matcher(line);
|
||||
if (matcher.find()) {
|
||||
final var minutes = Integer.parseInt(matcher.group("minutes"));
|
||||
@@ -0,0 +1,75 @@
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.Language;
|
||||
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
|
||||
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* {@link SubtitleConverter} implementation for JSON files
|
||||
*/
|
||||
@Singleton
|
||||
public class JSONSubtitleConverter implements SubtitleConverter {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(JSONSubtitleConverter.class);
|
||||
private final Gson gson;
|
||||
|
||||
@Inject
|
||||
JSONSubtitleConverter(final Gson gson) {
|
||||
this.gson = Objects.requireNonNull(gson);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(final SubtitleCollection collection) {
|
||||
final var id = new AtomicInteger(0);
|
||||
final var segments = collection.subtitles().stream().map(s -> new JSONSubtitleSegment(id.incrementAndGet(), 0, s.start() / (double) 1000,
|
||||
s.end() / (double) 1000, s.content(), List.of(), 0, 0, 0, 0)).toList();
|
||||
final var subtitles = new JSONSubtitles(collection.text(), segments, collection.language().iso2());
|
||||
return gson.toJson(subtitles);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SubtitleCollection parse(final String content) throws ParseException {
|
||||
try {
|
||||
final var json = gson.fromJson(content, JSONSubtitles.class);
|
||||
final var subtitles = json.segments().stream().map(s -> {
|
||||
final var start = (long) s.start() * 1000L;
|
||||
final var end = (long) s.end() * 1000L;
|
||||
return new SubtitleImpl(s.text(), start, end, null, null);
|
||||
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
||||
final var language = Language.getLanguage(json.language());
|
||||
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(" "));
|
||||
if (!Objects.equals(json.text(), subtitlesText)) {
|
||||
logger.warn("Not same text: {}\n\n{}", json.text(), subtitlesText);
|
||||
}
|
||||
return new SubtitleCollectionImpl(json.text(), subtitles, language);
|
||||
} catch (final Exception e) {
|
||||
throw new ParseException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canParse(final Path file) {
|
||||
return file.getFileName().toString().endsWith(".json");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatName() {
|
||||
return "json";
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.github.gtache.autosubtitle.whisper.json;
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.github.gtache.autosubtitle.whisper.json;
|
||||
package com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
package com.github.gtache.autosubtitle.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public interface SubtitleParser {
|
||||
|
||||
SubtitleCollection parse(String text);
|
||||
|
||||
SubtitleCollection parse(Path file) throws IOException;
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.github.gtache.autosubtitle.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractionModelProvider;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModelProvider;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
package com.github.gtache.autosubtitle.whisper;
|
||||
|
||||
import com.github.gtache.autosubtitle.subtitle.ExtractionModel;
|
||||
import com.github.gtache.autosubtitle.subtitle.extractor.ExtractionModel;
|
||||
|
||||
/**
|
||||
* Whisper models
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
package com.github.gtache.autosubtitle.whisper.json;
|
||||
|
||||
import com.github.gtache.autosubtitle.Language;
|
||||
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||
import com.github.gtache.autosubtitle.subtitle.SubtitleCollection;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
|
||||
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
|
||||
import com.github.gtache.autosubtitle.whisper.SubtitleParser;
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Comparator;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Singleton
|
||||
public class JsonSubtitleParser implements SubtitleParser {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(JsonSubtitleParser.class);
|
||||
private final Gson gson;
|
||||
|
||||
@Inject
|
||||
JsonSubtitleParser(final Gson gson) {
|
||||
this.gson = Objects.requireNonNull(gson);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SubtitleCollection parse(final Path file) throws IOException {
|
||||
final var content = Files.readString(file);
|
||||
return parse(content);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SubtitleCollection parse(final String text) {
|
||||
final var json = gson.fromJson(text, JSONSubtitles.class);
|
||||
final var subtitles = json.segments().stream().map(s -> {
|
||||
final var start = (long) s.start() * 1000L;
|
||||
final var end = (long) s.end() * 1000L;
|
||||
return new SubtitleImpl(s.text(), start, end, null, null);
|
||||
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
||||
final var language = Language.getLanguage(json.language());
|
||||
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(" "));
|
||||
if (!Objects.equals(json.text(), subtitlesText)) {
|
||||
logger.warn("Not same text: {} vs {}", json.text(), subtitlesText);
|
||||
}
|
||||
return new SubtitleCollectionImpl(json.text(), subtitles, language);
|
||||
}
|
||||
}
|
||||
@@ -9,9 +9,11 @@ module com.github.gtache.autosubtitle.whisper {
|
||||
requires transitive java.compiler; //Don't know why dagger generates @Generated here, need to debug
|
||||
|
||||
exports com.github.gtache.autosubtitle.whisper;
|
||||
exports com.github.gtache.autosubtitle.whisper.json;
|
||||
exports com.github.gtache.autosubtitle.setup.whisper;
|
||||
exports com.github.gtache.autosubtitle.modules.whisper;
|
||||
exports com.github.gtache.autosubtitle.modules.whisper.json;
|
||||
exports com.github.gtache.autosubtitle.modules.setup.whisper;
|
||||
exports com.github.gtache.autosubtitle.subtitle.extractor.whisper;
|
||||
exports com.github.gtache.autosubtitle.modules.subtitle.extractor.whisper;
|
||||
exports com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisper;
|
||||
exports com.github.gtache.autosubtitle.subtitle.parser.json.whisper;
|
||||
}
|
||||
Reference in New Issue
Block a user