Adds tests for whisperx
This commit is contained in:
@@ -54,7 +54,7 @@ public class WhisperXSetupManager extends AbstractWhisperSetupManager {
|
|||||||
@Override
|
@Override
|
||||||
protected boolean isWhisperInstalled() throws SetupException {
|
protected boolean isWhisperInstalled() throws SetupException {
|
||||||
final var path = getPythonPath();
|
final var path = getPythonPath();
|
||||||
if (Files.exists(path)) {
|
if (Files.isRegularFile(path)) {
|
||||||
try {
|
try {
|
||||||
final var result = processRunner().run(List.of(path.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5));
|
final var result = processRunner().run(List.of(path.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5));
|
||||||
return result.exitCode() == 0;
|
return result.exitCode() == 0;
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ public class WhisperXSubtitleExtractor extends AbstractWhisperSubtitleExtractor
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<String> createArgs(final Path path, final Language language, final ExtractionModel model, final Path outputDir) {
|
protected List<String> createArgs(final Path path, final Language language, final ExtractionModel model, final Path outputDir) {
|
||||||
final var args = new ArrayList<String>(14);
|
final var args = new ArrayList<String>();
|
||||||
args.add(getPythonPath().toString());
|
args.add(getPythonPath().toString());
|
||||||
args.add("-m");
|
args.add("-m");
|
||||||
args.add("whisperx");
|
args.add("whisperx");
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|||||||
}
|
}
|
||||||
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
}).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList();
|
||||||
final var language = Language.getLanguage(json.language());
|
final var language = Language.getLanguage(json.language());
|
||||||
final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining(""));
|
final var subtitlesText = subtitles.stream().map(s -> s.content().trim()).collect(Collectors.joining(" "));
|
||||||
return new SubtitleCollectionImpl<>(subtitlesText, subtitles, language);
|
return new SubtitleCollectionImpl<>(subtitlesText, subtitles, language);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParseException(e);
|
throw new ParseException(e);
|
||||||
@@ -101,6 +101,8 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|||||||
final var newLength = builder.length() + s.length();
|
final var newLength = builder.length() + s.length();
|
||||||
if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
||||||
builder.append("\n").append(s);
|
builder.append("\n").append(s);
|
||||||
|
} else if (builder.isEmpty()) {
|
||||||
|
builder.append(s);
|
||||||
} else {
|
} else {
|
||||||
builder.append(" ").append(s);
|
builder.append(" ").append(s);
|
||||||
}
|
}
|
||||||
@@ -138,6 +140,8 @@ public class JSONSubtitleConverter implements SubtitleConverter<SubtitleImpl> {
|
|||||||
currentStart = start == 0 ? currentEnd : start;
|
currentStart = start == 0 ? currentEnd : start;
|
||||||
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
} else if (areDifferentLines(builder.length(), newLength, maxLineLength)) {
|
||||||
builder.append("\n").append(text);
|
builder.append("\n").append(text);
|
||||||
|
} else if (builder.isEmpty()) {
|
||||||
|
builder.append(text);
|
||||||
} else {
|
} else {
|
||||||
builder.append(" ").append(text);
|
builder.append(" ").append(text);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
package com.github.gtache.autosubtitle.modules.setup.whisperx;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class TestWhisperXSetupModule {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testPythonVersion() {
|
||||||
|
assertEquals("3.10", WhisperXSetupModule.providesPythonVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testWHisperXBundledRoot() {
|
||||||
|
final var root = Paths.get("root");
|
||||||
|
assertEquals(root.resolve("whisperx"), WhisperXSetupModule.providesWhisperXBundledRoot(root));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testWhisperXVenvPath() {
|
||||||
|
final var root = Paths.get("root");
|
||||||
|
assertEquals(root.resolve("whisperx-env"), WhisperXSetupModule.providesWhisperXVenvPath(root));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisperx;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||||
|
|
||||||
|
class TestWhisperXJsonModule {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGson() {
|
||||||
|
assertInstanceOf(Gson.class, WhisperXJsonModule.providesGson());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
package com.github.gtache.autosubtitle.setup.whisperx;
|
||||||
|
|
||||||
|
import com.github.gtache.autosubtitle.impl.OS;
|
||||||
|
import com.github.gtache.autosubtitle.process.ProcessResult;
|
||||||
|
import com.github.gtache.autosubtitle.process.ProcessRunner;
|
||||||
|
import com.github.gtache.autosubtitle.setup.SetupException;
|
||||||
|
import com.github.gtache.autosubtitle.setup.conda.CondaSetupManager;
|
||||||
|
import com.github.gtache.autosubtitle.setup.whisper.WhisperSetupConfiguration;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class TestWhisperXSetupManager {
|
||||||
|
|
||||||
|
private final CondaSetupManager condaSetupManager;
|
||||||
|
private final ProcessRunner processRunner;
|
||||||
|
private final WhisperSetupConfiguration configuration;
|
||||||
|
private final HttpClient httpClient;
|
||||||
|
private final WhisperXSetupManager whisperXSetupManager;
|
||||||
|
|
||||||
|
TestWhisperXSetupManager(@Mock final CondaSetupManager condaSetupManager, @Mock final WhisperSetupConfiguration configuration,
|
||||||
|
@Mock final ProcessRunner processRunner, @Mock final HttpClient httpClient) {
|
||||||
|
this.condaSetupManager = Objects.requireNonNull(condaSetupManager);
|
||||||
|
this.processRunner = Objects.requireNonNull(processRunner);
|
||||||
|
this.configuration = Objects.requireNonNull(configuration);
|
||||||
|
this.httpClient = Objects.requireNonNull(httpClient);
|
||||||
|
this.whisperXSetupManager = new WhisperXSetupManager(condaSetupManager, configuration, processRunner, httpClient);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testName() {
|
||||||
|
assertEquals("WhisperX", whisperXSetupManager.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testInstallWhisperException(@TempDir final Path path) throws IOException {
|
||||||
|
when(configuration.venvPath()).thenReturn(path);
|
||||||
|
final var python = path.resolve("python");
|
||||||
|
when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenThrow(IOException.class);
|
||||||
|
assertThrows(SetupException.class, whisperXSetupManager::installWhisper);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testInstallWhisperBadExitCode(@TempDir final Path path) throws IOException {
|
||||||
|
when(configuration.venvPath()).thenReturn(path);
|
||||||
|
final var python = path.resolve("python");
|
||||||
|
final var result = mock(ProcessResult.class);
|
||||||
|
when(result.exitCode()).thenReturn(1);
|
||||||
|
when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenReturn(result);
|
||||||
|
assertThrows(SetupException.class, whisperXSetupManager::installWhisper);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testInstallWhisper(@TempDir final Path path) throws IOException {
|
||||||
|
when(configuration.venvPath()).thenReturn(path);
|
||||||
|
final var python = path.resolve("python");
|
||||||
|
when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenReturn(mock(ProcessResult.class));
|
||||||
|
assertDoesNotThrow(whisperXSetupManager::installWhisper);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testIsWhisperInstalledException(@TempDir final Path tempDir) throws IOException {
|
||||||
|
when(configuration.os()).thenReturn(OS.LINUX);
|
||||||
|
when(configuration.venvPath()).thenReturn(tempDir);
|
||||||
|
final var file = tempDir.resolve("python");
|
||||||
|
Files.createFile(file);
|
||||||
|
when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenThrow(IOException.class);
|
||||||
|
assertThrows(SetupException.class, whisperXSetupManager::isWhisperInstalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testIsWhisperInstalledTrue(@TempDir final Path tempDir) throws IOException, SetupException {
|
||||||
|
when(configuration.os()).thenReturn(OS.LINUX);
|
||||||
|
when(configuration.venvPath()).thenReturn(tempDir);
|
||||||
|
final var file = tempDir.resolve("python");
|
||||||
|
Files.createFile(file);
|
||||||
|
final var result = mock(ProcessResult.class);
|
||||||
|
when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenReturn(result);
|
||||||
|
assertTrue(whisperXSetupManager.isWhisperInstalled());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testIsWhisperInstalledBadExitCode(@TempDir final Path tempDir) throws IOException, SetupException {
|
||||||
|
when(configuration.os()).thenReturn(OS.LINUX);
|
||||||
|
when(configuration.venvPath()).thenReturn(tempDir);
|
||||||
|
final var file = tempDir.resolve("python");
|
||||||
|
Files.createFile(file);
|
||||||
|
final var result = mock(ProcessResult.class);
|
||||||
|
when(result.exitCode()).thenReturn(1);
|
||||||
|
when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenReturn(result);
|
||||||
|
assertFalse(whisperXSetupManager.isWhisperInstalled());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetWhisperInstalledFileDoesntExist() throws SetupException {
|
||||||
|
when(configuration.venvPath()).thenReturn(Paths.get("path"));
|
||||||
|
assertFalse(whisperXSetupManager.isWhisperInstalled());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testIllegal() {
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(null, configuration, processRunner, httpClient));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, null, processRunner, httpClient));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, configuration, null, httpClient));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, configuration, processRunner, null));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
package com.github.gtache.autosubtitle.subtitle.extractor.whisperx;
|
||||||
|
|
||||||
|
import com.github.gtache.autosubtitle.Language;
|
||||||
|
import com.github.gtache.autosubtitle.impl.OS;
|
||||||
|
import com.github.gtache.autosubtitle.process.ProcessRunner;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverterProvider;
|
||||||
|
import com.github.gtache.autosubtitle.whisper.WhisperModels;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class TestWhisperXSubtitleExtractor {
|
||||||
|
|
||||||
|
private final Path venvPath;
|
||||||
|
private final SubtitleConverterProvider converterProvider;
|
||||||
|
private final ProcessRunner processRunner;
|
||||||
|
private final OS os;
|
||||||
|
private final WhisperXSubtitleExtractor whisperXSubtitleExtractor;
|
||||||
|
|
||||||
|
TestWhisperXSubtitleExtractor(@Mock final SubtitleConverterProvider converterProvider, @Mock final SubtitleConverter<Subtitle> converter,
|
||||||
|
@Mock final ProcessRunner processRunner) {
|
||||||
|
this.converterProvider = Objects.requireNonNull(converterProvider);
|
||||||
|
doReturn(converter).when(converterProvider).getConverter("json");
|
||||||
|
this.processRunner = Objects.requireNonNull(processRunner);
|
||||||
|
this.venvPath = Paths.get("path");
|
||||||
|
this.os = OS.LINUX;
|
||||||
|
this.whisperXSubtitleExtractor = new WhisperXSubtitleExtractor(venvPath, converterProvider, processRunner, os);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testEN() {
|
||||||
|
final var path = Paths.get("in");
|
||||||
|
final var outputPath = Paths.get("out");
|
||||||
|
final var language = Language.EN;
|
||||||
|
final var model = WhisperModels.MEDIUM;
|
||||||
|
final var expected = List.of(venvPath.resolve("python").toString(),
|
||||||
|
"-m", "whisperx", "--verbose", "False", "--model", "medium.en", "--task", "transcribe",
|
||||||
|
"--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu",
|
||||||
|
"--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type",
|
||||||
|
"int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), "--language",
|
||||||
|
"en", path.toString());
|
||||||
|
assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testENLarge() {
|
||||||
|
final var path = Paths.get("in");
|
||||||
|
final var outputPath = Paths.get("out");
|
||||||
|
final var language = Language.EN;
|
||||||
|
final var model = WhisperModels.LARGE;
|
||||||
|
final var expected = List.of(venvPath.resolve("python").toString(),
|
||||||
|
"-m", "whisperx", "--verbose", "False", "--model", "large", "--task", "transcribe",
|
||||||
|
"--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu",
|
||||||
|
"--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type",
|
||||||
|
"int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), "--language",
|
||||||
|
"en", path.toString());
|
||||||
|
assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testAuto() {
|
||||||
|
final var path = Paths.get("in");
|
||||||
|
final var outputPath = Paths.get("out");
|
||||||
|
final var language = Language.AUTO;
|
||||||
|
final var model = WhisperModels.SMALL;
|
||||||
|
final var expected = List.of(venvPath.resolve("python").toString(),
|
||||||
|
"-m", "whisperx", "--verbose", "False", "--model", "small", "--task", "transcribe",
|
||||||
|
"--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu",
|
||||||
|
"--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type",
|
||||||
|
"int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), path.toString());
|
||||||
|
assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testIllegal() {
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(null, converterProvider, processRunner, os));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, null, processRunner, os));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, converterProvider, null, os));
|
||||||
|
assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, converterProvider, processRunner, null));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
|
||||||
|
|
||||||
|
import com.github.gtache.autosubtitle.Language;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.Subtitle;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.converter.ParseException;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl;
|
||||||
|
import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
|
import org.junit.jupiter.params.provider.CsvSource;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.prefs.Preferences;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
class TestJSONSubtitleConverter {
|
||||||
|
|
||||||
|
private final Gson gson;
|
||||||
|
private final Preferences preferences;
|
||||||
|
private final int defaultMaxLineLength;
|
||||||
|
private final int defaultMaxLines;
|
||||||
|
private final JSONSubtitleConverter converter;
|
||||||
|
|
||||||
|
TestJSONSubtitleConverter() {
|
||||||
|
this.gson = new Gson();
|
||||||
|
this.preferences = mock(Preferences.class);
|
||||||
|
this.defaultMaxLineLength = 100;
|
||||||
|
when(preferences.getInt("maxLineLength", defaultMaxLineLength)).thenReturn(defaultMaxLineLength);
|
||||||
|
this.defaultMaxLines = 2;
|
||||||
|
when(preferences.getInt("maxLines", defaultMaxLines)).thenReturn(defaultMaxLines);
|
||||||
|
this.converter = new JSONSubtitleConverter(gson, preferences, defaultMaxLineLength, defaultMaxLines);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testFormatName() {
|
||||||
|
assertEquals("json", converter.formatName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testParseFormat() throws IOException, ParseException {
|
||||||
|
final var in = new String(getClass().getResourceAsStream("whisperx-in.json").readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
final var out = new String(getClass().getResourceAsStream("whisperx-out.json").readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
final var expected = new SubtitleCollectionImpl<Subtitle>("This is a test. Yes.", List.of(new SubtitleImpl("This is a test.", 9, 410, null, null), new SubtitleImpl("Yes.", 450, 6963, null, null)), Language.FR);
|
||||||
|
assertEquals(expected, converter.parse(in));
|
||||||
|
assertEquals(out, converter.format(expected, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testParseOverMaxWords() throws IOException, ParseException {
|
||||||
|
final var in = new String(getClass().getResourceAsStream("whisperx-max-words.json").readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
final var expected = new SubtitleCollectionImpl<Subtitle>("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz Yes.",
|
||||||
|
List.of(new SubtitleImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr", 0, 18000, null, null),
|
||||||
|
new SubtitleImpl("ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz", 18000, 26000, null, null),
|
||||||
|
new SubtitleImpl("Yes.", 30000, 31000, null, null)), Language.EN);
|
||||||
|
assertEquals(expected, converter.parse(in));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testParseOverMaxLines() throws IOException, ParseException {
|
||||||
|
final var in = new String(getClass().getResourceAsStream("whisperx-max-lines.json").readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
final var expected = new SubtitleCollectionImpl<Subtitle>("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll Yes.",
|
||||||
|
List.of(new SubtitleImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll", 0, 18000, null, null),
|
||||||
|
new SubtitleImpl("Yes.", 30000, 31000, null, null)), Language.EN);
|
||||||
|
assertEquals(expected, converter.parse(in));
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@CsvSource({
|
||||||
|
"test.json,true",
|
||||||
|
".json,true",
|
||||||
|
"abcd.json,true",
|
||||||
|
"abcd.json2,false",
|
||||||
|
"abcd.js,false",
|
||||||
|
"abcd.jso,false",
|
||||||
|
"json,false",
|
||||||
|
})
|
||||||
|
void testCanParse(final String name, final boolean expected) {
|
||||||
|
assertEquals(expected, converter.canParse(Path.of(name)));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
class TestJSONSubtitleSegment {
|
||||||
|
@Test
|
||||||
|
void testGetters() {
|
||||||
|
final var start = 0.0;
|
||||||
|
final var end = 1.0;
|
||||||
|
final var text = "test";
|
||||||
|
final var words = List.of(mock(JSONSubtitleWords.class));
|
||||||
|
final var jsonSubtitleSegment = new JSONSubtitleSegment(start, end, text, words);
|
||||||
|
assertEquals(start, jsonSubtitleSegment.start());
|
||||||
|
assertEquals(end, jsonSubtitleSegment.end());
|
||||||
|
assertEquals(text, jsonSubtitleSegment.text());
|
||||||
|
assertEquals(words, jsonSubtitleSegment.words());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class TestJSONSubtitleWords {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetters() {
|
||||||
|
final var word = "test";
|
||||||
|
final var start = 0.0;
|
||||||
|
final var end = 1.0;
|
||||||
|
final var score = 2.0;
|
||||||
|
final var jsonSubtitleWords = new JSONSubtitleWords(word, start, end, score);
|
||||||
|
assertEquals(word, jsonSubtitleWords.word());
|
||||||
|
assertEquals(start, jsonSubtitleWords.start());
|
||||||
|
assertEquals(end, jsonSubtitleWords.end());
|
||||||
|
assertEquals(score, jsonSubtitleWords.score());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
class TestJSONSubtitles {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetters() {
|
||||||
|
final var segments = List.of(mock(JSONSubtitleSegment.class));
|
||||||
|
final var language = "language";
|
||||||
|
final var jsonSubtitles = new JSONSubtitles(segments, language);
|
||||||
|
assertEquals(segments, jsonSubtitles.segments());
|
||||||
|
assertEquals(language, jsonSubtitles.language());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
package com.github.gtache.autosubtitle.whisperx;
|
||||||
|
|
||||||
|
import com.github.gtache.autosubtitle.whisper.WhisperModels;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class TestWhisperXExtractionModelProvider {
|
||||||
|
|
||||||
|
private final WhisperXExtractionModelProvider provider;
|
||||||
|
|
||||||
|
TestWhisperXExtractionModelProvider() {
|
||||||
|
this.provider = new WhisperXExtractionModelProvider();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetAvailableExtractionModels() {
|
||||||
|
assertEquals(Arrays.asList(WhisperModels.values()), provider.getAvailableExtractionModels());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetDefaultExtractionModel() {
|
||||||
|
assertEquals(WhisperModels.LARGE, provider.getDefaultExtractionModel());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetExtractionModel() {
|
||||||
|
for (final var value : WhisperModels.values()) {
|
||||||
|
assertEquals(value, provider.getExtractionModel(value.name()));
|
||||||
|
assertEquals(value, provider.getExtractionModel(value.name().toLowerCase()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
{
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"start": 0.009,
|
||||||
|
"end": 0.41,
|
||||||
|
"text": "This is a test.",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "this",
|
||||||
|
"start": 0.009,
|
||||||
|
"end": 0.089,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "is",
|
||||||
|
"start": 0.109,
|
||||||
|
"end": 0.249,
|
||||||
|
"score": 0.33
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "a",
|
||||||
|
"start": 0.27,
|
||||||
|
"end": 0.31,
|
||||||
|
"score": 0.29
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "test",
|
||||||
|
"start": 0.33,
|
||||||
|
"end": 0.41,
|
||||||
|
"score": 0.002
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": 0.45,
|
||||||
|
"end": 6.963,
|
||||||
|
"text": "Yes.",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "Yes",
|
||||||
|
"start": 0.45,
|
||||||
|
"end": 6.963,
|
||||||
|
"score": 0.682
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"word_segments": [
|
||||||
|
{
|
||||||
|
"word": "This",
|
||||||
|
"start": 0.009,
|
||||||
|
"end": 0.089,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "is",
|
||||||
|
"start": 0.109,
|
||||||
|
"end": 0.249,
|
||||||
|
"score": 0.33
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "a",
|
||||||
|
"start": 0.27,
|
||||||
|
"end": 0.31,
|
||||||
|
"score": 0.29
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "test",
|
||||||
|
"start": 0.33,
|
||||||
|
"end": 0.41,
|
||||||
|
"score": 0.002
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "Yes",
|
||||||
|
"start": 0.45,
|
||||||
|
"end": 6.963,
|
||||||
|
"score": 0.682
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"language": "fr"
|
||||||
|
}
|
||||||
@@ -0,0 +1,97 @@
|
|||||||
|
{
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 18.0,
|
||||||
|
"text": "aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii jjjjjjjjjj kkkkkkkkkk llllllllll",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "aaaaaaaaaa",
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 1.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "bbbbbbbbbb",
|
||||||
|
"start": 1.000,
|
||||||
|
"end": 2.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "cccccccccc",
|
||||||
|
"start": 2.000,
|
||||||
|
"end": 3.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "dddddddddd",
|
||||||
|
"start": 3.000,
|
||||||
|
"end": 4.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "eeeeeeeeee",
|
||||||
|
"start": 4.000,
|
||||||
|
"end": 5.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "ffffffffff",
|
||||||
|
"start": 5.000,
|
||||||
|
"end": 6.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "gggggggggg",
|
||||||
|
"start": 6.000,
|
||||||
|
"end": 7.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "hhhhhhhhhh",
|
||||||
|
"start": 7.000,
|
||||||
|
"end": 8.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "iiiiiiiiii",
|
||||||
|
"start": 8.000,
|
||||||
|
"end": 9.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "jjjjjjjjjj",
|
||||||
|
"start": 9.000,
|
||||||
|
"end": 10.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "kkkkkkkkkk",
|
||||||
|
"start": 10.000,
|
||||||
|
"end": 11.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "llllllllll",
|
||||||
|
"start": 11.000,
|
||||||
|
"end": 12.0,
|
||||||
|
"score": 0.304
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": 30.00,
|
||||||
|
"end": 31.000,
|
||||||
|
"text": "Yes.",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "Yes",
|
||||||
|
"start": 30.00,
|
||||||
|
"end": 31.000,
|
||||||
|
"score": 0.682
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"language": "en"
|
||||||
|
}
|
||||||
@@ -0,0 +1,181 @@
|
|||||||
|
{
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 33.0,
|
||||||
|
"text": "aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii jjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "aaaaaaaaaa",
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 1.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "bbbbbbbbbb",
|
||||||
|
"start": 1.000,
|
||||||
|
"end": 2.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "cccccccccc",
|
||||||
|
"start": 2.000,
|
||||||
|
"end": 3.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "dddddddddd",
|
||||||
|
"start": 3.000,
|
||||||
|
"end": 4.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "eeeeeeeeee",
|
||||||
|
"start": 4.000,
|
||||||
|
"end": 5.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "ffffffffff",
|
||||||
|
"start": 5.000,
|
||||||
|
"end": 6.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "gggggggggg",
|
||||||
|
"start": 6.000,
|
||||||
|
"end": 7.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "hhhhhhhhhh",
|
||||||
|
"start": 7.000,
|
||||||
|
"end": 8.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "iiiiiiiiii",
|
||||||
|
"start": 8.000,
|
||||||
|
"end": 9.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "jjjjjjjjjj",
|
||||||
|
"start": 9.000,
|
||||||
|
"end": 10.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "kkkkkkkkkk",
|
||||||
|
"start": 10.000,
|
||||||
|
"end": 11.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "llllllllll",
|
||||||
|
"start": 11.000,
|
||||||
|
"end": 12.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "mmmmmmmmmm",
|
||||||
|
"start": 12.000,
|
||||||
|
"end": 13.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "nnnnnnnnnn",
|
||||||
|
"start": 13.000,
|
||||||
|
"end": 14.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "oooooooooo",
|
||||||
|
"start": 14.000,
|
||||||
|
"end": 15.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "pppppppppp",
|
||||||
|
"start": 15.000,
|
||||||
|
"end": 16.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "qqqqqqqqqq",
|
||||||
|
"start": 16.000,
|
||||||
|
"end": 17.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "rrrrrrrrrr",
|
||||||
|
"start": 17.000,
|
||||||
|
"end": 18.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "ssssssssss",
|
||||||
|
"start": 18.000,
|
||||||
|
"end": 19.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "tttttttttt",
|
||||||
|
"start": 19.000,
|
||||||
|
"end": 20.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "uuuuuuuuuu",
|
||||||
|
"start": 20.000,
|
||||||
|
"end": 21.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "vvvvvvvvvv",
|
||||||
|
"start": 21.000,
|
||||||
|
"end": 22.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "wwwwwwwwww",
|
||||||
|
"start": 22.000,
|
||||||
|
"end": 23.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "xxxxxxxxxx",
|
||||||
|
"start": 23.000,
|
||||||
|
"end": 24.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "yyyyyyyyyy",
|
||||||
|
"start": 24.000,
|
||||||
|
"end": 25.0,
|
||||||
|
"score": 0.304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"word": "zzzzzzzzzz",
|
||||||
|
"start": 25.000,
|
||||||
|
"end": 26.0,
|
||||||
|
"score": 0.304
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": 30.00,
|
||||||
|
"end": 31.000,
|
||||||
|
"text": "Yes.",
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "Yes",
|
||||||
|
"start": 30.00,
|
||||||
|
"end": 31.000,
|
||||||
|
"score": 0.682
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"language": "en"
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"segments":[{"start":0.009,"end":0.41,"text":"This is a test.","words":[]},{"start":0.45,"end":6.963,"text":"Yes.","words":[]}],"language":"fr"}
|
||||||
Reference in New Issue
Block a user