diff --git a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/setup/whisperx/WhisperXSetupManager.java b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/setup/whisperx/WhisperXSetupManager.java index dc7fe21..04f65eb 100644 --- a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/setup/whisperx/WhisperXSetupManager.java +++ b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/setup/whisperx/WhisperXSetupManager.java @@ -54,7 +54,7 @@ public class WhisperXSetupManager extends AbstractWhisperSetupManager { @Override protected boolean isWhisperInstalled() throws SetupException { final var path = getPythonPath(); - if (Files.exists(path)) { + if (Files.isRegularFile(path)) { try { final var result = processRunner().run(List.of(path.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5)); return result.exitCode() == 0; diff --git a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/WhisperXSubtitleExtractor.java b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/WhisperXSubtitleExtractor.java index 8ed0eff..5b4eed6 100644 --- a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/WhisperXSubtitleExtractor.java +++ b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/WhisperXSubtitleExtractor.java @@ -29,7 +29,7 @@ public class WhisperXSubtitleExtractor extends AbstractWhisperSubtitleExtractor @Override protected List createArgs(final Path path, final Language language, final ExtractionModel model, final Path outputDir) { - final var args = new ArrayList(14); + final var args = new ArrayList(); args.add(getPythonPath().toString()); args.add("-m"); args.add("whisperx"); diff --git a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java index 7703bcf..505ae5b 100644 --- a/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java +++ b/whisper/whisperx/src/main/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/JSONSubtitleConverter.java @@ -70,7 +70,7 @@ public class JSONSubtitleConverter implements SubtitleConverter { } }).sorted(Comparator.comparing(Subtitle::start).thenComparing(Subtitle::end)).toList(); final var language = Language.getLanguage(json.language()); - final var subtitlesText = subtitles.stream().map(Subtitle::content).collect(Collectors.joining("")); + final var subtitlesText = subtitles.stream().map(s -> s.content().trim()).collect(Collectors.joining(" ")); return new SubtitleCollectionImpl<>(subtitlesText, subtitles, language); } catch (final Exception e) { throw new ParseException(e); @@ -101,6 +101,8 @@ public class JSONSubtitleConverter implements SubtitleConverter { final var newLength = builder.length() + s.length(); if (areDifferentLines(builder.length(), newLength, maxLineLength)) { builder.append("\n").append(s); + } else if (builder.isEmpty()) { + builder.append(s); } else { builder.append(" ").append(s); } @@ -138,6 +140,8 @@ public class JSONSubtitleConverter implements SubtitleConverter { currentStart = start == 0 ? currentEnd : start; } else if (areDifferentLines(builder.length(), newLength, maxLineLength)) { builder.append("\n").append(text); + } else if (builder.isEmpty()) { + builder.append(text); } else { builder.append(" ").append(text); } diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/setup/whisperx/TestWhisperXSetupModule.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/setup/whisperx/TestWhisperXSetupModule.java new file mode 100644 index 0000000..a80953a --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/setup/whisperx/TestWhisperXSetupModule.java @@ -0,0 +1,27 @@ +package com.github.gtache.autosubtitle.modules.setup.whisperx; + +import org.junit.jupiter.api.Test; + +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestWhisperXSetupModule { + + @Test + void testPythonVersion() { + assertEquals("3.10", WhisperXSetupModule.providesPythonVersion()); + } + + @Test + void testWHisperXBundledRoot() { + final var root = Paths.get("root"); + assertEquals(root.resolve("whisperx"), WhisperXSetupModule.providesWhisperXBundledRoot(root)); + } + + @Test + void testWhisperXVenvPath() { + final var root = Paths.get("root"); + assertEquals(root.resolve("whisperx-env"), WhisperXSetupModule.providesWhisperXVenvPath(root)); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/subtitle/parser/json/whisperx/TestWhisperXJsonModule.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/subtitle/parser/json/whisperx/TestWhisperXJsonModule.java new file mode 100644 index 0000000..494ccbf --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/modules/subtitle/parser/json/whisperx/TestWhisperXJsonModule.java @@ -0,0 +1,14 @@ +package com.github.gtache.autosubtitle.modules.subtitle.parser.json.whisperx; + +import com.google.gson.Gson; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + +class TestWhisperXJsonModule { + + @Test + void testGson() { + assertInstanceOf(Gson.class, WhisperXJsonModule.providesGson()); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/setup/whisperx/TestWhisperXSetupManager.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/setup/whisperx/TestWhisperXSetupManager.java new file mode 100644 index 0000000..7a9d2aa --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/setup/whisperx/TestWhisperXSetupManager.java @@ -0,0 +1,123 @@ +package com.github.gtache.autosubtitle.setup.whisperx; + +import com.github.gtache.autosubtitle.impl.OS; +import com.github.gtache.autosubtitle.process.ProcessResult; +import com.github.gtache.autosubtitle.process.ProcessRunner; +import com.github.gtache.autosubtitle.setup.SetupException; +import com.github.gtache.autosubtitle.setup.conda.CondaSetupManager; +import com.github.gtache.autosubtitle.setup.whisper.WhisperSetupConfiguration; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.io.IOException; +import java.net.http.HttpClient; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.List; +import java.util.Objects; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class TestWhisperXSetupManager { + + private final CondaSetupManager condaSetupManager; + private final ProcessRunner processRunner; + private final WhisperSetupConfiguration configuration; + private final HttpClient httpClient; + private final WhisperXSetupManager whisperXSetupManager; + + TestWhisperXSetupManager(@Mock final CondaSetupManager condaSetupManager, @Mock final WhisperSetupConfiguration configuration, + @Mock final ProcessRunner processRunner, @Mock final HttpClient httpClient) { + this.condaSetupManager = Objects.requireNonNull(condaSetupManager); + this.processRunner = Objects.requireNonNull(processRunner); + this.configuration = Objects.requireNonNull(configuration); + this.httpClient = Objects.requireNonNull(httpClient); + this.whisperXSetupManager = new WhisperXSetupManager(condaSetupManager, configuration, processRunner, httpClient); + } + + @Test + void testName() { + assertEquals("WhisperX", whisperXSetupManager.name()); + } + + @Test + void testInstallWhisperException(@TempDir final Path path) throws IOException { + when(configuration.venvPath()).thenReturn(path); + final var python = path.resolve("python"); + when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenThrow(IOException.class); + assertThrows(SetupException.class, whisperXSetupManager::installWhisper); + } + + @Test + void testInstallWhisperBadExitCode(@TempDir final Path path) throws IOException { + when(configuration.venvPath()).thenReturn(path); + final var python = path.resolve("python"); + final var result = mock(ProcessResult.class); + when(result.exitCode()).thenReturn(1); + when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenReturn(result); + assertThrows(SetupException.class, whisperXSetupManager::installWhisper); + } + + @Test + void testInstallWhisper(@TempDir final Path path) throws IOException { + when(configuration.venvPath()).thenReturn(path); + final var python = path.resolve("python"); + when(processRunner.run(List.of(python.toString(), "-m", "pip", "install", "-U", "git+https://github.com/m-bain/whisperx.git", "numpy<2"), Duration.ofMinutes(15))).thenReturn(mock(ProcessResult.class)); + assertDoesNotThrow(whisperXSetupManager::installWhisper); + } + + @Test + void testIsWhisperInstalledException(@TempDir final Path tempDir) throws IOException { + when(configuration.os()).thenReturn(OS.LINUX); + when(configuration.venvPath()).thenReturn(tempDir); + final var file = tempDir.resolve("python"); + Files.createFile(file); + when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenThrow(IOException.class); + assertThrows(SetupException.class, whisperXSetupManager::isWhisperInstalled); + } + + @Test + void testIsWhisperInstalledTrue(@TempDir final Path tempDir) throws IOException, SetupException { + when(configuration.os()).thenReturn(OS.LINUX); + when(configuration.venvPath()).thenReturn(tempDir); + final var file = tempDir.resolve("python"); + Files.createFile(file); + final var result = mock(ProcessResult.class); + when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenReturn(result); + assertTrue(whisperXSetupManager.isWhisperInstalled()); + } + + @Test + void testIsWhisperInstalledBadExitCode(@TempDir final Path tempDir) throws IOException, SetupException { + when(configuration.os()).thenReturn(OS.LINUX); + when(configuration.venvPath()).thenReturn(tempDir); + final var file = tempDir.resolve("python"); + Files.createFile(file); + final var result = mock(ProcessResult.class); + when(result.exitCode()).thenReturn(1); + when(processRunner.run(List.of(file.toString(), "-m", "pip", "show", "whisperx"), Duration.ofSeconds(5))).thenReturn(result); + assertFalse(whisperXSetupManager.isWhisperInstalled()); + } + + @Test + void testGetWhisperInstalledFileDoesntExist() throws SetupException { + when(configuration.venvPath()).thenReturn(Paths.get("path")); + assertFalse(whisperXSetupManager.isWhisperInstalled()); + } + + @Test + void testIllegal() { + assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(null, configuration, processRunner, httpClient)); + assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, null, processRunner, httpClient)); + assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, configuration, null, httpClient)); + assertThrows(NullPointerException.class, () -> new WhisperXSetupManager(condaSetupManager, configuration, processRunner, null)); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/TestWhisperXSubtitleExtractor.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/TestWhisperXSubtitleExtractor.java new file mode 100644 index 0000000..284b157 --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/extractor/whisperx/TestWhisperXSubtitleExtractor.java @@ -0,0 +1,94 @@ +package com.github.gtache.autosubtitle.subtitle.extractor.whisperx; + +import com.github.gtache.autosubtitle.Language; +import com.github.gtache.autosubtitle.impl.OS; +import com.github.gtache.autosubtitle.process.ProcessRunner; +import com.github.gtache.autosubtitle.subtitle.Subtitle; +import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverter; +import com.github.gtache.autosubtitle.subtitle.converter.SubtitleConverterProvider; +import com.github.gtache.autosubtitle.whisper.WhisperModels; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Objects; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.doReturn; + +@ExtendWith(MockitoExtension.class) +class TestWhisperXSubtitleExtractor { + + private final Path venvPath; + private final SubtitleConverterProvider converterProvider; + private final ProcessRunner processRunner; + private final OS os; + private final WhisperXSubtitleExtractor whisperXSubtitleExtractor; + + TestWhisperXSubtitleExtractor(@Mock final SubtitleConverterProvider converterProvider, @Mock final SubtitleConverter converter, + @Mock final ProcessRunner processRunner) { + this.converterProvider = Objects.requireNonNull(converterProvider); + doReturn(converter).when(converterProvider).getConverter("json"); + this.processRunner = Objects.requireNonNull(processRunner); + this.venvPath = Paths.get("path"); + this.os = OS.LINUX; + this.whisperXSubtitleExtractor = new WhisperXSubtitleExtractor(venvPath, converterProvider, processRunner, os); + } + + @Test + void testEN() { + final var path = Paths.get("in"); + final var outputPath = Paths.get("out"); + final var language = Language.EN; + final var model = WhisperModels.MEDIUM; + final var expected = List.of(venvPath.resolve("python").toString(), + "-m", "whisperx", "--verbose", "False", "--model", "medium.en", "--task", "transcribe", + "--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu", + "--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type", + "int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), "--language", + "en", path.toString()); + assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath)); + } + + @Test + void testENLarge() { + final var path = Paths.get("in"); + final var outputPath = Paths.get("out"); + final var language = Language.EN; + final var model = WhisperModels.LARGE; + final var expected = List.of(venvPath.resolve("python").toString(), + "-m", "whisperx", "--verbose", "False", "--model", "large", "--task", "transcribe", + "--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu", + "--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type", + "int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), "--language", + "en", path.toString()); + assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath)); + } + + @Test + void testAuto() { + final var path = Paths.get("in"); + final var outputPath = Paths.get("out"); + final var language = Language.AUTO; + final var model = WhisperModels.SMALL; + final var expected = List.of(venvPath.resolve("python").toString(), + "-m", "whisperx", "--verbose", "False", "--model", "small", "--task", "transcribe", + "--output_dir", outputPath.toString(), "--output_format", "json", "--device", "cpu", + "--condition_on_previous_text", "True", "--print_progress", "True", "--compute_type", + "int8", "--threads", String.valueOf(Runtime.getRuntime().availableProcessors()), path.toString()); + assertEquals(expected, whisperXSubtitleExtractor.createArgs(path, language, model, outputPath)); + } + + @Test + void testIllegal() { + assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(null, converterProvider, processRunner, os)); + assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, null, processRunner, os)); + assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, converterProvider, null, os)); + assertThrows(NullPointerException.class, () -> new WhisperXSubtitleExtractor(venvPath, converterProvider, processRunner, null)); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleConverter.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleConverter.java new file mode 100644 index 0000000..0ec8d75 --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleConverter.java @@ -0,0 +1,87 @@ +package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx; + +import com.github.gtache.autosubtitle.Language; +import com.github.gtache.autosubtitle.subtitle.Subtitle; +import com.github.gtache.autosubtitle.subtitle.converter.ParseException; +import com.github.gtache.autosubtitle.subtitle.impl.SubtitleCollectionImpl; +import com.github.gtache.autosubtitle.subtitle.impl.SubtitleImpl; +import com.google.gson.Gson; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.List; +import java.util.prefs.Preferences; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class TestJSONSubtitleConverter { + + private final Gson gson; + private final Preferences preferences; + private final int defaultMaxLineLength; + private final int defaultMaxLines; + private final JSONSubtitleConverter converter; + + TestJSONSubtitleConverter() { + this.gson = new Gson(); + this.preferences = mock(Preferences.class); + this.defaultMaxLineLength = 100; + when(preferences.getInt("maxLineLength", defaultMaxLineLength)).thenReturn(defaultMaxLineLength); + this.defaultMaxLines = 2; + when(preferences.getInt("maxLines", defaultMaxLines)).thenReturn(defaultMaxLines); + this.converter = new JSONSubtitleConverter(gson, preferences, defaultMaxLineLength, defaultMaxLines); + } + + @Test + void testFormatName() { + assertEquals("json", converter.formatName()); + } + + @Test + void testParseFormat() throws IOException, ParseException { + final var in = new String(getClass().getResourceAsStream("whisperx-in.json").readAllBytes(), StandardCharsets.UTF_8); + final var out = new String(getClass().getResourceAsStream("whisperx-out.json").readAllBytes(), StandardCharsets.UTF_8); + final var expected = new SubtitleCollectionImpl("This is a test. Yes.", List.of(new SubtitleImpl("This is a test.", 9, 410, null, null), new SubtitleImpl("Yes.", 450, 6963, null, null)), Language.FR); + assertEquals(expected, converter.parse(in)); + assertEquals(out, converter.format(expected, null)); + } + + @Test + void testParseOverMaxWords() throws IOException, ParseException { + final var in = new String(getClass().getResourceAsStream("whisperx-max-words.json").readAllBytes(), StandardCharsets.UTF_8); + final var expected = new SubtitleCollectionImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz Yes.", + List.of(new SubtitleImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr", 0, 18000, null, null), + new SubtitleImpl("ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz", 18000, 26000, null, null), + new SubtitleImpl("Yes.", 30000, 31000, null, null)), Language.EN); + assertEquals(expected, converter.parse(in)); + } + + @Test + void testParseOverMaxLines() throws IOException, ParseException { + final var in = new String(getClass().getResourceAsStream("whisperx-max-lines.json").readAllBytes(), StandardCharsets.UTF_8); + final var expected = new SubtitleCollectionImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll Yes.", + List.of(new SubtitleImpl("aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii\njjjjjjjjjj kkkkkkkkkk llllllllll", 0, 18000, null, null), + new SubtitleImpl("Yes.", 30000, 31000, null, null)), Language.EN); + assertEquals(expected, converter.parse(in)); + } + + @ParameterizedTest + @CsvSource({ + "test.json,true", + ".json,true", + "abcd.json,true", + "abcd.json2,false", + "abcd.js,false", + "abcd.jso,false", + "json,false", + }) + void testCanParse(final String name, final boolean expected) { + assertEquals(expected, converter.canParse(Path.of(name))); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleSegment.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleSegment.java new file mode 100644 index 0000000..e16acf2 --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleSegment.java @@ -0,0 +1,23 @@ +package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +class TestJSONSubtitleSegment { + @Test + void testGetters() { + final var start = 0.0; + final var end = 1.0; + final var text = "test"; + final var words = List.of(mock(JSONSubtitleWords.class)); + final var jsonSubtitleSegment = new JSONSubtitleSegment(start, end, text, words); + assertEquals(start, jsonSubtitleSegment.start()); + assertEquals(end, jsonSubtitleSegment.end()); + assertEquals(text, jsonSubtitleSegment.text()); + assertEquals(words, jsonSubtitleSegment.words()); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleWords.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleWords.java new file mode 100644 index 0000000..f744e93 --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitleWords.java @@ -0,0 +1,21 @@ +package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestJSONSubtitleWords { + + @Test + void testGetters() { + final var word = "test"; + final var start = 0.0; + final var end = 1.0; + final var score = 2.0; + final var jsonSubtitleWords = new JSONSubtitleWords(word, start, end, score); + assertEquals(word, jsonSubtitleWords.word()); + assertEquals(start, jsonSubtitleWords.start()); + assertEquals(end, jsonSubtitleWords.end()); + assertEquals(score, jsonSubtitleWords.score()); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitles.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitles.java new file mode 100644 index 0000000..4f54076 --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/TestJSONSubtitles.java @@ -0,0 +1,20 @@ +package com.github.gtache.autosubtitle.subtitle.parser.json.whisperx; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +class TestJSONSubtitles { + + @Test + void testGetters() { + final var segments = List.of(mock(JSONSubtitleSegment.class)); + final var language = "language"; + final var jsonSubtitles = new JSONSubtitles(segments, language); + assertEquals(segments, jsonSubtitles.segments()); + assertEquals(language, jsonSubtitles.language()); + } +} diff --git a/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/whisperx/TestWhisperXExtractionModelProvider.java b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/whisperx/TestWhisperXExtractionModelProvider.java new file mode 100644 index 0000000..5c8bdff --- /dev/null +++ b/whisper/whisperx/src/test/java/com/github/gtache/autosubtitle/whisperx/TestWhisperXExtractionModelProvider.java @@ -0,0 +1,35 @@ +package com.github.gtache.autosubtitle.whisperx; + +import com.github.gtache.autosubtitle.whisper.WhisperModels; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestWhisperXExtractionModelProvider { + + private final WhisperXExtractionModelProvider provider; + + TestWhisperXExtractionModelProvider() { + this.provider = new WhisperXExtractionModelProvider(); + } + + @Test + void testGetAvailableExtractionModels() { + assertEquals(Arrays.asList(WhisperModels.values()), provider.getAvailableExtractionModels()); + } + + @Test + void testGetDefaultExtractionModel() { + assertEquals(WhisperModels.LARGE, provider.getDefaultExtractionModel()); + } + + @Test + void testGetExtractionModel() { + for (final var value : WhisperModels.values()) { + assertEquals(value, provider.getExtractionModel(value.name())); + assertEquals(value, provider.getExtractionModel(value.name().toLowerCase())); + } + } +} diff --git a/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-in.json b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-in.json new file mode 100644 index 0000000..2d33c40 --- /dev/null +++ b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-in.json @@ -0,0 +1,87 @@ +{ + "segments": [ + { + "start": 0.009, + "end": 0.41, + "text": "This is a test.", + "words": [ + { + "word": "this", + "start": 0.009, + "end": 0.089, + "score": 0.304 + }, + { + "word": "is", + "start": 0.109, + "end": 0.249, + "score": 0.33 + }, + { + "word": "a", + "start": 0.27, + "end": 0.31, + "score": 0.29 + }, + { + "word": "test", + "start": 0.33, + "end": 0.41, + "score": 0.002 + }, + { + "word": "." + } + ] + }, + { + "start": 0.45, + "end": 6.963, + "text": "Yes.", + "words": [ + { + "word": "Yes", + "start": 0.45, + "end": 6.963, + "score": 0.682 + } + ] + } + ], + "word_segments": [ + { + "word": "This", + "start": 0.009, + "end": 0.089, + "score": 0.304 + }, + { + "word": "is", + "start": 0.109, + "end": 0.249, + "score": 0.33 + }, + { + "word": "a", + "start": 0.27, + "end": 0.31, + "score": 0.29 + }, + { + "word": "test", + "start": 0.33, + "end": 0.41, + "score": 0.002 + }, + { + "word": "." + }, + { + "word": "Yes", + "start": 0.45, + "end": 6.963, + "score": 0.682 + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-lines.json b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-lines.json new file mode 100644 index 0000000..173d80b --- /dev/null +++ b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-lines.json @@ -0,0 +1,97 @@ +{ + "segments": [ + { + "start": 0.000, + "end": 18.0, + "text": "aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii jjjjjjjjjj kkkkkkkkkk llllllllll", + "words": [ + { + "word": "aaaaaaaaaa", + "start": 0.000, + "end": 1.0, + "score": 0.304 + }, + { + "word": "bbbbbbbbbb", + "start": 1.000, + "end": 2.0, + "score": 0.304 + }, + { + "word": "cccccccccc", + "start": 2.000, + "end": 3.0, + "score": 0.304 + }, + { + "word": "dddddddddd", + "start": 3.000, + "end": 4.0, + "score": 0.304 + }, + { + "word": "eeeeeeeeee", + "start": 4.000, + "end": 5.0, + "score": 0.304 + }, + { + "word": "ffffffffff", + "start": 5.000, + "end": 6.0, + "score": 0.304 + }, + { + "word": "gggggggggg", + "start": 6.000, + "end": 7.0, + "score": 0.304 + }, + { + "word": "hhhhhhhhhh", + "start": 7.000, + "end": 8.0, + "score": 0.304 + }, + { + "word": "iiiiiiiiii", + "start": 8.000, + "end": 9.0, + "score": 0.304 + }, + { + "word": "jjjjjjjjjj", + "start": 9.000, + "end": 10.0, + "score": 0.304 + }, + { + "word": "kkkkkkkkkk", + "start": 10.000, + "end": 11.0, + "score": 0.304 + }, + { + "word": "llllllllll", + "start": 11.000, + "end": 12.0, + "score": 0.304 + } + ] + }, + { + "start": 30.00, + "end": 31.000, + "text": "Yes.", + "words": [ + { + "word": "Yes", + "start": 30.00, + "end": 31.000, + "score": 0.682 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-words.json b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-words.json new file mode 100644 index 0000000..1b54d64 --- /dev/null +++ b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-max-words.json @@ -0,0 +1,181 @@ +{ + "segments": [ + { + "start": 0.000, + "end": 33.0, + "text": "aaaaaaaaaa bbbbbbbbbb cccccccccc dddddddddd eeeeeeeeee ffffffffff gggggggggg hhhhhhhhhh iiiiiiiiii jjjjjjjjjj kkkkkkkkkk llllllllll mmmmmmmmmm nnnnnnnnnn oooooooooo pppppppppp qqqqqqqqqq rrrrrrrrrr ssssssssss tttttttttt uuuuuuuuuu vvvvvvvvvv wwwwwwwwww xxxxxxxxxx yyyyyyyyyy zzzzzzzzzz", + "words": [ + { + "word": "aaaaaaaaaa", + "start": 0.000, + "end": 1.0, + "score": 0.304 + }, + { + "word": "bbbbbbbbbb", + "start": 1.000, + "end": 2.0, + "score": 0.304 + }, + { + "word": "cccccccccc", + "start": 2.000, + "end": 3.0, + "score": 0.304 + }, + { + "word": "dddddddddd", + "start": 3.000, + "end": 4.0, + "score": 0.304 + }, + { + "word": "eeeeeeeeee", + "start": 4.000, + "end": 5.0, + "score": 0.304 + }, + { + "word": "ffffffffff", + "start": 5.000, + "end": 6.0, + "score": 0.304 + }, + { + "word": "gggggggggg", + "start": 6.000, + "end": 7.0, + "score": 0.304 + }, + { + "word": "hhhhhhhhhh", + "start": 7.000, + "end": 8.0, + "score": 0.304 + }, + { + "word": "iiiiiiiiii", + "start": 8.000, + "end": 9.0, + "score": 0.304 + }, + { + "word": "jjjjjjjjjj", + "start": 9.000, + "end": 10.0, + "score": 0.304 + }, + { + "word": "kkkkkkkkkk", + "start": 10.000, + "end": 11.0, + "score": 0.304 + }, + { + "word": "llllllllll", + "start": 11.000, + "end": 12.0, + "score": 0.304 + }, + { + "word": "mmmmmmmmmm", + "start": 12.000, + "end": 13.0, + "score": 0.304 + }, + { + "word": "nnnnnnnnnn", + "start": 13.000, + "end": 14.0, + "score": 0.304 + }, + { + "word": "oooooooooo", + "start": 14.000, + "end": 15.0, + "score": 0.304 + }, + { + "word": "pppppppppp", + "start": 15.000, + "end": 16.0, + "score": 0.304 + }, + { + "word": "qqqqqqqqqq", + "start": 16.000, + "end": 17.0, + "score": 0.304 + }, + { + "word": "rrrrrrrrrr", + "start": 17.000, + "end": 18.0, + "score": 0.304 + }, + { + "word": "ssssssssss", + "start": 18.000, + "end": 19.0, + "score": 0.304 + }, + { + "word": "tttttttttt", + "start": 19.000, + "end": 20.0, + "score": 0.304 + }, + { + "word": "uuuuuuuuuu", + "start": 20.000, + "end": 21.0, + "score": 0.304 + }, + { + "word": "vvvvvvvvvv", + "start": 21.000, + "end": 22.0, + "score": 0.304 + }, + { + "word": "wwwwwwwwww", + "start": 22.000, + "end": 23.0, + "score": 0.304 + }, + { + "word": "xxxxxxxxxx", + "start": 23.000, + "end": 24.0, + "score": 0.304 + }, + { + "word": "yyyyyyyyyy", + "start": 24.000, + "end": 25.0, + "score": 0.304 + }, + { + "word": "zzzzzzzzzz", + "start": 25.000, + "end": 26.0, + "score": 0.304 + } + ] + }, + { + "start": 30.00, + "end": 31.000, + "text": "Yes.", + "words": [ + { + "word": "Yes", + "start": 30.00, + "end": 31.000, + "score": 0.682 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-out.json b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-out.json new file mode 100644 index 0000000..854d1c6 --- /dev/null +++ b/whisper/whisperx/src/test/resources/com/github/gtache/autosubtitle/subtitle/parser/json/whisperx/whisperx-out.json @@ -0,0 +1 @@ +{"segments":[{"start":0.009,"end":0.41,"text":"This is a test.","words":[]},{"start":0.45,"end":6.963,"text":"Yes.","words":[]}],"language":"fr"} \ No newline at end of file