Exact picture seems to work

This commit is contained in:
2025-09-02 21:53:03 +02:00
parent f15208fe6d
commit b2571c191f
137 changed files with 2487 additions and 797 deletions

View File

@@ -0,0 +1,16 @@
package ch.gtache.fro.modules.oiseaux.net;
import jakarta.inject.Qualifier;
import java.lang.annotation.Documented;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
/**
* Qualifier for the oiseaux.net fetcher.
* <p>
* Marks an injection point or a provider method as belonging to the oiseaux.net fetcher, so that
* its binding can be told apart from other bindings of the same type.
* The annotation is retained at runtime and appears in the generated documentation.
*/
@Qualifier
@Documented
@Retention(RetentionPolicy.RUNTIME)
public @interface OiseauxNet {
}

View File

@@ -0,0 +1,33 @@
package ch.gtache.fro.modules.oiseaux.net;
import ch.gtache.fro.Fetcher;
import ch.gtache.fro.FetcherConfiguration;
import ch.gtache.fro.impl.FetcherConfigurationImpl;
import ch.gtache.fro.oiseaux.net.OiseauxNetFetcher;
import dagger.Binds;
import dagger.Module;
import dagger.Provides;
import dagger.multibindings.IntoSet;
import jakarta.inject.Singleton;
/**
 * Dagger module for the oiseaux.net fetcher.
 * <p>
 * Contributes {@link OiseauxNetFetcher} to the multibound set of {@link Fetcher}s and provides the
 * {@link FetcherConfiguration} qualified with {@link OiseauxNet}.
 */
@Module
public abstract class OiseauxNetModule {

    private OiseauxNetModule() {
        // Never instantiated: the module only declares bindings
    }

    /**
     * Provides the singleton configuration qualified with {@link OiseauxNet}.
     *
     * @return A {@link FetcherConfigurationImpl} constructed with {@code "oiseaux.net"}
     */
    @Singleton
    @OiseauxNet
    @Provides
    static FetcherConfiguration providesFetcherConfiguration() {
        return new FetcherConfigurationImpl("oiseaux.net");
    }

    /**
     * Adds the oiseaux.net fetcher to the set of available fetchers.
     *
     * @param fetcher The oiseaux.net fetcher
     * @return The fetcher, bound as a {@link Fetcher}
     */
    @IntoSet
    @Binds
    abstract Fetcher bindsFetcher(OiseauxNetFetcher fetcher);
}

View File

@@ -37,30 +37,32 @@ final class BirdTranslationsFetcher {
final var driver = new ChromeDriver(options);
for (final var value : CommonBirds.values()) {
final var name = getName(value);
final var url = "https://www.oiseaux.net/oiseaux/" + name + ".html";
driver.get(url);
final var html = (String) driver.executeScript("return document.documentElement.outerHTML");
final var document = Jsoup.parse(html);
final var title = document.select("span[itemprop=headline]");
final var elements = document.select("span.flag");
if (title.isEmpty() || elements.isEmpty()) {
System.out.println(value + " NOT FOUND!");
} else {
createOrAppend("fr", value.name(), title.text());
for (final var element : elements) {
final var clazz = element.attr("class");
final var split = clazz.split(" ");
final var language = Arrays.stream(split).filter(s -> s.startsWith("flag-")).map(s -> s.split("-")[1]).findFirst().orElse(null);
if (language == null) {
System.out.println("Language not found in " + element);
} else {
final var next = element.nextElementSibling();
final var text = END_COMMA_PATTERN.matcher(next.text()).replaceAll("");
createOrAppend(language, value.name(), text);
if (!name.isBlank()) {
final var url = "https://www.oiseaux.net/oiseaux/" + name + ".html";
driver.get(url);
final var html = (String) driver.executeScript("return document.documentElement.outerHTML");
final var document = Jsoup.parse(html);
final var title = document.select("span[itemprop=headline]");
final var elements = document.select("span.flag");
if (title.isEmpty() || elements.isEmpty()) {
System.out.println(value + " NOT FOUND!");
} else {
createOrAppend("fr", value.name(), title.text());
for (final var element : elements) {
final var clazz = element.attr("class");
final var split = clazz.split(" ");
final var language = Arrays.stream(split).filter(s -> s.startsWith("flag-")).map(s -> s.split("-")[1]).findFirst().orElse(null);
if (language == null) {
System.out.println("Language not found in " + element);
} else {
final var next = element.nextElementSibling();
final var text = END_COMMA_PATTERN.matcher(next.text()).replaceAll("");
createOrAppend(language, value.name(), text);
}
}
}
Thread.sleep(1000);
}
Thread.sleep(1000);
}
}

View File

@@ -2,12 +2,13 @@ package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.Bird;
import ch.gtache.fro.BirdProvider;
import ch.gtache.fro.Configuration;
import ch.gtache.fro.FetchException;
import ch.gtache.fro.Fetcher;
import ch.gtache.fro.FetcherConfiguration;
import ch.gtache.fro.PictureType;
import ch.gtache.fro.SoundType;
import ch.gtache.fro.impl.CommonBirds;
import ch.gtache.fro.modules.oiseaux.net.OiseauxNet;
import ch.gtache.fro.selenium.AbstractSeleniumFetcher;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
@@ -31,7 +32,7 @@ import java.util.regex.Pattern;
public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
private static final Logger logger = LogManager.getLogger(OiseauxNetFetcher.class);
private static final Pattern SOUND_PATTERN = Pattern.compile("https://www.xeno-canto.org/(?<id>\\d+)/download");
private static final Pattern SOUND_PATTERN = Pattern.compile("https://www\\.xeno-canto\\.org/(?<id>\\d+)/download");
/**
@@ -42,7 +43,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
* @throws NullPointerException If any parameter is null
*/
@Inject
OiseauxNetFetcher(final BirdProvider birdProvider, final Configuration configuration) {
OiseauxNetFetcher(final BirdProvider birdProvider, @OiseauxNet final FetcherConfiguration configuration) {
super(birdProvider, configuration);
}
@@ -71,43 +72,47 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
private void downloadImages(final Bird bird) throws FetchException {
final var url = getImagesURL(bird);
logger.info("Trying to download {}", url);
try {
final var document = getDocument(url);
final var figures = document.select("figure");
for (final var figure : figures) {
handleFigure(bird, url, figure);
if (!url.isBlank()) {
logger.info("Trying to download {}", url);
try {
final var document = getDocument(url);
final var figures = document.select("figure");
for (final var figure : figures) {
handleFigure(bird, url, figure);
}
} catch (final IOException e) {
throw new FetchException("Failed to parse and download " + url, e);
}
} catch (final IOException e) {
throw new FetchException("Failed to parse and download " + url, e);
}
}
private void downloadSounds(final Bird bird) throws FetchException {
final var jsUrl = getJSUrl(bird);
try {
final var jsUrl = getJSUrl(bird);
final var content = Jsoup.connect(jsUrl).ignoreContentType(true).execute().body();
final var url = getURL(bird);
final var pageContent = getDocument(url);
final var soundTypes = getSoundTypes(pageContent);
final var matcher = SOUND_PATTERN.matcher(content);
var i = 0;
while (matcher.find()) {
final var soundUrl = matcher.group();
final var soundType = i >= soundTypes.size() ? SoundType.UNKNOWN : soundTypes.get(i);
try {
downloadSound(bird, soundUrl, soundType);
} catch (final HttpStatusException e) {
if (e.getStatusCode() == 404) {
logger.warn("Sound {} not found", soundUrl);
} else {
throw e;
if (!jsUrl.isBlank()) {
final var content = Jsoup.connect(jsUrl).ignoreContentType(true).execute().body();
final var url = getURL(bird);
final var pageContent = getDocument(url);
final var soundTypes = getSoundTypes(pageContent);
final var matcher = SOUND_PATTERN.matcher(content);
var i = 0;
while (matcher.find()) {
final var soundUrl = matcher.group();
final var soundType = i >= soundTypes.size() ? SoundType.UNKNOWN : soundTypes.get(i);
try {
downloadSound(bird, soundUrl, soundType);
} catch (final HttpStatusException e) {
if (e.getStatusCode() == 404) {
logger.warn("Sound {} not found", soundUrl);
} else {
throw e;
}
}
i++;
}
if (i != soundTypes.size()) {
logger.warn("Number of sounds ({}) does not match number of sound types ({})", i, soundTypes.size());
}
i++;
}
if (i != soundTypes.size()) {
logger.warn("Number of sounds ({}) does not match number of sound types ({})", i, soundTypes.size());
}
} catch (final HttpStatusException e) {
if (e.getStatusCode() == 404) {
@@ -116,7 +121,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
throw new FetchException(e);
}
} catch (final IOException e) {
throw new FetchException("Failed to parse and download " + getJSUrl(bird), e);
throw new FetchException("Failed to parse and download " + jsUrl, e);
}
}
@@ -143,12 +148,12 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
private static String getURL(final Bird bird) {
final var name = Utils.getName(bird);
return "https://www.oiseaux.net/oiseaux/" + name + ".html";
return name.isBlank() ? "" : "https://www.oiseaux.net/oiseaux/" + name + ".html";
}
private static String getJSUrl(final Bird bird) {
final var name = Utils.getName(bird);
return "https://www.oiseaux.net/front/js/espece/" + name + ".js";
return name.isBlank() ? "" : "https://www.oiseaux.net/front/js/espece/" + name + ".js";
}
private void handleFigure(final Bird bird, final String url, final Element figure) throws IOException {
@@ -198,7 +203,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
private static String getImagesURL(final Bird bird) {
final var name = Utils.getName(bird);
return "https://www.oiseaux.net/oiseaux/photos/" + name + ".html";
return name.isBlank() ? "" : "https://www.oiseaux.net/oiseaux/photos/" + name + ".html";
}
private static PictureType getPictureType(final String name) {

View File

@@ -14,7 +14,7 @@ final class Utils {
return correctName(bird.name().replace("_", ".").toLowerCase(Locale.ROOT));
}
static String correctName(final String name) {
private static String correctName(final String name) {
return switch (name) {
case "bec.croise.des.sapins" -> "bec-croise.des.sapins";
case "busard.saint.martin" -> "busard.saint-martin";
@@ -28,8 +28,6 @@ final class Utils {
case "hibou.moyen.duc" -> "hibou.moyen-duc";
case "martin.pecheur.d.europe" -> "martin-pecheur.d.europe";
case "mesange.a.longue.queue" -> "orite.a.longue.queue";
//case "mesange.alpestre" -> "mesange.boreale";
//case "mesange.des.saules" -> "mesange.boreale";
case "petit.duc.scops" -> "petit-duc.scops";
case "pie.grieche.a.poitrine.rose" -> "pie-grieche.a.poitrine.rose";
case "pie.grieche.a.tete.rousse" -> "pie-grieche.a.tete.rousse";
@@ -40,6 +38,7 @@ final class Utils {
case "puffin.cendre" -> "puffin.boreal";
case "roitelet.a.triple.bandeau" -> "roitelet.triple-bandeau";
case "vautour.percnoptere" -> "percnoptere.d.egypte";
case "mesange.alpestre", "mesange.des.saules" -> "";
default -> name;
};
}

View File

@@ -3,11 +3,12 @@
*/
module ch.gtache.fro.oiseaux.net {
requires transitive ch.gtache.fro.selenium;
requires org.seleniumhq.selenium.chrome_driver;
requires org.jsoup;
requires jakarta.inject;
requires java.compiler;
requires transitive jakarta.inject;
requires transitive java.compiler;
requires org.apache.logging.log4j;
requires ch.gtache.fro.api;
requires org.seleniumhq.selenium.chrome_driver;
exports ch.gtache.fro.oiseaux.net;
exports ch.gtache.fro.modules.oiseaux.net;
}

View File

@@ -2,7 +2,7 @@ package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.FetchException;
import ch.gtache.fro.impl.CommonBirdsProvider;
import ch.gtache.fro.impl.ConfigurationImpl;
import ch.gtache.fro.impl.FetcherConfigurationImpl;
import static org.mockito.Mockito.*;
@@ -10,7 +10,7 @@ class TestOiseauxNetFetcher {
public static void main(final String[] args) throws FetchException {
final var provider = spy(CommonBirdsProvider.class);
final var configuration = mock(ConfigurationImpl.class, withSettings().useConstructor(provider).defaultAnswer(CALLS_REAL_METHODS));
final var configuration = mock(FetcherConfigurationImpl.class, withSettings().useConstructor("oiseaux.net").defaultAnswer(CALLS_REAL_METHODS));
try (final var fetcher = new OiseauxNetFetcher(provider, configuration)) {
System.out.println(fetcher.fetchAll());
}