Initial commit

This commit is contained in:
2025-08-28 22:38:53 +02:00
commit f15208fe6d
232 changed files with 16821 additions and 0 deletions

24
oiseaux-net/pom.xml Normal file
View File

@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven descriptor of the oiseaux.net fetcher module (artifact fro-oiseaux-net).
  Group id and version are inherited from the parent ch.gtache.fro:fro:1.0.0-SNAPSHOT.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ch.gtache.fro</groupId>
<artifactId>fro</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>fro-oiseaux-net</artifactId>
<!--
  No <version> is given on the dependencies below: presumably they are managed by the
  parent's <dependencyManagement> - verify.
  NOTE(review): this module's module-info.java also requires jsoup, the Selenium chrome driver,
  jakarta.inject and log4j, and its test code uses Mockito. None of them is declared here, so they
  presumably arrive transitively (fro-selenium) or from the parent - confirm, and consider
  declaring directly-used dependencies explicitly.
-->
<dependencies>
<dependency>
<groupId>ch.gtache.fro</groupId>
<artifactId>fro-selenium</artifactId>
</dependency>
<dependency>
<groupId>com.google.dagger</groupId>
<artifactId>dagger</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,76 @@
package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.impl.CommonBirds;
import org.jsoup.Jsoup;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.regex.Pattern;
import static ch.gtache.fro.oiseaux.net.Utils.getName;
/**
 * Command-line utility that fetches the translated names of every bird of {@link CommonBirds}
 * from oiseaux.net and appends them to one {@code BirdBundle_<language>.properties} file per
 * language in the current working directory.
 * <p>
 * NOTE(review): entries are always appended, so running the tool twice in the same directory
 * writes every key twice. Translations are written verbatim, without {@code .properties}
 * escaping - presumably fine for plain bird names, confirm.
 */
final class BirdTranslationsFetcher {

    /** Trailing comma (with surrounding whitespace) that follows a translation on the page. */
    private static final Pattern END_COMMA_PATTERN = Pattern.compile("\\s*,\\s*$");

    /** Accepted language codes; anything else must never end up in a file name. */
    private static final Pattern LANGUAGE_PATTERN = Pattern.compile("[A-Za-z0-9_]+");

    /** Prefix of the CSS class that carries the language code, e.g. {@code flag-en}. */
    private static final String FLAG_CLASS_PREFIX = "flag-";

    private BirdTranslationsFetcher() {
    }

    /**
     * Loads the oiseaux.net page of each bird in a headless Chrome, extracts the French title and
     * the flagged translations, and appends them to the bundle files. Waits one second between
     * two birds.
     *
     * @param args command line arguments (unused)
     * @throws Exception if a bundle file cannot be written or the thread is interrupted
     */
    public static void main(final String[] args) throws Exception {
        final var options = new ChromeOptions();
        options.addArguments("--headless=new");
        final var driver = new ChromeDriver(options);
        try {
            for (final var value : CommonBirds.values()) {
                final var name = getName(value);
                final var url = "https://www.oiseaux.net/oiseaux/" + name + ".html";
                driver.get(url);
                final var html = (String) driver.executeScript("return document.documentElement.outerHTML");
                final var document = Jsoup.parse(html);
                final var title = document.select("span[itemprop=headline]");
                final var elements = document.select("span.flag");
                if (title.isEmpty() || elements.isEmpty()) {
                    System.out.println(value + " NOT FOUND!");
                } else {
                    createOrAppend("fr", value.name(), title.text());
                    for (final var element : elements) {
                        final var language = extractLanguage(element.attr("class"));
                        if (language == null) {
                            System.out.println("Language not found in " + element);
                        } else {
                            // The translation is the element that directly follows the flag
                            final var next = element.nextElementSibling();
                            if (next == null) {
                                System.out.println("Translation not found after " + element);
                            } else {
                                final var text = END_COMMA_PATTERN.matcher(next.text()).replaceAll("");
                                createOrAppend(language, value.name(), text);
                            }
                        }
                    }
                }
                Thread.sleep(1000);
            }
        } finally {
            // Always terminate the browser and the chromedriver process, even on failure
            driver.quit();
        }
    }

    /**
     * Extracts the language code from the {@code class} attribute of a flag element.
     *
     * @param classAttribute the space-separated CSS classes of the element
     * @return the part between the first and second dash of the first usable {@code flag-xx}
     *         class, or {@code null} if there is none or it is not a plain language code
     */
    private static String extractLanguage(final String classAttribute) {
        return Arrays.stream(classAttribute.split(" "))
                .filter(token -> token.startsWith(FLAG_CLASS_PREFIX))
                .map(token -> token.split("-"))
                // A bare "flag-" has no second part; the code comes from a remote page and is
                // used in a file name, so anything but a plain code is rejected
                .filter(parts -> parts.length > 1 && LANGUAGE_PATTERN.matcher(parts[1]).matches())
                .map(parts -> parts[1])
                .findFirst()
                .orElse(null);
    }

    /**
     * Appends {@code bird.<birdName>.label=<translation>} to the bundle of the given language,
     * creating the file if it does not exist yet.
     *
     * @param language    the language code, used in the file name
     * @param birdName    the bird identifier, used in the key
     * @param translation the translated name
     * @throws IOException if the file cannot be written
     */
    private static void createOrAppend(final String language, final String birdName, final String translation) throws IOException {
        final var file = Path.of("BirdBundle_" + language + ".properties");
        final var toWrite = "bird." + birdName + ".label=" + translation + "\n";
        // CREATE + APPEND in one call: no check-then-act race between exists() and the write
        Files.writeString(file, toWrite, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    }
}

View File

@@ -0,0 +1,217 @@
package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.Bird;
import ch.gtache.fro.BirdProvider;
import ch.gtache.fro.Configuration;
import ch.gtache.fro.FetchException;
import ch.gtache.fro.Fetcher;
import ch.gtache.fro.PictureType;
import ch.gtache.fro.SoundType;
import ch.gtache.fro.impl.CommonBirds;
import ch.gtache.fro.selenium.AbstractSeleniumFetcher;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
 * Implementation of {@link Fetcher} for oiseaux.net.
 * <p>
 * For each bird, downloads the pictures listed on the species' photo page and the xeno-canto
 * recordings referenced by the species' JavaScript file. Loading pages and storing files is
 * delegated to the inherited {@code getDocument}, {@code downloadImage} and {@code downloadSound}.
 */
@Singleton
public class OiseauxNetFetcher extends AbstractSeleniumFetcher {

    private static final Logger logger = LogManager.getLogger(OiseauxNetFetcher.class);

    /** Download link of a xeno-canto recording; the {@code id} group is the numeric recording id. */
    private static final Pattern SOUND_PATTERN = Pattern.compile("https://www\\.xeno-canto\\.org/(?<id>\\d+)/download");

    private static final int HTTP_NOT_FOUND = 404;

    /**
     * Instantiates the fetcher
     *
     * @param birdProvider  The bird provider
     * @param configuration The configuration
     * @throws NullPointerException If any parameter is null - NOTE(review): the check is not done
     *                              here, presumably by the superclass constructor; verify
     */
    @Inject
    OiseauxNetFetcher(final BirdProvider birdProvider, final Configuration configuration) {
        super(birdProvider, configuration);
    }

    /**
     * Pauses for 500 ms.
     *
     * @throws IOException if the thread is interrupted while waiting (the interrupt flag is restored)
     */
    @Override
    protected void waitFor() throws IOException {
        try {
            Thread.sleep(500L);
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
    }

    @Override
    public String name() {
        return "oiseaux.net";
    }

    /**
     * Downloads the pictures, then the sounds, of the given bird.
     *
     * @param bird the bird to download
     * @throws FetchException if a page or a file cannot be retrieved
     */
    @Override
    protected void download(final Bird bird) throws FetchException {
        if (bird == CommonBirds.MESANGE_ALPESTRE || bird == CommonBirds.MESANGE_DES_SAULES) {
            // NOTE(review): presumably these two species have no dedicated page on oiseaux.net - confirm
            logger.debug("Skipping {}", bird);
            return;
        }
        downloadImages(bird);
        downloadSounds(bird);
    }

    /**
     * Downloads the image of every {@code <figure>} of the bird's photo page.
     */
    private void downloadImages(final Bird bird) throws FetchException {
        final var url = getImagesURL(bird);
        logger.info("Trying to download {}", url);
        try {
            final var document = getDocument(url);
            final var figures = document.select("figure");
            for (final var figure : figures) {
                handleFigure(bird, url, figure);
            }
        } catch (final IOException e) {
            throw new FetchException("Failed to parse and download " + url, e);
        }
    }

    /**
     * Downloads every xeno-canto recording referenced by the bird's JavaScript file. The n-th
     * recording is given the type of the n-th playlist entry of the bird's page, or
     * {@link SoundType#UNKNOWN} when the page has fewer entries. A 404 is logged and tolerated.
     */
    private void downloadSounds(final Bird bird) throws FetchException {
        final var jsUrl = getJSUrl(bird);
        try {
            // NOTE(review): this request goes through Jsoup directly, not through getDocument
            final var content = Jsoup.connect(jsUrl).ignoreContentType(true).execute().body();
            final var pageContent = getDocument(getURL(bird));
            final var soundTypes = getSoundTypes(pageContent);
            final var matcher = SOUND_PATTERN.matcher(content);
            var soundCount = 0;
            while (matcher.find()) {
                final var soundUrl = matcher.group();
                final var soundType = soundCount < soundTypes.size() ? soundTypes.get(soundCount) : SoundType.UNKNOWN;
                try {
                    downloadSound(bird, soundUrl, soundType);
                } catch (final HttpStatusException e) {
                    if (e.getStatusCode() != HTTP_NOT_FOUND) {
                        throw e;
                    }
                    logger.warn("Sound {} not found", soundUrl);
                }
                soundCount++;
            }
            if (soundCount != soundTypes.size()) {
                logger.warn("Number of sounds ({}) does not match number of sound types ({})", soundCount, soundTypes.size());
            }
        } catch (final HttpStatusException e) {
            if (e.getStatusCode() == HTTP_NOT_FOUND) {
                logger.warn("Sound for bird {} not found", bird);
            } else {
                // Keep the bird in the message; the cause already carries status and URL
                throw new FetchException("Failed to download sounds for " + bird, e);
            }
        } catch (final IOException e) {
            throw new FetchException("Failed to parse and download " + jsUrl, e);
        }
    }

    /**
     * Names xeno-canto recordings {@code <id>.mp3}; other URLs use the inherited naming.
     */
    @Override
    protected String getSoundFilename(final String url) {
        final var matcher = SOUND_PATTERN.matcher(url);
        if (matcher.find()) {
            return matcher.group("id") + ".mp3";
        }
        return super.getSoundFilename(url);
    }

    /**
     * Returns the sound types of the non-blank playlist entries of the page, in document order.
     */
    private static List<SoundType> getSoundTypes(final Document pageContent) {
        final var sounds = pageContent.select("a[class=jp-playlist-item],a[class=jp-playlist-item jp-playlist-current]");
        final var soundTypes = new ArrayList<SoundType>();
        for (final var sound : sounds) {
            final var label = sound.text();
            if (!label.isBlank()) {
                soundTypes.add(getSoundType(label));
            }
        }
        return soundTypes;
    }

    private static String getURL(final Bird bird) {
        final var name = Utils.getName(bird);
        return "https://www.oiseaux.net/oiseaux/" + name + ".html";
    }

    private static String getJSUrl(final Bird bird) {
        final var name = Utils.getName(bird);
        return "https://www.oiseaux.net/front/js/espece/" + name + ".js";
    }

    /**
     * Downloads the image of a figure, if the figure contains a link wrapping an image.
     */
    private void handleFigure(final Bird bird, final String url, final Element figure) throws IOException {
        final var link = figure.selectFirst("a");
        if (link == null) {
            return;
        }
        final var img = link.selectFirst("img");
        if (img == null) {
            return;
        }
        downloadImage(bird, url, img, getPictureType(figure));
    }

    /**
     * Builds the image URL from the {@code data-src} attribute, relative to the directory of the
     * page, and downloads it. Images without a {@code data-src} are skipped.
     */
    private void downloadImage(final Bird bird, final String url, final Element img, final PictureType pictureType) throws IOException {
        final var src = img.attr("data-src");
        if (src.isBlank()) {
            // attr() returns "" when the attribute is missing; the URL would then be the directory itself
            logger.warn("Image without data-src on {}, skipped", url);
            return;
        }
        final var directoryUrl = url.substring(0, url.lastIndexOf('/'));
        // NOTE(review): presumably "/id/" is the thumbnail directory and dropping it yields the full-size picture - confirm
        final var imageUrl = directoryUrl + addLeadingSlash(src).replace("/id/", "/");
        downloadImage(bird, imageUrl, pictureType);
    }

    private static String addLeadingSlash(final String src) {
        return src.startsWith("/") ? src : "/" + src;
    }

    /**
     * Maps a playlist label to a sound type, ignoring case and surrounding whitespace.
     */
    private static SoundType getSoundType(final String name) {
        return switch (name.trim().toLowerCase(Locale.ROOT)) {
            case "♫ chant" -> SoundType.SONG;
            case "♫ cri" -> SoundType.CALL;
            case "♫ cri en vol" -> SoundType.FLY_CALL;
            default -> SoundType.UNKNOWN;
        };
    }

    /**
     * Reads the picture type from the {@code <figcaption>} that directly follows the figure.
     * NOTE(review): a figcaption is usually a child of its figure, not a sibling - presumably the
     * site's markup differs; confirm.
     */
    private static PictureType getPictureType(final Element figure) {
        final var caption = figure.nextElementSibling();
        if (caption != null && caption.tagName().equals("figcaption")) {
            return getPictureType(caption.text());
        }
        return PictureType.UNKNOWN;
    }

    private static String getImagesURL(final Bird bird) {
        final var name = Utils.getName(bird);
        return "https://www.oiseaux.net/oiseaux/photos/" + name + ".html";
    }

    /**
     * Maps a caption to a picture type, ignoring case and surrounding whitespace.
     */
    private static PictureType getPictureType(final String name) {
        return switch (name.trim().toLowerCase(Locale.ROOT)) {
            case "adulte" -> PictureType.ADULT;
            case "adulte plum. nuptial" -> PictureType.ADULT_NUPTIAL;
            case "adulte plum. internuptial" -> PictureType.ADULT_INTERNUPTIAL;
            case "juvénile", "poussin", "subadulte", "1ère année", "immature" -> PictureType.JUVENILE;
            case "♂ adulte plum. nuptial" -> PictureType.ADULT_NUPTIAL_MALE;
            case "♂ adulte plum. internuptial" -> PictureType.ADULT_INTERNUPTIAL_MALE;
            case "♂ adulte" -> PictureType.ADULT_MALE;
            case "♀ adulte" -> PictureType.ADULT_FEMALE;
            default -> PictureType.UNKNOWN;
        };
    }
}

View File

@@ -0,0 +1,46 @@
package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.Bird;
import java.util.Locale;
/**
 * Helpers converting a {@link Bird} into the name oiseaux.net uses in its URLs.
 */
final class Utils {

    /**
     * Dotted names whose oiseaux.net spelling differs (hyphens or a different species name).
     * "mesange.alpestre" and "mesange.des.saules" are deliberately not mapped to
     * "mesange.boreale"; OiseauxNetFetcher.download skips these two birds.
     */
    private static final java.util.Map<String, String> CORRECTED_NAMES = buildCorrectedNames();

    private Utils() {
    }

    /**
     * Returns the oiseaux.net name of a bird: its name with underscores turned into dots,
     * lower-cased, then passed through {@link #correctName(String)}.
     *
     * @param bird the bird
     * @return the name to use in oiseaux.net URLs
     */
    static String getName(final Bird bird) {
        final var dotted = bird.name().replace('_', '.');
        final var lowerCased = dotted.toLowerCase(Locale.ROOT);
        return correctName(lowerCased);
    }

    /**
     * Replaces a dotted name by its oiseaux.net spelling when one is registered.
     *
     * @param name the dotted, lower-case name
     * @return the registered spelling, or {@code name} itself if none is registered
     * @throws NullPointerException if {@code name} is null
     */
    static String correctName(final String name) {
        java.util.Objects.requireNonNull(name, "name");
        return CORRECTED_NAMES.getOrDefault(name, name);
    }

    private static java.util.Map<String, String> buildCorrectedNames() {
        final var corrections = new java.util.HashMap<String, String>();
        corrections.put("bec.croise.des.sapins", "bec-croise.des.sapins");
        corrections.put("busard.saint.martin", "busard.saint-martin");
        corrections.put("chevalier.culblanc", "chevalier.cul-blanc");
        corrections.put("circaete.jean.le.blanc", "circaete.jean-le-blanc");
        corrections.put("gallinule.poule.d.eau", "gallinule.poule-d.eau");
        corrections.put("gobemouche.a.demi.collier", "gobemouche.a.demi-collier");
        corrections.put("grand.duc.d.europe", "grand-duc.d.europe");
        corrections.put("grosbec.casse.noyaux", "grosbec.casse-noyaux");
        corrections.put("heron.garde.boeufs", "heron.garde-boeufs");
        corrections.put("hibou.moyen.duc", "hibou.moyen-duc");
        corrections.put("martin.pecheur.d.europe", "martin-pecheur.d.europe");
        corrections.put("mesange.a.longue.queue", "orite.a.longue.queue");
        corrections.put("petit.duc.scops", "petit-duc.scops");
        corrections.put("pie.grieche.a.poitrine.rose", "pie-grieche.a.poitrine.rose");
        corrections.put("pie.grieche.a.tete.rousse", "pie-grieche.a.tete.rousse");
        corrections.put("pie.grieche.ecorcheur", "pie-grieche.ecorcheur");
        corrections.put("pie.grieche.grise", "pie-grieche.grise");
        corrections.put("pie.grieche.isabelle", "pie-grieche.isabelle");
        corrections.put("pigeon.biset.domestique", "pigeon.biset");
        corrections.put("puffin.cendre", "puffin.boreal");
        corrections.put("roitelet.a.triple.bandeau", "roitelet.triple-bandeau");
        corrections.put("vautour.percnoptere", "percnoptere.d.egypte");
        return java.util.Map.copyOf(corrections);
    }
}

View File

@@ -0,0 +1,13 @@
/**
 * Module descriptor of the oiseaux.net fetcher.
 * <p>
 * Exports the single package {@code ch.gtache.fro.oiseaux.net}.
 */
module ch.gtache.fro.oiseaux.net {
    // JDK
    // NOTE(review): nothing in the visible sources uses java.compiler; presumably needed for
    // javax.annotation.processing.Generated on Dagger-generated code - confirm
    requires java.compiler;

    // Project: transitive because the public OiseauxNetFetcher extends AbstractSeleniumFetcher
    requires transitive ch.gtache.fro.selenium;

    // Third-party libraries
    requires jakarta.inject;
    requires org.apache.logging.log4j;
    requires org.jsoup;
    requires org.seleniumhq.selenium.chrome_driver;

    exports ch.gtache.fro.oiseaux.net;
}

View File

@@ -0,0 +1,18 @@
package ch.gtache.fro.oiseaux.net;
import ch.gtache.fro.FetchException;
import ch.gtache.fro.impl.CommonBirdsProvider;
import ch.gtache.fro.impl.ConfigurationImpl;
import static org.mockito.Mockito.*;
/**
 * Manual driver that runs a full {@link OiseauxNetFetcher} fetch and prints the result.
 * It contains no assertions.
 */
class TestOiseauxNetFetcher {

    /**
     * Fetches all birds and prints what {@code fetchAll()} returns.
     *
     * @param args command line arguments (unused)
     * @throws FetchException if the fetch fails
     */
    public static void main(final String[] args) throws FetchException {
        try (final var fetcher = createFetcher()) {
            final var result = fetcher.fetchAll();
            System.out.println(result);
        }
    }

    /**
     * Builds the fetcher on a spied {@link CommonBirdsProvider} and a {@link ConfigurationImpl}
     * mock that is constructed with that provider and calls its real methods by default.
     */
    private static OiseauxNetFetcher createFetcher() {
        final var birdProvider = spy(CommonBirdsProvider.class);
        final var configurationSettings = withSettings()
                .useConstructor(birdProvider)
                .defaultAnswer(CALLS_REAL_METHODS);
        final var configuration = mock(ConfigurationImpl.class, configurationSettings);
        return new OiseauxNetFetcher(birdProvider, configuration);
    }
}