Exact picture seems to work
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
package ch.gtache.fro.modules.oiseaux.net;
|
||||
|
||||
import jakarta.inject.Qualifier;
|
||||
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
|
||||
* Qualifier for the oiseaux.net fetcher
|
||||
*/
|
||||
@Qualifier
|
||||
@Documented
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
public @interface OiseauxNet {
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package ch.gtache.fro.modules.oiseaux.net;
|
||||
|
||||
import ch.gtache.fro.Fetcher;
|
||||
import ch.gtache.fro.FetcherConfiguration;
|
||||
import ch.gtache.fro.impl.FetcherConfigurationImpl;
|
||||
import ch.gtache.fro.oiseaux.net.OiseauxNetFetcher;
|
||||
import dagger.Binds;
|
||||
import dagger.Module;
|
||||
import dagger.Provides;
|
||||
import dagger.multibindings.IntoSet;
|
||||
import jakarta.inject.Singleton;
|
||||
|
||||
/**
|
||||
* Dagger module for the oiseaux.net fetcher
|
||||
*/
|
||||
@Module
|
||||
public abstract class OiseauxNetModule {
|
||||
|
||||
private OiseauxNetModule() {
|
||||
//Empty constructor
|
||||
}
|
||||
|
||||
@Binds
|
||||
@IntoSet
|
||||
abstract Fetcher bindsFetcher(OiseauxNetFetcher fetcher);
|
||||
|
||||
@Provides
|
||||
@OiseauxNet
|
||||
@Singleton
|
||||
static FetcherConfiguration providesFetcherConfiguration() {
|
||||
return new FetcherConfigurationImpl("oiseaux.net");
|
||||
}
|
||||
}
|
||||
@@ -37,30 +37,32 @@ final class BirdTranslationsFetcher {
|
||||
final var driver = new ChromeDriver(options);
|
||||
for (final var value : CommonBirds.values()) {
|
||||
final var name = getName(value);
|
||||
final var url = "https://www.oiseaux.net/oiseaux/" + name + ".html";
|
||||
driver.get(url);
|
||||
final var html = (String) driver.executeScript("return document.documentElement.outerHTML");
|
||||
final var document = Jsoup.parse(html);
|
||||
final var title = document.select("span[itemprop=headline]");
|
||||
final var elements = document.select("span.flag");
|
||||
if (title.isEmpty() || elements.isEmpty()) {
|
||||
System.out.println(value + " NOT FOUND!");
|
||||
} else {
|
||||
createOrAppend("fr", value.name(), title.text());
|
||||
for (final var element : elements) {
|
||||
final var clazz = element.attr("class");
|
||||
final var split = clazz.split(" ");
|
||||
final var language = Arrays.stream(split).filter(s -> s.startsWith("flag-")).map(s -> s.split("-")[1]).findFirst().orElse(null);
|
||||
if (language == null) {
|
||||
System.out.println("Language not found in " + element);
|
||||
} else {
|
||||
final var next = element.nextElementSibling();
|
||||
final var text = END_COMMA_PATTERN.matcher(next.text()).replaceAll("");
|
||||
createOrAppend(language, value.name(), text);
|
||||
if (!name.isBlank()) {
|
||||
final var url = "https://www.oiseaux.net/oiseaux/" + name + ".html";
|
||||
driver.get(url);
|
||||
final var html = (String) driver.executeScript("return document.documentElement.outerHTML");
|
||||
final var document = Jsoup.parse(html);
|
||||
final var title = document.select("span[itemprop=headline]");
|
||||
final var elements = document.select("span.flag");
|
||||
if (title.isEmpty() || elements.isEmpty()) {
|
||||
System.out.println(value + " NOT FOUND!");
|
||||
} else {
|
||||
createOrAppend("fr", value.name(), title.text());
|
||||
for (final var element : elements) {
|
||||
final var clazz = element.attr("class");
|
||||
final var split = clazz.split(" ");
|
||||
final var language = Arrays.stream(split).filter(s -> s.startsWith("flag-")).map(s -> s.split("-")[1]).findFirst().orElse(null);
|
||||
if (language == null) {
|
||||
System.out.println("Language not found in " + element);
|
||||
} else {
|
||||
final var next = element.nextElementSibling();
|
||||
final var text = END_COMMA_PATTERN.matcher(next.text()).replaceAll("");
|
||||
createOrAppend(language, value.name(), text);
|
||||
}
|
||||
}
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,12 +2,13 @@ package ch.gtache.fro.oiseaux.net;
|
||||
|
||||
import ch.gtache.fro.Bird;
|
||||
import ch.gtache.fro.BirdProvider;
|
||||
import ch.gtache.fro.Configuration;
|
||||
import ch.gtache.fro.FetchException;
|
||||
import ch.gtache.fro.Fetcher;
|
||||
import ch.gtache.fro.FetcherConfiguration;
|
||||
import ch.gtache.fro.PictureType;
|
||||
import ch.gtache.fro.SoundType;
|
||||
import ch.gtache.fro.impl.CommonBirds;
|
||||
import ch.gtache.fro.modules.oiseaux.net.OiseauxNet;
|
||||
import ch.gtache.fro.selenium.AbstractSeleniumFetcher;
|
||||
import jakarta.inject.Inject;
|
||||
import jakarta.inject.Singleton;
|
||||
@@ -31,7 +32,7 @@ import java.util.regex.Pattern;
|
||||
public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
private static final Logger logger = LogManager.getLogger(OiseauxNetFetcher.class);
|
||||
|
||||
private static final Pattern SOUND_PATTERN = Pattern.compile("https://www.xeno-canto.org/(?<id>\\d+)/download");
|
||||
private static final Pattern SOUND_PATTERN = Pattern.compile("https://www\\.xeno-canto\\.org/(?<id>\\d+)/download");
|
||||
|
||||
|
||||
/**
|
||||
@@ -42,7 +43,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
* @throws NullPointerException If any parameter is null
|
||||
*/
|
||||
@Inject
|
||||
OiseauxNetFetcher(final BirdProvider birdProvider, final Configuration configuration) {
|
||||
OiseauxNetFetcher(final BirdProvider birdProvider, @OiseauxNet final FetcherConfiguration configuration) {
|
||||
super(birdProvider, configuration);
|
||||
}
|
||||
|
||||
@@ -71,43 +72,47 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
|
||||
private void downloadImages(final Bird bird) throws FetchException {
|
||||
final var url = getImagesURL(bird);
|
||||
logger.info("Trying to download {}", url);
|
||||
try {
|
||||
final var document = getDocument(url);
|
||||
final var figures = document.select("figure");
|
||||
for (final var figure : figures) {
|
||||
handleFigure(bird, url, figure);
|
||||
if (!url.isBlank()) {
|
||||
logger.info("Trying to download {}", url);
|
||||
try {
|
||||
final var document = getDocument(url);
|
||||
final var figures = document.select("figure");
|
||||
for (final var figure : figures) {
|
||||
handleFigure(bird, url, figure);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new FetchException("Failed to parse and download " + url, e);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new FetchException("Failed to parse and download " + url, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void downloadSounds(final Bird bird) throws FetchException {
|
||||
final var jsUrl = getJSUrl(bird);
|
||||
try {
|
||||
final var jsUrl = getJSUrl(bird);
|
||||
final var content = Jsoup.connect(jsUrl).ignoreContentType(true).execute().body();
|
||||
final var url = getURL(bird);
|
||||
final var pageContent = getDocument(url);
|
||||
final var soundTypes = getSoundTypes(pageContent);
|
||||
final var matcher = SOUND_PATTERN.matcher(content);
|
||||
var i = 0;
|
||||
while (matcher.find()) {
|
||||
final var soundUrl = matcher.group();
|
||||
final var soundType = i >= soundTypes.size() ? SoundType.UNKNOWN : soundTypes.get(i);
|
||||
try {
|
||||
downloadSound(bird, soundUrl, soundType);
|
||||
} catch (final HttpStatusException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
logger.warn("Sound {} not found", soundUrl);
|
||||
} else {
|
||||
throw e;
|
||||
if (!jsUrl.isBlank()) {
|
||||
final var content = Jsoup.connect(jsUrl).ignoreContentType(true).execute().body();
|
||||
final var url = getURL(bird);
|
||||
final var pageContent = getDocument(url);
|
||||
final var soundTypes = getSoundTypes(pageContent);
|
||||
final var matcher = SOUND_PATTERN.matcher(content);
|
||||
var i = 0;
|
||||
while (matcher.find()) {
|
||||
final var soundUrl = matcher.group();
|
||||
final var soundType = i >= soundTypes.size() ? SoundType.UNKNOWN : soundTypes.get(i);
|
||||
try {
|
||||
downloadSound(bird, soundUrl, soundType);
|
||||
} catch (final HttpStatusException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
logger.warn("Sound {} not found", soundUrl);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (i != soundTypes.size()) {
|
||||
logger.warn("Number of sounds ({}) does not match number of sound types ({})", i, soundTypes.size());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (i != soundTypes.size()) {
|
||||
logger.warn("Number of sounds ({}) does not match number of sound types ({})", i, soundTypes.size());
|
||||
}
|
||||
} catch (final HttpStatusException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
@@ -116,7 +121,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
throw new FetchException(e);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new FetchException("Failed to parse and download " + getJSUrl(bird), e);
|
||||
throw new FetchException("Failed to parse and download " + jsUrl, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,12 +148,12 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
|
||||
private static String getURL(final Bird bird) {
|
||||
final var name = Utils.getName(bird);
|
||||
return "https://www.oiseaux.net/oiseaux/" + name + ".html";
|
||||
return name.isBlank() ? "" : "https://www.oiseaux.net/oiseaux/" + name + ".html";
|
||||
}
|
||||
|
||||
private static String getJSUrl(final Bird bird) {
|
||||
final var name = Utils.getName(bird);
|
||||
return "https://www.oiseaux.net/front/js/espece/" + name + ".js";
|
||||
return name.isBlank() ? "" : "https://www.oiseaux.net/front/js/espece/" + name + ".js";
|
||||
}
|
||||
|
||||
private void handleFigure(final Bird bird, final String url, final Element figure) throws IOException {
|
||||
@@ -198,7 +203,7 @@ public class OiseauxNetFetcher extends AbstractSeleniumFetcher {
|
||||
|
||||
private static String getImagesURL(final Bird bird) {
|
||||
final var name = Utils.getName(bird);
|
||||
return "https://www.oiseaux.net/oiseaux/photos/" + name + ".html";
|
||||
return name.isBlank() ? "" : "https://www.oiseaux.net/oiseaux/photos/" + name + ".html";
|
||||
}
|
||||
|
||||
private static PictureType getPictureType(final String name) {
|
||||
|
||||
@@ -14,7 +14,7 @@ final class Utils {
|
||||
return correctName(bird.name().replace("_", ".").toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
static String correctName(final String name) {
|
||||
private static String correctName(final String name) {
|
||||
return switch (name) {
|
||||
case "bec.croise.des.sapins" -> "bec-croise.des.sapins";
|
||||
case "busard.saint.martin" -> "busard.saint-martin";
|
||||
@@ -28,8 +28,6 @@ final class Utils {
|
||||
case "hibou.moyen.duc" -> "hibou.moyen-duc";
|
||||
case "martin.pecheur.d.europe" -> "martin-pecheur.d.europe";
|
||||
case "mesange.a.longue.queue" -> "orite.a.longue.queue";
|
||||
//case "mesange.alpestre" -> "mesange.boreale";
|
||||
//case "mesange.des.saules" -> "mesange.boreale";
|
||||
case "petit.duc.scops" -> "petit-duc.scops";
|
||||
case "pie.grieche.a.poitrine.rose" -> "pie-grieche.a.poitrine.rose";
|
||||
case "pie.grieche.a.tete.rousse" -> "pie-grieche.a.tete.rousse";
|
||||
@@ -40,6 +38,7 @@ final class Utils {
|
||||
case "puffin.cendre" -> "puffin.boreal";
|
||||
case "roitelet.a.triple.bandeau" -> "roitelet.triple-bandeau";
|
||||
case "vautour.percnoptere" -> "percnoptere.d.egypte";
|
||||
case "mesange.alpestre", "mesange.des.saules" -> "";
|
||||
default -> name;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3,11 +3,12 @@
|
||||
*/
|
||||
module ch.gtache.fro.oiseaux.net {
|
||||
requires transitive ch.gtache.fro.selenium;
|
||||
requires org.seleniumhq.selenium.chrome_driver;
|
||||
requires org.jsoup;
|
||||
requires jakarta.inject;
|
||||
requires java.compiler;
|
||||
requires transitive jakarta.inject;
|
||||
requires transitive java.compiler;
|
||||
requires org.apache.logging.log4j;
|
||||
requires ch.gtache.fro.api;
|
||||
requires org.seleniumhq.selenium.chrome_driver;
|
||||
|
||||
exports ch.gtache.fro.oiseaux.net;
|
||||
exports ch.gtache.fro.modules.oiseaux.net;
|
||||
}
|
||||
@@ -2,7 +2,7 @@ package ch.gtache.fro.oiseaux.net;
|
||||
|
||||
import ch.gtache.fro.FetchException;
|
||||
import ch.gtache.fro.impl.CommonBirdsProvider;
|
||||
import ch.gtache.fro.impl.ConfigurationImpl;
|
||||
import ch.gtache.fro.impl.FetcherConfigurationImpl;
|
||||
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@@ -10,7 +10,7 @@ class TestOiseauxNetFetcher {
|
||||
|
||||
public static void main(final String[] args) throws FetchException {
|
||||
final var provider = spy(CommonBirdsProvider.class);
|
||||
final var configuration = mock(ConfigurationImpl.class, withSettings().useConstructor(provider).defaultAnswer(CALLS_REAL_METHODS));
|
||||
final var configuration = mock(FetcherConfigurationImpl.class, withSettings().useConstructor("oiseaux.net").defaultAnswer(CALLS_REAL_METHODS));
|
||||
try (final var fetcher = new OiseauxNetFetcher(provider, configuration)) {
|
||||
System.out.println(fetcher.fetchAll());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user