Implements database, adds profiles

This commit is contained in:
2025-10-01 22:23:00 +02:00
parent b2571c191f
commit d2da811868
86 changed files with 17323 additions and 483 deletions

34
utils/pom.xml Normal file
View File

@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor of the fro-utils module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits groupId, version and shared configuration from the fro parent POM. -->
<parent>
<groupId>ch.gtache.fro</groupId>
<artifactId>fro</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>fro-utils</artifactId>
<!-- Versions of the PDF libraries, referenced by the dependencies below. -->
<properties>
<pdfbox.version>3.0.5</pdfbox.version>
<tabula.version>1.0.5</tabula.version>
</properties>
<dependencies>
<!-- No version is declared here: presumably managed by the parent POM (dependencyManagement); confirm. -->
<dependency>
<groupId>ch.gtache.fro</groupId>
<artifactId>fro-core</artifactId>
</dependency>
<!-- PDF parsing and text extraction; PDFSettingsParser imports classes from org.apache.pdfbox. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<!-- NOTE(review): PDFSettingsParser does not import any technology.tabula class; confirm this dependency is still needed. -->
<dependency>
<groupId>technology.tabula</groupId>
<artifactId>tabula</artifactId>
<version>${tabula.version}</version>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,160 @@
package ch.gtache.fro.utils;
import ch.gtache.fro.PictureType;
import ch.gtache.fro.SoundType;
import ch.gtache.fro.impl.CommonBirds;
import ch.gtache.fro.practice.BirdPracticeParameters;
import ch.gtache.fro.practice.impl.BirdPracticeParametersImpl;
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.IOException;
import java.text.Normalizer;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Command-line helper that reads a bird list from a PDF and prints SQL insert statements
 * for the {@code FRO1} profile.
 * <p>
 * Every {@link CommonBirds} value gets one {@link BirdPracticeParameters}: birds found in the PDF are
 * enabled with the FRO1 fetchers and picture types (plus the FRO1 sound types when the bird is listed
 * in {@code FRO1_SONGS}); every other bird stays disabled with empty settings.
 */
public final class PDFSettingsParser {

    /**
     * Matches a number, a space, the bird name (group {@code bird}) and at least one {@code " x"} mark.
     */
    private static final Pattern BIRD_PATTERN = Pattern.compile("\\d+ (?<bird>.+?)(?: x)+");

    /**
     * Combining marks left behind once accented letters have been decomposed (NFKD).
     */
    private static final Pattern DIACRITICS_PATTERN = Pattern.compile("\\p{M}");

    /**
     * Any character that is neither a letter nor a digit (space, apostrophe, hyphen, ...).
     */
    private static final Pattern SEPARATOR_PATTERN = Pattern.compile("[^\\p{L}\\p{N}]");

    /**
     * Names of the fetchers enabled for every bird found in the PDF.
     */
    private static final Set<String> FETCHERS = Set.of("Vogelwarte", "chant-oiseaux.fr", "oiseaux.net");

    /**
     * Names of the {@link CommonBirds} constants that also get {@code FRO1_SOUNDS} enabled.
     */
    private static final Set<String> FRO1_SONGS = Set.of("PIGEON_RAMIER", "TOURTERELLE_TURQUE", "COUCOU_GRIS",
            "PIC_VERT", "PIC_EPEICHE", "LORIOT_D_EUROPE", "GEAI_DES_CHENES", "MESANGE_NOIRE", "MESANGE_BLEUE",
            "MESANGE_CHARBONNIERE", "ALOUETTE_DES_CHAMPS", "HIRONDELLE_RUSTIQUE", "POUILLOT_FITIS", "POUILLOT_VELOCE",
            "ROUSSEROLLE_EFFARVATTE", "ROUSSEROLLE_TURDOIDE", "FAUVETTE_A_TETE_NOIRE", "ROITELET_HUPPE", "TROGLODYTE_MIGNON",
            "SITTELLE_TORCHEPOT", "GRIMPEREAU_DES_JARDINS", "ETOURNEAU_SANSONNET", "GRIVE_DRAINE", "GRIVE_MUSICIENNE",
            "MERLE_NOIR", "ROUGEGORGE_FAMILIER", "ROSSIGNOL_PHILOMELE", "ROUGEQUEUE_NOIR", "BERGERONNETTE_GRISE",
            "PINSON_DES_ARBRES", "VERDIER_D_EUROPE", "CHARDONNERET_ELEGANT", "SERIN_CINI", "BRUANT_JAUNE");

    /**
     * Picture types enabled for every bird found in the PDF.
     */
    private static final Set<PictureType> FRO1_PICTURES = Set.of(PictureType.ADULT, PictureType.ADULT_FEMALE, PictureType.ADULT_MALE,
            PictureType.ADULT_INTERNUPTIAL, PictureType.ADULT_INTERNUPTIAL_MALE, PictureType.ADULT_INTERNUPTIAL_FEMALE,
            PictureType.ADULT_NUPTIAL, PictureType.ADULT_NUPTIAL_MALE, PictureType.ADULT_NUPTIAL_FEMALE,
            PictureType.DARK_MORPH, PictureType.LIGHT_MORPH,
            PictureType.UNKNOWN);

    /**
     * Sound types enabled for the birds listed in {@code FRO1_SONGS}.
     */
    private static final Set<SoundType> FRO1_SOUNDS = Set.of(SoundType.SONG, SoundType.UNKNOWN);

    private PDFSettingsParser() {
        // Utility class: only static members, never instantiated
    }

    /**
     * Extracts the text of the PDF and builds the practice parameters of every known bird.
     *
     * @param path The path of the PDF file
     * @return One entry per {@link CommonBirds} value, sorted by bird name
     * @throws IOException              If the PDF cannot be read
     * @throws IllegalArgumentException If a name found in the PDF matches no {@link CommonBirds} constant
     */
    private static List<BirdPracticeParameters> parseParameters(final String path) throws IOException {
        final String text;
        try (final var document = loadDocument(path)) {
            final var stripper = new PDFTextStripper();
            // NOTE(review): presumably needed so that the cells of a row end up on the same text line; confirm
            stripper.setSortByPosition(true);
            text = stripper.getText(document);
        }

        // Start with every bird disabled, then overwrite the entries of the birds listed in the PDF
        final var birds = CommonBirds.values();
        final var parameters = new HashMap<String, BirdPracticeParameters>(birds.length);
        for (final var bird : birds) {
            parameters.put(bird.name(), new BirdPracticeParametersImpl(bird, false, Set.of(), Set.of(), Set.of()));
        }

        final var matcher = BIRD_PATTERN.matcher(text);
        while (matcher.find()) {
            final var bird = toBird(matcher.group("bird"));
            final Set<SoundType> sounds = FRO1_SONGS.contains(bird.name()) ? FRO1_SOUNDS : Set.of();
            parameters.put(bird.name(), new BirdPracticeParametersImpl(bird, true, FETCHERS, FRO1_PICTURES, sounds));
        }
        return parameters.values().stream().sorted(Comparator.comparing(p -> p.bird().name())).toList();
    }

    /**
     * Finds the {@link CommonBirds} constant matching a bird name as written in the PDF.
     *
     * @param birdName The name extracted from the PDF
     * @return The matching constant
     * @throws IllegalArgumentException If no constant matches; the message contains both the PDF name and the looked-up name
     */
    private static CommonBirds toBird(final String birdName) {
        final var enumName = toEnumName(birdName);
        try {
            return CommonBirds.valueOf(enumName);
        } catch (final IllegalArgumentException e) {
            throw new IllegalArgumentException("Unknown bird '" + birdName + "' (looked up as " + enumName + ")", e);
        }
    }

    /**
     * Converts a bird name as written in the PDF to the naming scheme of the {@link CommonBirds} constants:
     * accents removed, separators replaced by underscores, upper case.
     *
     * @param birdName The name extracted from the PDF
     * @return The name to look up in {@link CommonBirds}
     */
    private static String toEnumName(final String birdName) {
        final var decomposed = Normalizer.normalize(birdName, Normalizer.Form.NFKD);
        final var withoutAccents = DIACRITICS_PATTERN.matcher(decomposed).replaceAll("");
        // Every separator maps to exactly one underscore. Replacing the empty string instead would
        // insert an underscore around every single character and no constant would ever match.
        return SEPARATOR_PATTERN.matcher(withoutAccents).replaceAll("_")
                .toUpperCase(Locale.ROOT)
                // Spelling differences between the PDF and the constant names
                .replace("ORITE", "MESANGE")
                .replace("LUSCINOIDE", "LUSCINIOIDE")
                .replace("GROS_BEC", "GROSBEC");
    }

    /**
     * Opens the PDF document stored at the given path. The caller must close the returned document.
     *
     * @param path The path of the PDF file
     * @return The parsed document
     * @throws IOException If the file cannot be read or parsed
     */
    private static PDDocument loadDocument(final String path) throws IOException {
        final var parser = new PDFParser(new RandomAccessReadBufferedFile(path));
        return parser.parse();
    }

    /**
     * Parses the PDF given as first argument and prints the insert statement of the
     * {@code bird_parameters_fetcher} table to the standard output.
     *
     * @param args The program arguments; the first one is the path of the PDF, the others are ignored
     * @throws IOException              If the PDF cannot be read
     * @throws IllegalArgumentException If the path is missing or the PDF contains an unknown bird
     */
    public static void main(final String[] args) throws IOException {
        if (args.length == 0) {
            throw new IllegalArgumentException("Missing argument: path of the PDF file to parse");
        }
        final var parameters = parseParameters(args[0]);

        // One row per (enabled bird, fetcher) pair; ids are resolved by name through sub-selects
        final var rows = parameters.stream()
                .filter(p -> p.enabled() && !p.enabledFetchers().isEmpty())
                .flatMap(p -> p.enabledFetchers().stream().sorted()
                        .map(f -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird()
                                + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM fetcher WHERE name='" + f + "'))"))
                .collect(Collectors.joining(", \n"));
        System.out.println("INSERT INTO bird_parameters_fetcher(bird_parameters_id, fetcher_id) VALUES " + rows + ";");

        // NOTE(review): disabled generators for the other tables, kept because they are the only copy of
        // these statements. Consider turning them into methods selected by a program argument.
        //        System.out.println("INSERT INTO bird_parameters(profile_id, bird_id, enabled) VALUES " + parameters.stream().map(p -> "((SELECT id FROM profile WHERE name='FRO1'), (SELECT id FROM bird WHERE name='" + p.bird() + "'), " + p.enabled() + ")").collect(Collectors.joining(", \n")) + ";");
        //        System.out.println("INSERT INTO bird_parameters_picture_type(bird_parameters_id, picture_type_id) VALUES " + parameters.stream().filter(p -> p.enabled() && !p.enabledPictureTypes().isEmpty()).flatMap(p -> p.enabledPictureTypes().stream().sorted().map(pt -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird() + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM picture_type WHERE name='" + pt.name() + "'))")).collect(Collectors.joining(", \n")) + ";");
        //        System.out.println("INSERT INTO bird_parameters_sound_type(bird_parameters_id, sound_type_id) VALUES " + parameters.stream().filter(p -> p.enabled() && !p.enabledSoundTypes().isEmpty()).flatMap(p -> p.enabledSoundTypes().stream().sorted().map(st -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird() + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM sound_type WHERE name='" + st.name() + "'))")).collect(Collectors.joining(", \n")) + ";");
        /*        System.out.printf("""
                WITH profile_cte AS (SELECT id AS profile_id
                FROM profile
                WHERE name = 'FRO1'),
                bird_cte AS (SELECT name, id AS bird_id
                FROM bird)
                INSERT
                INTO bird_parameters(profile_id, bird_id, enabled)
                SELECT p.profile_id, b.bird_id, v.enabled
                FROM profile_cte p
                JOIN (VALUES %s) AS v(bird_name, enabled) JOIN bird_cte b on B.name = v.bird_name;%n""", parameters.stream().map(s -> "('" + s.bird() + "', " + s.enabled() + ")").collect(Collectors.joining(",\n")));
        System.out.printf("""
                WITH profile_cte AS (
                SELECT id AS profile_id FROM profile WHERE name = 'FRO1'
                ),
                bird_cte AS (
                SELECT name AS bird_name, id AS bird_id FROM bird
                ),
                picture_type_cte AS (
                SELECT name AS picture_type_name, id AS picture_type_id FROM picture_type
                ),
                bird_parameters_cte AS (
                SELECT bp.id AS bird_parameters_id, b.name AS bird_name
                FROM bird_parameters bp
                JOIN bird b ON bp.bird_id = b.id
                WHERE bp.profile_id = (SELECT profile_id FROM profile_cte)
                ),
                pairs AS (
                SELECT * FROM (
                VALUES
                %s
                ) AS tmp(bird_name, picture_type_name)
                )
                INSERT INTO bird_parameters_picture_type (bird_parameters_id, picture_type_id)
                SELECT
                bp.bird_parameters_id,
                pt.picture_type_id
                FROM pairs p
                JOIN bird_parameters_cte bp ON bp.bird_name = p.bird_name
                JOIN picture_type_cte pt ON pt.picture_type_name = p.picture_type_name;%n""", parameters.stream().flatMap(s -> s.enabledPictureTypes().stream().map(pt -> "('" + s.bird() + "', '" + pt.name() + "')")).collect(Collectors.joining(",\n")));
        System.out.printf("""
                WITH profile_cte AS (
                SELECT id AS profile_id FROM profile WHERE name = 'FRO1'
                ),
                bird_cte AS (
                SELECT name AS bird_name, id AS bird_id FROM bird
                ),
                sound_type_cte AS (
                SELECT name AS sound_type_name, id AS sound_type_id FROM sound_type
                ),
                bird_parameters_cte AS (
                SELECT bp.id AS bird_parameters_id, b.name AS bird_name
                FROM bird_parameters bp
                JOIN bird b ON bp.bird_id = b.id
                WHERE bp.profile_id = (SELECT profile_id FROM profile_cte)
                ),
                pairs AS (
                SELECT * FROM (
                VALUES
                %s
                ) AS tmp(bird_name, sound_type_name)
                )
                INSERT INTO bird_parameters_sound_type (bird_parameters_id, sound_type_id)
                SELECT
                bp.bird_parameters_id,
                st.sound_type_id
                FROM pairs p
                JOIN bird_parameters_cte bp ON bp.bird_name = p.bird_name
                JOIN sound_type_cte st ON st.sound_type_name = p.sound_type_name;%n""", parameters.stream().flatMap(s -> s.enabledSoundTypes().stream().map(st -> "('" + s.bird() + "', '" + st.name() + "')")).collect(Collectors.joining(",\n")));*/
    }
}