Implements database, adds profiles
This commit is contained in:
34
utils/pom.xml
Normal file
34
utils/pom.xml
Normal file
@@ -0,0 +1,34 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<!-- Parent POM of the fro project that this module inherits from. -->
<parent>
|
||||
<groupId>ch.gtache.fro</groupId>
|
||||
<artifactId>fro</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<!-- Utilities module: no groupId/version is declared here, so both are inherited from the parent. -->
<artifactId>fro-utils</artifactId>
|
||||
|
||||
<!-- Versions of the third-party PDF libraries referenced by the dependencies below. -->
<properties>
|
||||
<pdfbox.version>3.0.5</pdfbox.version>
|
||||
<tabula.version>1.0.5</tabula.version>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<!-- No explicit version: presumably managed by the parent's dependencyManagement (TODO confirm). -->
<dependency>
|
||||
<groupId>ch.gtache.fro</groupId>
|
||||
<artifactId>fro-core</artifactId>
|
||||
</dependency>
|
||||
<!-- PDFBox: used by PDFSettingsParser to load the PDF and extract its text. -->
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
<!-- NOTE(review): tabula 1.0.5 appears to have been built against PDFBox 2.x; confirm it is compatible with the 3.x version pinned above. -->
<dependency>
|
||||
<groupId>technology.tabula</groupId>
|
||||
<artifactId>tabula</artifactId>
|
||||
<version>${tabula.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
160
utils/src/main/java/ch/gtache/fro/utils/PDFSettingsParser.java
Normal file
160
utils/src/main/java/ch/gtache/fro/utils/PDFSettingsParser.java
Normal file
@@ -0,0 +1,160 @@
|
||||
package ch.gtache.fro.utils;
|
||||
|
||||
import ch.gtache.fro.PictureType;
|
||||
import ch.gtache.fro.SoundType;
|
||||
import ch.gtache.fro.impl.CommonBirds;
|
||||
import ch.gtache.fro.practice.BirdPracticeParameters;
|
||||
import ch.gtache.fro.practice.impl.BirdPracticeParametersImpl;
|
||||
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
||||
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Normalizer;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Parser of PDF to settings
|
||||
*/
|
||||
public class PDFSettingsParser {
|
||||
|
||||
private static final Pattern BIRD_PATTERN = Pattern.compile("\\d+ (?<bird>.+?)(?: x)+");
|
||||
private static final Pattern DIACRITICS_PATTERN = Pattern.compile("\\p{M}");
|
||||
private static final Set<String> FETCHERS = Set.of("Vogelwarte", "chant-oiseaux.fr", "oiseaux.net");
|
||||
private static final Set<String> FRO1_SONGS = Set.of("PIGEON_RAMIER", "TOURTERELLE_TURQUE", "COUCOU_GRIS",
|
||||
"PIC_VERT", "PIC_EPEICHE", "LORIOT_D_EUROPE", "GEAI_DES_CHENES", "MESANGE_NOIRE", "MESANGE_BLEUE",
|
||||
"MESANGE_CHARBONNIERE", "ALOUETTE_DES_CHAMPS", "HIRONDELLE_RUSTIQUE", "POUILLOT_FITIS", "POUILLOT_VELOCE",
|
||||
"ROUSSEROLLE_EFFARVATTE", "ROUSSEROLLE_TURDOIDE", "FAUVETTE_A_TETE_NOIRE", "ROITELET_HUPPE", "TROGLODYTE_MIGNON",
|
||||
"SITTELLE_TORCHEPOT", "GRIMPEREAU_DES_JARDINS", "ETOURNEAU_SANSONNET", "GRIVE_DRAINE", "GRIVE_MUSICIENNE",
|
||||
"MERLE_NOIR", "ROUGEGORGE_FAMILIER", "ROSSIGNOL_PHILOMELE", "ROUGEQUEUE_NOIR", "BERGERONNETTE_GRISE",
|
||||
"PINSON_DES_ARBRES", "VERDIER_D_EUROPE", "CHARDONNERET_ELEGANT", "SERIN_CINI", "BRUANT_JAUNE");
|
||||
|
||||
private static final Set<PictureType> FRO1_PICTURES = Set.of(PictureType.ADULT, PictureType.ADULT_FEMALE, PictureType.ADULT_MALE,
|
||||
PictureType.ADULT_INTERNUPTIAL, PictureType.ADULT_INTERNUPTIAL_MALE, PictureType.ADULT_INTERNUPTIAL_FEMALE,
|
||||
PictureType.ADULT_NUPTIAL, PictureType.ADULT_NUPTIAL_MALE, PictureType.ADULT_NUPTIAL_FEMALE,
|
||||
PictureType.DARK_MORPH, PictureType.LIGHT_MORPH,
|
||||
PictureType.UNKNOWN);
|
||||
|
||||
private static final Set<SoundType> FRO1_SOUNDS = Set.of(SoundType.SONG, SoundType.UNKNOWN);
|
||||
|
||||
private PDFSettingsParser() {
|
||||
}
|
||||
|
||||
private List<BirdPracticeParameters> parseParameters(final String path) throws IOException {
|
||||
try (final var document = loadDocument(path)) {
|
||||
final var stripper = new PDFTextStripper();
|
||||
stripper.setSortByPosition(true);
|
||||
final var text = stripper.getText(document);
|
||||
final var matcher = BIRD_PATTERN.matcher(text);
|
||||
final var parameters = new HashMap<String, BirdPracticeParameters>(CommonBirds.values().length);
|
||||
for (final var value : CommonBirds.values()) {
|
||||
parameters.put(value.name(), new BirdPracticeParametersImpl(value, false, Set.of(), Set.of(), Set.of()));
|
||||
}
|
||||
while (matcher.find()) {
|
||||
final var birdName = matcher.group("bird");
|
||||
final var fixedName = DIACRITICS_PATTERN.matcher(Normalizer.normalize(birdName, Normalizer.Form.NFKD))
|
||||
.replaceAll("").replace(" ", "_").replace("’", "_")
|
||||
.replace("'", "_").replace("-", "_").toUpperCase(Locale.ROOT)
|
||||
.replace("ORITE", "MESANGE").replace("LUSCINOIDE", "LUSCINIOIDE")
|
||||
.replace("GROS_BEC", "GROSBEC");
|
||||
final var bird = CommonBirds.valueOf(fixedName);
|
||||
if (FRO1_SONGS.contains(bird.name())) {
|
||||
parameters.put(bird.name(), new BirdPracticeParametersImpl(bird, true, FETCHERS, FRO1_PICTURES, FRO1_SOUNDS));
|
||||
} else {
|
||||
parameters.put(bird.name(), new BirdPracticeParametersImpl(bird, true, FETCHERS, FRO1_PICTURES, Set.of()));
|
||||
}
|
||||
}
|
||||
return parameters.values().stream().sorted(Comparator.comparing(p -> p.bird().name())).toList();
|
||||
}
|
||||
}
|
||||
|
||||
private static PDDocument loadDocument(final String path) throws IOException {
|
||||
final var parser = new PDFParser(new RandomAccessReadBufferedFile(path));
|
||||
return parser.parse();
|
||||
}
|
||||
|
||||
static void main(final String[] args) throws IOException {
|
||||
final var parser = new PDFSettingsParser();
|
||||
final var parameters = parser.parseParameters(args[0]);
|
||||
// System.out.println("INSERT INTO bird_parameters(profile_id, bird_id, enabled) VALUES " + parameters.stream().map(p -> "((SELECT id FROM profile WHERE name='FRO1'), (SELECT id FROM bird WHERE name='" + p.bird() + "'), " + p.enabled() + ")").collect(Collectors.joining(", \n")) + ";");
|
||||
// System.out.println("INSERT INTO bird_parameters_picture_type(bird_parameters_id, picture_type_id) VALUES " + parameters.stream().filter(p -> p.enabled() && !p.enabledPictureTypes().isEmpty()).flatMap(p -> p.enabledPictureTypes().stream().sorted().map(pt -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird() + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM picture_type WHERE name='" + pt.name() + "'))")).collect(Collectors.joining(", \n")) + ";");
|
||||
// System.out.println("INSERT INTO bird_parameters_sound_type(bird_parameters_id, sound_type_id) VALUES " + parameters.stream().filter(p -> p.enabled() && !p.enabledSoundTypes().isEmpty()).flatMap(p -> p.enabledSoundTypes().stream().sorted().map(st -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird() + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM sound_type WHERE name='" + st.name() + "'))")).collect(Collectors.joining(", \n")) + ";");
|
||||
System.out.println("INSERT INTO bird_parameters_fetcher(bird_parameters_id, fetcher_id) VALUES " + parameters.stream().filter(p -> p.enabled() && !p.enabledFetchers().isEmpty()).flatMap(p -> p.enabledFetchers().stream().sorted().map(f -> "((SELECT id FROM bird_parameters WHERE bird_id=(SELECT id FROM bird WHERE name='" + p.bird() + "') AND profile_id=(SELECT id FROM profile WHERE name='FRO1')), (SELECT id FROM fetcher WHERE name='" + f + "'))")).collect(Collectors.joining(", \n")) + ";");
|
||||
/* System.out.printf("""
|
||||
WITH profile_cte AS (SELECT id AS profile_id
|
||||
FROM profile
|
||||
WHERE name = 'FRO1'),
|
||||
bird_cte AS (SELECT name, id AS bird_id
|
||||
FROM bird)
|
||||
INSERT
|
||||
INTO bird_parameters(profile_id, bird_id, enabled)
|
||||
SELECT p.profile_id, b.bird_id, v.enabled
|
||||
FROM profile_cte p
|
||||
JOIN (VALUES %s) AS v(bird_name, enabled) JOIN bird_cte b on B.name = v.bird_name;%n""", parameters.stream().map(s -> "('" + s.bird() + "', " + s.enabled() + ")").collect(Collectors.joining(",\n")));
|
||||
System.out.printf("""
|
||||
WITH profile_cte AS (
|
||||
SELECT id AS profile_id FROM profile WHERE name = 'FRO1'
|
||||
),
|
||||
bird_cte AS (
|
||||
SELECT name AS bird_name, id AS bird_id FROM bird
|
||||
),
|
||||
picture_type_cte AS (
|
||||
SELECT name AS picture_type_name, id AS picture_type_id FROM picture_type
|
||||
),
|
||||
bird_parameters_cte AS (
|
||||
SELECT bp.id AS bird_parameters_id, b.name AS bird_name
|
||||
FROM bird_parameters bp
|
||||
JOIN bird b ON bp.bird_id = b.id
|
||||
WHERE bp.profile_id = (SELECT profile_id FROM profile_cte)
|
||||
),
|
||||
pairs AS (
|
||||
SELECT * FROM (
|
||||
VALUES
|
||||
%s
|
||||
) AS tmp(bird_name, picture_type_name)
|
||||
)
|
||||
INSERT INTO bird_parameters_picture_type (bird_parameters_id, picture_type_id)
|
||||
SELECT
|
||||
bp.bird_parameters_id,
|
||||
pt.picture_type_id
|
||||
FROM pairs p
|
||||
JOIN bird_parameters_cte bp ON bp.bird_name = p.bird_name
|
||||
JOIN picture_type_cte pt ON pt.picture_type_name = p.picture_type_name;%n""", parameters.stream().flatMap(s -> s.enabledPictureTypes().stream().map(pt -> "('" + s.bird() + "', '" + pt.name() + "')")).collect(Collectors.joining(",\n")));
|
||||
System.out.printf("""
|
||||
WITH profile_cte AS (
|
||||
SELECT id AS profile_id FROM profile WHERE name = 'FRO1'
|
||||
),
|
||||
bird_cte AS (
|
||||
SELECT name AS bird_name, id AS bird_id FROM bird
|
||||
),
|
||||
sound_type_cte AS (
|
||||
SELECT name AS sound_type_name, id AS sound_type_id FROM sound_type
|
||||
),
|
||||
bird_parameters_cte AS (
|
||||
SELECT bp.id AS bird_parameters_id, b.name AS bird_name
|
||||
FROM bird_parameters bp
|
||||
JOIN bird b ON bp.bird_id = b.id
|
||||
WHERE bp.profile_id = (SELECT profile_id FROM profile_cte)
|
||||
),
|
||||
pairs AS (
|
||||
SELECT * FROM (
|
||||
VALUES
|
||||
%s
|
||||
) AS tmp(bird_name, sound_type_name)
|
||||
)
|
||||
INSERT INTO bird_parameters_sound_type (bird_parameters_id, sound_type_id)
|
||||
SELECT
|
||||
bp.bird_parameters_id,
|
||||
st.sound_type_id
|
||||
FROM pairs p
|
||||
JOIN bird_parameters_cte bp ON bp.bird_name = p.bird_name
|
||||
JOIN sound_type_cte st ON st.sound_type_name = p.sound_type_name;%n""", parameters.stream().flatMap(s -> s.enabledSoundTypes().stream().map(st -> "('" + s.bird() + "', '" + st.name() + "')")).collect(Collectors.joining(",\n")));*/
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user