Skip to content

Commit

Permalink
scrape environmental stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
paulheinr committed Jul 12, 2024
1 parent e1b3a90 commit 1cb5fd7
Show file tree
Hide file tree
Showing 8 changed files with 403 additions and 34 deletions.
8 changes: 8 additions & 0 deletions src/main/java/org/tub/vsp/bvwp/JSoupUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ public static Double parseDoubleOrElseNull(String s) {
}
}

/**
 * Parses {@code s} with {@code parseDouble} and converts the checked {@link ParseException}
 * into an unchecked exception, for callers that treat an unparseable value as a hard error.
 *
 * @param s the text to parse
 * @return whatever {@code parseDouble} returns for {@code s}
 * @throws IllegalArgumentException if {@code parseDouble} rejects {@code s}; the original
 *                                  {@link ParseException} is kept as the cause
 */
public static Double parseDoubleOrElseThrow(String s) {
    try {
        return parseDouble(s);
    } catch (ParseException e) {
        // IllegalArgumentException is still a RuntimeException, so existing catch blocks keep working,
        // but the failure now names the offending input.
        throw new IllegalArgumentException("Could not parse '" + s + "' as a double", e);
    }
}

public static Optional<Element> getTableByKeyAndContainedText(Document document, String cssClass,
String textToContain) {
return getTableByCssKeyAndPredicate(document, cssClass, (e) -> anyRowContainsText(e, textToContain));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class StreetBaseDataContainer {
private StreetProjectInformationDataContainer projectInformation;
private StreetPhysicalEffectDataContainer physicalEffect;
private StreetCostBenefitAnalysisDataContainer costBenefitAnalysis;
private StreetEnvironmentalDataContainer environmentalCriteria;

public String getUrl() {
return url;
Expand Down Expand Up @@ -45,6 +46,15 @@ public StreetBaseDataContainer setCostBenefitAnalysis(StreetCostBenefitAnalysisD
return this;
}

/**
 * Returns the environmental-criteria container held by this object.
 *
 * @return the stored {@code environmentalCriteria} field; may be {@code null}
 */
public StreetEnvironmentalDataContainer getEnvironmentalCriteria() {
return environmentalCriteria;
}

/**
 * Stores the given environmental-criteria container on this object.
 *
 * @param environmentalCriteria the container to store
 * @return this instance, so that setter calls can be chained
 */
public StreetBaseDataContainer setEnvironmentalCriteria(StreetEnvironmentalDataContainer environmentalCriteria) {
this.environmentalCriteria = environmentalCriteria;
return this;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -55,25 +65,18 @@ public boolean equals(Object o) {
}

StreetBaseDataContainer that = (StreetBaseDataContainer) o;

if (!Objects.equals(url, that.url)) {
return false;
}
if (!Objects.equals(projectInformation, that.projectInformation)) {
return false;
}
if (!Objects.equals(physicalEffect, that.physicalEffect)) {
return false;
}
return Objects.equals(costBenefitAnalysis, that.costBenefitAnalysis);
return Objects.equals(url, that.url) && Objects.equals(projectInformation, that.projectInformation) && Objects.equals(physicalEffect,
that.physicalEffect) && Objects.equals(costBenefitAnalysis, that.costBenefitAnalysis) && Objects.equals(environmentalCriteria,
that.environmentalCriteria);
}

/**
 * Computes a hash over the same five fields that {@code equals} compares
 * ({@code url}, {@code projectInformation}, {@code physicalEffect},
 * {@code costBenefitAnalysis}, {@code environmentalCriteria}).
 * A {@code null} field contributes {@code 0}.
 *
 * @return the combined hash code
 */
@Override
public int hashCode() {
    int result = Objects.hashCode(url);
    result = 31 * result + Objects.hashCode(projectInformation);
    result = 31 * result + Objects.hashCode(physicalEffect);
    result = 31 * result + Objects.hashCode(costBenefitAnalysis);
    result = 31 * result + Objects.hashCode(environmentalCriteria);
    return result;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package org.tub.vsp.bvwp.data.container.base.street;

import org.tub.vsp.bvwp.data.type.EnvironmentalCriteria;

import java.util.Objects;

/**
 * Mutable holder for nine {@link EnvironmentalCriteria} values of a street project.
 * The numeric suffix of each field (21 ... 29) mirrors the criterion numbers "2.1" ... "2.9"
 * that the mapper looks up. Every setter returns {@code this} so calls can be chained.
 * All fields start out as {@code null}.
 */
public class StreetEnvironmentalDataContainer {
    private EnvironmentalCriteria naturschutzVorrangflaechen21;
    private EnvironmentalCriteria natura2000Gebiete22;
    private EnvironmentalCriteria unzerschnitteneKernraeume23;
    private EnvironmentalCriteria unzerschnitteneGrossraeume24;
    private EnvironmentalCriteria flaechenInanspruchnahme25;
    private EnvironmentalCriteria ueberschwemmungsgebiete26;
    private EnvironmentalCriteria wasserschutzgebiete27;
    private EnvironmentalCriteria verkehrsarmeRaeume28;
    private EnvironmentalCriteria kulturLandschaftsschutz29;

    /** @return the stored value for criterion 2.1, possibly {@code null} */
    public EnvironmentalCriteria getNaturschutzVorrangflaechen21() {
        return this.naturschutzVorrangflaechen21;
    }

    /** Stores the value for criterion 2.1 and returns this container. */
    public StreetEnvironmentalDataContainer setNaturschutzVorrangflaechen21(EnvironmentalCriteria naturschutzVorrangflaechen21) {
        this.naturschutzVorrangflaechen21 = naturschutzVorrangflaechen21;
        return this;
    }

    /** @return the stored value for criterion 2.2, possibly {@code null} */
    public EnvironmentalCriteria getNatura2000Gebiete22() {
        return this.natura2000Gebiete22;
    }

    /** Stores the value for criterion 2.2 and returns this container. */
    public StreetEnvironmentalDataContainer setNatura2000Gebiete22(EnvironmentalCriteria natura2000Gebiete22) {
        this.natura2000Gebiete22 = natura2000Gebiete22;
        return this;
    }

    /** @return the stored value for criterion 2.3, possibly {@code null} */
    public EnvironmentalCriteria getUnzerschnitteneKernraeume23() {
        return this.unzerschnitteneKernraeume23;
    }

    /** Stores the value for criterion 2.3 and returns this container. */
    public StreetEnvironmentalDataContainer setUnzerschnitteneKernraeume23(EnvironmentalCriteria unzerschnitteneKernraeume23) {
        this.unzerschnitteneKernraeume23 = unzerschnitteneKernraeume23;
        return this;
    }

    /** @return the stored value for criterion 2.4, possibly {@code null} */
    public EnvironmentalCriteria getUnzerschnitteneGrossraeume24() {
        return this.unzerschnitteneGrossraeume24;
    }

    /** Stores the value for criterion 2.4 and returns this container. */
    public StreetEnvironmentalDataContainer setUnzerschnitteneGrossraeume24(EnvironmentalCriteria unzerschnitteneGrossraeume24) {
        this.unzerschnitteneGrossraeume24 = unzerschnitteneGrossraeume24;
        return this;
    }

    /** @return the stored value for criterion 2.5, possibly {@code null} */
    public EnvironmentalCriteria getFlaechenInanspruchnahme25() {
        return this.flaechenInanspruchnahme25;
    }

    /** Stores the value for criterion 2.5 and returns this container. */
    public StreetEnvironmentalDataContainer setFlaechenInanspruchnahme25(EnvironmentalCriteria flaechenInanspruchnahme25) {
        this.flaechenInanspruchnahme25 = flaechenInanspruchnahme25;
        return this;
    }

    /** @return the stored value for criterion 2.6, possibly {@code null} */
    public EnvironmentalCriteria getUeberschwemmungsgebiete26() {
        return this.ueberschwemmungsgebiete26;
    }

    /** Stores the value for criterion 2.6 and returns this container. */
    public StreetEnvironmentalDataContainer setUeberschwemmungsgebiete26(EnvironmentalCriteria ueberschwemmungsgebiete26) {
        this.ueberschwemmungsgebiete26 = ueberschwemmungsgebiete26;
        return this;
    }

    /** @return the stored value for criterion 2.7, possibly {@code null} */
    public EnvironmentalCriteria getWasserschutzgebiete27() {
        return this.wasserschutzgebiete27;
    }

    /** Stores the value for criterion 2.7 and returns this container. */
    public StreetEnvironmentalDataContainer setWasserschutzgebiete27(EnvironmentalCriteria wasserschutzgebiete27) {
        this.wasserschutzgebiete27 = wasserschutzgebiete27;
        return this;
    }

    /** @return the stored value for criterion 2.8, possibly {@code null} */
    public EnvironmentalCriteria getVerkehrsarmeRaeume28() {
        return this.verkehrsarmeRaeume28;
    }

    /** Stores the value for criterion 2.8 and returns this container. */
    public StreetEnvironmentalDataContainer setVerkehrsarmeRaeume28(EnvironmentalCriteria verkehrsarmeRaeume28) {
        this.verkehrsarmeRaeume28 = verkehrsarmeRaeume28;
        return this;
    }

    /** @return the stored value for criterion 2.9, possibly {@code null} */
    public EnvironmentalCriteria getKulturLandschaftsschutz29() {
        return this.kulturLandschaftsschutz29;
    }

    /** Stores the value for criterion 2.9 and returns this container. */
    public StreetEnvironmentalDataContainer setKulturLandschaftsschutz29(EnvironmentalCriteria kulturLandschaftsschutz29) {
        this.kulturLandschaftsschutz29 = kulturLandschaftsschutz29;
        return this;
    }

    /**
     * Two containers are equal when they are of exactly the same class and all nine
     * criteria are pairwise equal (with {@code null} equal only to {@code null}).
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }

        Object[] mine = orderedFields();
        Object[] theirs = ((StreetEnvironmentalDataContainer) o).orderedFields();
        for (int i = 0; i < mine.length; i++) {
            if (!Objects.equals(mine[i], theirs[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Folds the nine criteria, in declaration order, into a 31-based polynomial hash.
     * A {@code null} criterion contributes {@code 0}.
     */
    @Override
    public int hashCode() {
        // Starting at 0 makes the first step yield exactly the first field's hash.
        int hash = 0;
        for (Object field : orderedFields()) {
            hash = 31 * hash + Objects.hashCode(field);
        }
        return hash;
    }

    /** Snapshot of all nine fields in declaration order; shared by equals and hashCode. */
    private Object[] orderedFields() {
        return new Object[]{
                naturschutzVorrangflaechen21,
                natura2000Gebiete22,
                unzerschnitteneKernraeume23,
                unzerschnitteneGrossraeume24,
                flaechenInanspruchnahme25,
                ueberschwemmungsgebiete26,
                wasserschutzgebiete27,
                verkehrsarmeRaeume28,
                kulturLandschaftsschutz29
        };
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.tub.vsp.bvwp.data.mapper.environmentalCriteria;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.tub.vsp.bvwp.JSoupUtils;
import org.tub.vsp.bvwp.data.container.base.street.StreetEnvironmentalDataContainer;
import org.tub.vsp.bvwp.data.type.EnvironmentalCriteria;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.NoSuchElementException;

/**
 * Extracts the environmental criteria 2.1 - 2.9 from a project document into a
 * {@link StreetEnvironmentalDataContainer}.
 *
 * <p>The relevant table is the {@code table.table_webprins} whose first four header cells read
 * "Nr.", "Kriterium", "Beschreibung", "Bewertung". Within a row, the cells at index 2 and 4 are
 * parsed as numbers and the cell at index 6 as the rating.
 */
public class StreetEnvironmentalCriteriaMapper {
    private static final String TABLE_CSS_QUERY = "table.table_webprins";
    private static final List<String> EXPECTED_HEADERS = List.of("Nr.", "Kriterium", "Beschreibung", "Bewertung");

    /** Column holding the criterion number ("2.1", ...). */
    private static final int COL_NUMBER = 0;
    /** Column parsed into the first component of a description. */
    private static final int COL_ABSOLUT = 2;
    /** Column parsed into the second component of a description. */
    private static final int COL_BETROFFEN = 4;
    /** Column holding the rating text. */
    private static final int COL_BEWERTUNG = 6;

    /** Cell text that stands for "no rating". */
    private static final String NO_BEWERTUNG = "-";

    /**
     * Maps the environmental criteria table of {@code document}.
     *
     * @param document the parsed project page
     * @return a container with all nine criteria filled
     * @throws NoSuchElementException if the table or one of the criterion rows is missing
     * @throws RuntimeException       if a numeric cell cannot be parsed (raised by
     *                                {@code JSoupUtils.parseDoubleOrElseThrow})
     * @throws IllegalArgumentException if a rating cell is neither "-" nor the name of an
     *                                  {@link EnvironmentalCriteria.UmweltBewertung} constant
     */
    public static StreetEnvironmentalDataContainer mapDocument(Document document) {
        Element envTable = JSoupUtils.getTableByCssKeyAndPredicate(document, TABLE_CSS_QUERY,
                                     StreetEnvironmentalCriteriaMapper::isEnvironmentalCriteriaTable)
                                     .orElseThrow(() -> new NoSuchElementException(
                                             "No environmental criteria table (" + TABLE_CSS_QUERY + ") found in document"));

        // 2.2 and 2.4 carry their numbers in sub-rows below the head row; all others are single rows.
        return new StreetEnvironmentalDataContainer()
                .setNaturschutzVorrangflaechen21(extractFlatEnvironmentalCriteria(envTable, "2.1"))
                .setNatura2000Gebiete22(extractWithSubEnvironmentalCriteria(envTable, "2.2", 2))
                .setUnzerschnitteneKernraeume23(extractFlatEnvironmentalCriteria(envTable, "2.3"))
                .setUnzerschnitteneGrossraeume24(extractWithSubEnvironmentalCriteria(envTable, "2.4", 4))
                .setFlaechenInanspruchnahme25(extractFlatEnvironmentalCriteria(envTable, "2.5"))
                .setUeberschwemmungsgebiete26(extractFlatEnvironmentalCriteria(envTable, "2.6"))
                .setWasserschutzgebiete27(extractFlatEnvironmentalCriteria(envTable, "2.7"))
                .setVerkehrsarmeRaeume28(extractFlatEnvironmentalCriteria(envTable, "2.8"))
                .setKulturLandschaftsschutz29(extractFlatEnvironmentalCriteria(envTable, "2.9"));
    }

    /**
     * Reads a criterion whose numbers and rating all live in the single row keyed by {@code number}.
     */
    private static EnvironmentalCriteria extractFlatEnvironmentalCriteria(Element table, String number) {
        Element row = JSoupUtils.firstRowWithKeyInCol(table, number, COL_NUMBER)
                                .orElseThrow(() -> new NoSuchElementException(
                                        "No row found for environmental criterion " + number));

        // Select the cells once instead of re-querying the row for every column.
        List<Element> cells = row.select("td");
        EnvironmentalCriteria.Description description =
                parseDescription(cells.get(COL_ABSOLUT).text(), cells.get(COL_BETROFFEN).text());
        EnvironmentalCriteria.UmweltBewertung bewertung = handleBewertung(cells.get(COL_BEWERTUNG).text());

        return new EnvironmentalCriteria(List.of(description), bewertung);
    }

    /**
     * Reads a criterion whose rating is in the head row keyed by {@code number} and whose numbers
     * are in the {@code rows} rows immediately following that head row.
     */
    private static EnvironmentalCriteria extractWithSubEnvironmentalCriteria(Element table, String number, int rows) {
        int headRowIndex = JSoupUtils.getFirstRowIndexWithTextInCol(table, number, COL_NUMBER)
                                     .orElseThrow(() -> new NoSuchElementException(
                                             "No head row found for environmental criterion " + number));
        EnvironmentalCriteria.UmweltBewertung umweltBewertung =
                handleBewertung(JSoupUtils.getTextFromRowAndCol(table, headRowIndex, COL_BEWERTUNG));

        List<EnvironmentalCriteria.Description> descriptions = new ArrayList<>(rows);
        for (int offset = 1; offset <= rows; offset++) {
            int rowIndex = headRowIndex + offset;
            descriptions.add(parseDescription(
                    JSoupUtils.getTextFromRowAndCol(table, rowIndex, COL_ABSOLUT),
                    JSoupUtils.getTextFromRowAndCol(table, rowIndex, COL_BETROFFEN)));
        }

        return new EnvironmentalCriteria(descriptions, umweltBewertung);
    }

    /** Parses the two numeric cell texts of one row into a description. */
    private static EnvironmentalCriteria.Description parseDescription(String absolutText, String betroffenText) {
        Double absolut = JSoupUtils.parseDoubleOrElseThrow(absolutText);
        Double betroffen = JSoupUtils.parseDoubleOrElseThrow(betroffenText);
        return new EnvironmentalCriteria.Description(absolut, betroffen);
    }

    /**
     * Converts a rating cell into the enum constant of the same (case-insensitive) name.
     *
     * @return the rating, or {@code null} when the cell is exactly "-"
     */
    private static EnvironmentalCriteria.UmweltBewertung handleBewertung(String bewertung) {
        if (bewertung.equals(NO_BEWERTUNG)) {
            return null;
        }
        // Locale.ROOT keeps the mapping stable on machines whose default locale has special
        // casing rules (e.g. Turkish maps 'i' to a dotted capital, which would break valueOf).
        return EnvironmentalCriteria.UmweltBewertung.valueOf(bewertung.toUpperCase(Locale.ROOT));
    }

    /** A table qualifies when the texts of its first four {@code th} cells equal the expected headers. */
    private static boolean isEnvironmentalCriteriaTable(Element element) {
        List<String> firstHeaders = element.select("th").stream()
                                           .map(Element::text)
                                           .limit(EXPECTED_HEADERS.size())
                                           .toList();
        return firstHeaders.equals(EXPECTED_HEADERS);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package org.tub.vsp.bvwp.data.type;

import java.util.List;

/**
 * One environmental criterion: a list of numeric descriptions plus an optional rating.
 *
 * <p>The description list is copied on construction, so the record cannot be changed afterwards
 * through the list that was passed in, and the list returned by {@link #description()} is
 * unmodifiable.
 *
 * @param description the numeric descriptions; must not be {@code null} or contain {@code null}
 * @param bewertung   the rating, or {@code null} when no rating is given
 */
public record EnvironmentalCriteria(List<Description> description, UmweltBewertung bewertung) {
    /**
     * @throws NullPointerException if {@code description} is {@code null} or contains {@code null}
     */
    public EnvironmentalCriteria {
        // Defensive copy: callers may hand in a mutable list they keep modifying.
        description = List.copyOf(description);
    }

    /**
     * A pair of numeric values belonging to a criterion.
     * NOTE(review): the units and exact meaning of the two values are not visible here - confirm
     * against the scraped table.
     */
    public record Description(double absolute, double betroffenheit) {
    }

    /** Rating levels of a criterion. */
    public enum UmweltBewertung {
        HOCH, MITTEL, GERING
    }
}
4 changes: 3 additions & 1 deletion src/main/java/org/tub/vsp/bvwp/scraping/StreetScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.jsoup.nodes.Document;
import org.tub.vsp.bvwp.data.container.base.street.StreetBaseDataContainer;
import org.tub.vsp.bvwp.data.mapper.costBenefit.StreetCostBenefitMapper;
import org.tub.vsp.bvwp.data.mapper.environmentalCriteria.StreetEnvironmentalCriteriaMapper;
import org.tub.vsp.bvwp.data.mapper.physicalEffect.StreetPhysicalEffectMapper;
import org.tub.vsp.bvwp.data.mapper.projectInformation.ProjectInformationMapperUtils;
import org.tub.vsp.bvwp.data.mapper.projectInformation.StreetProjectInformationMapper;
Expand Down Expand Up @@ -72,7 +73,8 @@ public Optional<StreetBaseDataContainer> extractBaseData(Document doc, String ur
return Optional.of(streetBaseDataContainer.setUrl(url)
.setProjectInformation(StreetProjectInformationMapper.mapDocument(doc))
.setPhysicalEffect(StreetPhysicalEffectMapper.mapDocument(doc))
.setCostBenefitAnalysis(StreetCostBenefitMapper.mapDocument(doc)));
.setCostBenefitAnalysis(StreetCostBenefitMapper.mapDocument(doc))
.setEnvironmentalCriteria(StreetEnvironmentalCriteriaMapper.mapDocument(doc)));
}

private boolean checkIfProjectIsScrapable(Document doc) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package org.tub.vsp.bvwp.data.mapper.environmentalCriteria;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.tub.vsp.bvwp.data.LocalFileAccessor;
import org.tub.vsp.bvwp.data.container.base.street.StreetEnvironmentalDataContainer;
import org.tub.vsp.bvwp.data.type.EnvironmentalCriteria;

import java.io.IOException;
import java.util.List;

/**
 * Checks the mapper against the locally stored page {@code a20.html}.
 */
class StreetEnvironmentalCriteriaMapperTest {
    /**
     * Verifies rating and descriptions of all nine criteria.
     * JUnit's {@code assertEquals} takes (expected, actual); keeping that order makes a
     * failure message report the right value as "expected".
     */
    @Test
    void test_a20() throws IOException {
        StreetEnvironmentalDataContainer result = StreetEnvironmentalCriteriaMapper.mapDocument(LocalFileAccessor.getLocalDocument("a20.html"));

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.MITTEL, result.getNaturschutzVorrangflaechen21().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(1.6, 0.01)),
                result.getNaturschutzVorrangflaechen21().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getNatura2000Gebiete22().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(4., 0.0),
                        new EnvironmentalCriteria.Description(0., 0.0)),
                result.getNatura2000Gebiete22().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getUnzerschnitteneKernraeume23().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(15.9, 0.1)),
                result.getUnzerschnitteneKernraeume23().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getUnzerschnitteneGrossraeume24().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(5.9, 0.04),
                        new EnvironmentalCriteria.Description(14.0, 0.09),
                        new EnvironmentalCriteria.Description(0.0, 0.0),
                        new EnvironmentalCriteria.Description(0.0, 0.0)),
                result.getUnzerschnitteneGrossraeume24().description());

        // Criterion 2.5 carries no rating in this document.
        Assertions.assertNull(result.getFlaechenInanspruchnahme25().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(870.9, 0.0)),
                result.getFlaechenInanspruchnahme25().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.GERING, result.getUeberschwemmungsgebiete26().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(0.0, 0.0)),
                result.getUeberschwemmungsgebiete26().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getWasserschutzgebiete27().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(0.5, 0.0)),
                result.getWasserschutzgebiete27().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getVerkehrsarmeRaeume28().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(18941.4, 0.0)),
                result.getVerkehrsarmeRaeume28().description());

        Assertions.assertEquals(EnvironmentalCriteria.UmweltBewertung.HOCH, result.getKulturLandschaftsschutz29().bewertung());
        Assertions.assertEquals(List.of(new EnvironmentalCriteria.Description(76.7, 0.49)),
                result.getKulturLandschaftsschutz29().description());
    }
}
Loading

0 comments on commit 1cb5fd7

Please sign in to comment.