diff --git a/src/main/java/org/tub/vsp/bvwp/scraping/StreetScraper.java b/src/main/java/org/tub/vsp/bvwp/scraping/StreetScraper.java index 36a69cd..9eccbe3 100644 --- a/src/main/java/org/tub/vsp/bvwp/scraping/StreetScraper.java +++ b/src/main/java/org/tub/vsp/bvwp/scraping/StreetScraper.java @@ -51,20 +51,14 @@ public List extractAllLocalBaseData(String path, String return files.stream() .filter(file -> file.getName().startsWith(prefix)) .filter(file -> file.getName().matches(regexToMatch)) - .filter(file -> !file.getName().matches("A20-G10-SH.html")) // gibt es nochmal mit A20-G10-SH-NI. Muss man beide zusammenzählen? kai, feb'24 -// .filter(file -> !file.getName().matches("A57-G10-NW.html")) // sehr hohes DTV für 4 Spuren. ?? kai, mar'24 -// .filter(file -> !file.getName().matches("A81-G50-BW.html")) // sehr hohes DTV für 4 Spuren. ?? kai, mar'24 - .filter(file -> !file.getName().matches("A61-G10-RP-T2-RP.html")) // benefits and costs for T1 and T2 are same; there are no revised investment costs from TUD for T2 - .filter(file -> !file.getName().matches("A3-G30-HE-T05-HE.html")) // benefits and costs for T04 and T05 are same; there are no revised investment costs from TUD for T05 - .filter(file -> !file.getName().matches("A3-G30-HE-T08-HE.html")) // benefits and costs for T06 and T06 are same; there are no revised investment costs from TUD for T08 - .filter(file -> !file.getName().matches("A40-G30-NW-T4-NW.html")) // dto - .filter(file -> !file.getName().matches("A003-G061-BY.html")) // dto .filter(file -> !file.getName().matches( regexToExclude )) .map(this::extractLocalBaseData) .filter(Optional::isPresent) .map(Optional::get) .sorted(Comparator.comparing(StreetBaseDataContainer::getUrl)) .toList(); + + // yyyy One could (should?) move the excluded projects further up into the "regexToExclude". 
kai, nov'24 } @Override diff --git a/src/main/java/org/tub/vsp/bvwp/users/kn/Gbl.java b/src/main/java/org/tub/vsp/bvwp/users/kn/Gbl.java new file mode 100644 index 0000000..471db81 --- /dev/null +++ b/src/main/java/org/tub/vsp/bvwp/users/kn/Gbl.java @@ -0,0 +1,15 @@ +package org.tub.vsp.bvwp.users.kn; +/** Package-private, non-instantiable holder of static runtime-check helpers. */ +class Gbl { + private Gbl(){} // do not instantiate +/** Checks condition with a generic default failure message; delegates to the two-argument overload. */ + public static final void assertTrue( boolean condition ) { + assertTrue( condition, "assertion failed; look into code to find reasin" ); // NOTE(review): "reasin" in the message is presumably a typo for "reason" + } + public static final void assertTrue( boolean condition, String msg ) { // throws RuntimeException( msg ) if condition is false; otherwise does nothing + if ( !condition ) { + throw new RuntimeException( msg ); + } + } + +} diff --git a/src/main/java/org/tub/vsp/bvwp/users/kn/RunLocalCsvScrapingKN.java b/src/main/java/org/tub/vsp/bvwp/users/kn/RunLocalCsvScrapingKN.java index 11c3f2e..c6b5d8d 100644 --- a/src/main/java/org/tub/vsp/bvwp/users/kn/RunLocalCsvScrapingKN.java +++ b/src/main/java/org/tub/vsp/bvwp/users/kn/RunLocalCsvScrapingKN.java @@ -50,16 +50,28 @@ public static void main(String[] args) throws IOException{ String filePath = "../../shared-svn/"; Map constructionCostsByProject = BvwpUtils.getConstructionCostsFromTudFile(filePath ); - final String regexToMatch = ".*"; + final String regexToMatch = "(A.*)|(B288_A524-G20-NW.html)"; // with prefix="" (!), this yields the same 213 BAB projects as Richard gets. // final String regexToMatch = "(A...B.*)|(A....B.*)"; - -// final String regexToExclude = "(A...B.*)|(A....B.*)"; // Bundesstrassen, die von Autobahnen ausgehen. - final String regexToExclude = ""; + + StringBuilder strb = new StringBuilder(); // collects the '|'-separated file-name alternatives for regexToExclude; NOTE(review): '.' in the names below is an unescaped regex wildcard + strb.append("A20-G10-SH.html"); // also exists as A20-G10-SH-NI. Do the two have to be added up? kai, feb'24 +// strb.append("A57-G10-NW.html")) // very high DTV (average daily traffic) for 4 lanes. ?? kai, mar'24 +// strb.append("A81-G50-BW.html")) // very high DTV (average daily traffic) for 4 lanes. ?? 
kai, mar'24 + strb.append("|A61-G10-RP-T2-RP.html"); // benefits and costs for T1 and T2 are same; there are no revised investment costs from TUD for T2 + strb.append("|A3-G30-HE-T05-HE.html"); // benefits and costs for T04 and T05 are same; there are no revised investment costs from TUD for T05 + strb.append("|A3-G30-HE-T08-HE.html"); // benefits and costs for T06 and T08 are same; there are no revised investment costs from TUD for T08 + strb.append("|A40-G30-NW-T4-NW.html"); // ditto + strb.append("|A003-G061-BY.html"); // ditto + strb.append("|A860_B31-G20-BW-T2-BW.html"); // excluded because the NKA (benefit-cost analysis) of T1 is used AND the sub-project itself is a BStr (federal road) + strb.append("|A860_B31-G20-BW-T3-BW.html"); // excluded because the NKA (benefit-cost analysis) of T1 is used AND the sub-project itself is a BStr (federal road) + strb.append("|A860_B31-G20-BW-T4-BW.html"); // excluded because the NKA (benefit-cost analysis) of T1 is used AND the sub-project itself is a BStr (federal road) + strb.append("|A860_B31-G20-BW-T5-BW.html"); // BStr, since the sub-project is evaluated on its own AND an NKA exists for the sub-project yyyy would have to be included for BStr! + final String regexToExclude = strb.toString(); logger.info( "Starting scraping" ); // yyyy man könnte (sollte?) den table in den StreetAnalysisDataContainer mit hinein geben, und die Werte gleich dort eintragen. 
kai, feb'24 List allStreetBaseData = new StreetScraper() - .extractAllLocalBaseData( "./data/street/all", "A", regexToMatch, regexToExclude ) + .extractAllLocalBaseData( "./data/street/all", "", regexToMatch, regexToExclude ) .stream() .map(streetBaseDataContainer -> new StreetAnalysisDataContainer( streetBaseDataContainer, @@ -70,8 +82,7 @@ public static void main(String[] args) throws IOException{ logger.info( "Writing csv and generating table:" ); Table table = new StreetCsvWriter( "output/street_data.csv" ).writeCsv( allStreetBaseData ); - System.out.println( table.shape() ); - System.exit(-1); + Gbl.assertTrue( table.rowCount()==213, "wrong number of (BAB) projects; should be 213 but is "+table.rowCount() ); /* fails with a RuntimeException unless the table has exactly 213 rows */ final String PLUS_110_PCT = "plus110pct"; table.addColumns( table.numberColumn( INVCOST_SUM_ORIG ).multiply( 2.1 ).setName( PLUS_110_PCT ) );