From 164ff9afe7500f2b38f9d4fc7c80a5387096eafa Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Mon, 18 Dec 2023 15:23:02 +0200 Subject: [PATCH] - Update dependencies. - Code polishing. --- pom.xml | 10 +++++----- .../DomainWithUnsupportedHEADmethodException.java | 6 +++--- .../util/http/HttpConnUtils.java | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index d9cf90e3..bc4c1cb7 100644 --- a/pom.xml +++ b/pom.xml @@ -61,7 +61,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.2.2 + 3.2.3 @@ -99,14 +99,14 @@ ch.qos.logback logback-classic - 1.2.12 + 1.2.13 org.jsoup jsoup - 1.16.2 + 1.17.1 @@ -141,7 +141,7 @@ commons-io commons-io - 2.15.0 + 2.15.1 diff --git a/src/main/java/eu/openaire/publications_retriever/exceptions/DomainWithUnsupportedHEADmethodException.java b/src/main/java/eu/openaire/publications_retriever/exceptions/DomainWithUnsupportedHEADmethodException.java index ad4465ed..d87b8f92 100644 --- a/src/main/java/eu/openaire/publications_retriever/exceptions/DomainWithUnsupportedHEADmethodException.java +++ b/src/main/java/eu/openaire/publications_retriever/exceptions/DomainWithUnsupportedHEADmethodException.java @@ -3,9 +3,9 @@ /** * This class implements the new custom exception: "DomainWithUnsupportedHEADmethodException". - * This exception is designed to be thrown when the domain is caught to not support HTTP HEAD method and we don't want to continue connecting to it's internal links. - * The possible-docInternalLinks will be connected with "GET" method by default, so the exclusion promoted by this exception is of low risk. - * Note that when the exception is thrown, we just return from the "visitPage()"-method , we do not block the domain, since an inputUrl from this domain might be a docUrl which we don't want to miss. + * This exception is designed to be thrown when the domain is caught to not support HTTP HEAD method, and we don't want to continue connecting to its internal links. 
+ * The possible document-related internalLinks will be connected with the "GET" method by default, so this exception is thrown for the less-likely docUrls, after all the more-likely ones have been checked. * Note that when the exception is thrown, we just return from the "visitPage()"-method; we do not block the domain, since an inputUrl from this domain might be a docUrl which we don't want to miss. * ... * @author Lampros Smyrnaios */ diff --git a/src/main/java/eu/openaire/publications_retriever/util/http/HttpConnUtils.java b/src/main/java/eu/openaire/publications_retriever/util/http/HttpConnUtils.java index 81f3a3ce..501143a8 100644 --- a/src/main/java/eu/openaire/publications_retriever/util/http/HttpConnUtils.java +++ b/src/main/java/eu/openaire/publications_retriever/util/http/HttpConnUtils.java @@ -67,7 +67,7 @@ public class HttpConnUtils public static final Set domainsWithSlashRedirect = Collections.newSetFromMap(new ConcurrentHashMap()); - public static final Pattern ENDING_WITH_SLASH_OR_EXTENSION_FILTER = Pattern.compile(".*(?:(?:/|\\.[^.?&/_-]{1,7})(?:\\?.+)?|(?:\\?.+))$"); + public static final Pattern ENDING_WITH_SLASH_OR_EXTENSION_FILTER = Pattern.compile(".*(?:(?:/|\\.[^.?&/_-]{1,7})(?:\\?.+)?|\\?.+)$"); // The above regex assumes file-extensions up to 7-chars long. Rare file-extensions with up to 10 or more characters exist, // but then the risk of identifying irrelevant dot-prepended strings as extensions increases (some urls end with: ".NOTEXTENSIONSTRING").