Skip to content

Commit

Permalink
- Update dependencies.
Browse files Browse the repository at this point in the history
- Code polishing.
  • Loading branch information
LSmyrnaios committed Dec 18, 2023
1 parent db72f2a commit 164ff9a
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.2</version>
<version>3.2.3</version>
<configuration>
<!--<excludes>
<exclude>some test to exclude here</exclude>
Expand All @@ -85,7 +85,7 @@
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
<version>1.2.12</version>
<version>1.2.13</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
Expand All @@ -99,14 +99,14 @@
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.12</version>
<version>1.2.13</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.2</version>
<version>1.17.1</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
Expand Down Expand Up @@ -141,7 +141,7 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.15.0</version>
<version>2.15.1</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.json/json -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

/**
* This class implements the new custom exception: "DomainWithUnsupportedHEADmethodException".
* This exception is designed to be thrown when the domain is caught to not support HTTP HEAD method and we don't want to continue connecting to it's internal links.
* The possible-docInternalLinks will be connected with "GET" method by default, so the exclusion promoted by this exception is of low risk.
* Note that when the exception is thrown, we just return from the "visitPage()"-method , we do not block the domain, since an inputUrl from this domain might be a docUrl which we don't want to miss.
* This exception is designed to be thrown when the domain is found not to support the HTTP HEAD method, and we don't want to continue connecting to its internal links.
* The possible document-related internalLinks will be connected with the "GET" method by default, so this exception is thrown for the links which are less likely to be docUrls, after all the likely ones have been checked.
* Note that when the exception is thrown, we just return from the "visitPage()"-method; we do not block the domain, since an inputUrl from this domain might be a docUrl which we don't want to miss.
* ...
* @author Lampros Smyrnaios
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public class HttpConnUtils
public static final Set<String> domainsWithSlashRedirect = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());


public static final Pattern ENDING_WITH_SLASH_OR_EXTENSION_FILTER = Pattern.compile(".*(?:(?:/|\\.[^.?&/_-]{1,7})(?:\\?.+)?|(?:\\?.+))$");
public static final Pattern ENDING_WITH_SLASH_OR_EXTENSION_FILTER = Pattern.compile(".*(?:(?:/|\\.[^.?&/_-]{1,7})(?:\\?.+)?|\\?.+)$");
// The above regex assumes file-extensions up to 7-chars long. Rare file-extensions with up to 10 or more characters exist,
// but then the risk of identifying irrelevant dot-prepended strings as extensions increases (some urls end with: "<other chars>.NOTEXTENSIONSTRING").

Expand Down

0 comments on commit 164ff9a

Please sign in to comment.