Skip to content

Commit

Permalink
- Optimize the "DOC_URL_FILTER"-regex.
Browse files Browse the repository at this point in the history
- Update the user-agent.
- Update dependencies.
  • Loading branch information
LSmyrnaios committed Mar 11, 2024
1 parent 77f51ed commit 61431d5
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version>
<version>3.5.2</version>
<executions>
<execution>
<phase>package</phase>
Expand Down Expand Up @@ -119,7 +119,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.25.0</version>
<version>1.26.1</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.brotli/dec -->
Expand Down Expand Up @@ -147,14 +147,14 @@
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20231013</version>
<version>20240303</version>
</dependency>

<!-- https://mvnrepository.com/artifact/io.minio/minio -->
<dependency>
<groupId>io.minio</groupId>
<artifactId>minio</artifactId>
<version>8.5.7</version>
<version>8.5.9</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public class ConnSupportUtils

public static final ConcurrentHashMap<String, DomainConnectionData> domainsWithConnectionData = new ConcurrentHashMap<>();

public static String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0"; // This should not be "final", another program, using this software as a library, should be able to set its own "UserAgent".
public static String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0"; // This should not be "final", another program, using this software as a library, should be able to set its own "UserAgent".
public static String acceptLanguage = "en-US,en;q=0.5";


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class LoaderAndChecker

public static boolean useIdUrlPairs = true;

public static final Pattern DOC_URL_FILTER = Pattern.compile(".+(pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|viewfile|viewdoc|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*");
public static final Pattern DOC_URL_FILTER = Pattern.compile(".+(pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|view(?:file|doc)|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*");
// "DOC_URL_FILTER" works for lowercase Strings (we make sure they are in lowercase before we check).
// Note that we still need to check whether it's a live link and whether it's actually a docUrl (through its mimeType).

Expand Down

0 comments on commit 61431d5

Please sign in to comment.