Skip to content

Commit

Permalink
Try jbrowserdriver (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
koppor committed Sep 8, 2024
1 parent 0ac060a commit 7e84a60
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 142 deletions.
7 changes: 1 addition & 6 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,6 @@ application {
'--add-exports=javafx.base/com.sun.javafx.event=org.jabref.merged.module',
'--add-exports=javafx.controls/com.sun.javafx.scene.control=org.jabref.merged.module',

// Required by https://github.com/jcefmaven/jcefmaven?tab=readme-ov-file#limitations
'--add-exports=java.base/java.lang=org.jabref.merged.module',
'--add-exports=java.desktop/sun.awt=org.jabref.merged.module',
'--add-exports=java.desktop/sun.java2d=org.jabref.merged.module',

// Fix for https://github.com/JabRef/jabref/issues/11198
'--add-opens=javafx.graphics/javafx.scene=org.jabref.merged.module',
'--add-opens=javafx.controls/javafx.scene.control=org.jabref.merged.module',
Expand Down Expand Up @@ -261,7 +256,7 @@ dependencies {
implementation 'org.controlsfx:controlsfx:11.2.1'

// region HTTP clients
implementation 'me.friwi:jcefmaven:126.2.0' // used for web scraping; https://github.com/jcefmaven/jcefmaven
implementation 'com.machinepublishers:jbrowserdriver:1.1.1' // used for web scraping; https://github.com/MachinePublishers/jBrowserDriver
implementation 'org.jsoup:jsoup:1.18.1'
implementation 'com.konghq:unirest-java-core:4.4.4'
implementation 'com.konghq:unirest-modules-gson:4.4.4'
Expand Down
6 changes: 4 additions & 2 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@
requires org.glassfish.hk2.api;

// region: http clients
requires jcefmaven;
requires transitive jbrowserdriver;
requires org.openqa.selenium.core;
requires org.openqa.grid.selenium;
requires org.openqa.selenium.remote;
requires org.apache.httpcomponents.core5.httpcore5;
requires org.jsoup;
requires unirest.java.core;
Expand Down Expand Up @@ -184,6 +187,5 @@
requires mslinks;
requires org.antlr.antlr4.runtime;
requires org.libreoffice.uno;
requires jcef;
// endregion
}
147 changes: 13 additions & 134 deletions src/main/java/org/jabref/logic/importer/fetcher/ACS.java
Original file line number Diff line number Diff line change
@@ -1,35 +1,25 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import javax.swing.SwingUtilities;

import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;

import me.friwi.jcefmaven.CefAppBuilder;
import me.friwi.jcefmaven.MavenCefAppHandlerAdapter;
import org.cef.CefApp;
import org.cef.CefClient;
import org.cef.CefSettings;
import org.cef.browser.CefBrowser;
import org.cef.browser.CefFrame;
import org.cef.browser.CefMessageRouter;
import org.cef.handler.CefDisplayHandlerAdapter;
import org.cef.handler.CefLoadHandlerAdapter;
import org.cef.network.CefRequest;
import com.machinepublishers.jbrowserdriver.JBrowserDriver;
import com.machinepublishers.jbrowserdriver.Settings;
import com.machinepublishers.jbrowserdriver.Timezone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* FulltextFetcher implementation that attempts to find a PDF URL at <a href="https://pubs.acs.org/">ACS</a>.
*
* Alternatives considered: https://stackoverflow.com/a/53099311/873282
*/
public class ACS implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(ACS.class);
Expand All @@ -54,130 +44,19 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {

String source = SOURCE.formatted(doi.get().getDOI());

CompletableFuture<Void> result = new CompletableFuture<>();

System.out.println(Thread.currentThread().getName());
try {
SwingUtilities.invokeAndWait(() -> {
try {
startBrowser(result);
} catch (
IOException e) {
throw new RuntimeException(e);
}
});
} catch (
InterruptedException e) {
throw new RuntimeException(e);
} catch (
InvocationTargetException e) {
throw new RuntimeException(e);
}
// You can optionally pass a Settings object here,
// constructed using Settings.Builder
JBrowserDriver driver = new JBrowserDriver(Settings.builder().
timezone(Timezone.AMERICA_NEWYORK).build());

try {
Thread.sleep(10000);
} catch (
InterruptedException e) {
throw new RuntimeException(e);
}
driver.get(source);
System.out.println(driver.getStatusCode());
System.out.println(driver.getPageSource());
driver.quit();

return Optional.empty();
}

/**
 * Experimental: boots a CEF application through jcefmaven, creates a client and a browser,
 * registers load/display handlers that print diagnostics to stdout, and triggers a page load.
 *
 * The URL loaded here is the hard-coded placeholder "ftps://lalala.notfound"; the real article
 * URL is not passed into this method (the commented-out loadURL(source) call below refers to a
 * variable that is not a parameter of this method).
 *
 * @param result future that is completed (with null) from the onLoadEnd callback
 * @throws IOException if the CEF application cannot be built; the original exception is the cause
 */
private static void startBrowser(CompletableFuture<Void> result) throws IOException {
CefAppBuilder builder = new CefAppBuilder();

// Set an app handler. Do not use CefApp.addAppHandler(...), it will break your code on MacOSX!
builder.setAppHandler(new MavenCefAppHandlerAdapter() {
@Override
public void stateHasChanged(CefApp.CefAppState state) {
// Debug output of every CEF state transition.
System.out.println(state);
// Shutdown the app if the native CEF part is terminated
if (state == CefApp.CefAppState.TERMINATED) {
// Intentionally empty for now: the exit call below is disabled.
// calling System.exit(0) appears to be causing assert errors,
// as its firing before all of the CEF objects shutdown.
//System.exit(0);
}
}
});

// builder.getCefSettings().windowless_rendering_enabled = true;

CefApp cefApp;
try {
cefApp = builder.build();
} catch (Exception e) {
// Any build failure is logged and surfaced to the caller as an IOException.
LOGGER.error("Could not initialize CEF", e);
throw new IOException(e);
}

// Disabled experiment: pumping the CEF message loop manually from a background thread.
/*
new Thread(() -> {
while (true) {
try {
cefApp.doMessageLoopWork(100);
Thread.sleep(10); // Sleep for 10ms between calls
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}).start();
*/

CefClient client = cefApp.createClient();
CefMessageRouter msgRouter = CefMessageRouter.create();
client.addMessageRouter(msgRouter);

// NOTE(review): the two boolean flags are presumably "off-screen rendered" and "transparent" —
// confirm against the CefClient.createBrowser documentation.
// The browser is created before the handlers below are registered on the client.
CefBrowser browser = client.createBrowser("ftps://lalala.notfound", true, false);
// (3) Create a simple message router to receive messages from CEF.

// Load handler: prints a line for each load lifecycle callback.
client.addLoadHandler(new CefLoadHandlerAdapter() {
@Override
public void onLoadingStateChange(CefBrowser browser, boolean isLoading, boolean canGoBack, boolean canGoForward) {
System.out.println("Loading state changed is loading " + isLoading);
}

@Override
public void onLoadStart(CefBrowser browser, CefFrame frame, CefRequest.TransitionType transitionType) {
System.out.println("Load start");
}

@Override
public void onLoadError(CefBrowser browser, CefFrame frame, ErrorCode errorCode, String errorText, String failedUrl) {
// Only prints a marker; errorCode, errorText and failedUrl are not reported,
// and the result future is not completed on this path.
System.out.println("Load error");
}

@Override
public void onLoadEnd(CefBrowser browser, CefFrame frame, int httpStatusCode) {
System.out.println("lalala");
if (frame.isMain()) {
// NOTE(review): the script only evaluates the outerHTML expression and does not call
// console.log, so it is unclear whether the HTML ever reaches onConsoleMessage — verify.
frame.executeJavaScript(
"document.documentElement.outerHTML;",
frame.getURL(),
0
);
}
// Completed on every onLoadEnd call, regardless of whether the frame is the main frame.
result.complete(null);
}
});

// Display handler: prints each console message received from the page.
client.addDisplayHandler(new CefDisplayHandlerAdapter() {
@Override
public boolean onConsoleMessage(CefBrowser browser, CefSettings.LogSeverity level, String message, String source, int line) {
// Capture the result of the JavaScript execution in the console message
System.out.println("Page HTML content:\n" + message);
return true;
}
});

// cefApp.doMessageLoopWork();

// browser.loadURL(source);
// Loads the same placeholder URL that the browser was created with.
browser.loadURL("ftps://lalala.notfound");

// cefApp.doMessageLoopWork(1000);
}

@Override
public TrustLevel getTrustLevel() {
return TrustLevel.PUBLISHER;
Expand Down

0 comments on commit 7e84a60

Please sign in to comment.