-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Integration with Searxng (#10)
* Initial migration from langchain4j * Removed lombok dependency * Removed baseUrl method and added baseUrl argument to builder method as it is mandatory * Some javadoc * Removed unused constructors * Added module to the bom * Added support for language, safeSearch and startPage parameters * Several changes due to PR discussion * Changed back to no-arg builder() * Moved ensureNotNull to constructor from build. Also fixed missed issue in the integration test * Use testcontainers for integration tests * Added INDENT_OUTPUT * Support for optionalParameters and additionalParams * Renamed to optionalParams * Testing for various additional parameter scenarios and some initial metadata result values added * Removed some debugging code * Added some additional metadata fields * Support for logging requests and responses * Less brittle check in should_search_with_additional_params() unit test * reformat --------- Co-authored-by: Martin7-1 <[email protected]>
- Loading branch information
Showing
12 changed files
with
3,256 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
web-search-engines/langchain4j-community-web-search-engine-searxng/pom.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-community</artifactId> | ||
<version>0.36.0-SNAPSHOT</version> | ||
<relativePath>../../pom.xml</relativePath> | ||
</parent> | ||
|
||
<artifactId>langchain4j-community-web-search-engine-searxng</artifactId> | ||
<packaging>jar</packaging> | ||
|
||
<name>LangChain4j :: Web Search Engine :: SearXNG</name> | ||
|
||
<dependencies> | ||
|
||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>com.squareup.retrofit2</groupId> | ||
<artifactId>retrofit</artifactId> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>com.squareup.retrofit2</groupId> | ||
<artifactId>converter-jackson</artifactId> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-databind</artifactId> | ||
</dependency> | ||
|
||
<!-- DEPENDENCY CONFLICT RESOLUTION FOR OKHTTP (START) --> | ||
<dependency> | ||
<groupId>com.squareup.okhttp3</groupId> | ||
<artifactId>okhttp</artifactId> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.jetbrains.kotlin</groupId> | ||
<artifactId>kotlin-stdlib-jdk8</artifactId> | ||
</dependency> | ||
<!-- DEPENDENCY CONFLICT RESOLUTION FOR OKHTTP (END) --> | ||
|
||
<dependency> | ||
<groupId>org.junit.jupiter</groupId> | ||
<artifactId>junit-jupiter</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.assertj</groupId> | ||
<artifactId>assertj-core</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<!-- Visibility for WebSearchEngineIT --> | ||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
<classifier>tests</classifier> | ||
<type>test-jar</type> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.testcontainers</groupId> | ||
<artifactId>testcontainers</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.testcontainers</groupId> | ||
<artifactId>junit-jupiter</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
</dependencies> | ||
|
||
<licenses> | ||
<license> | ||
<name>Apache-2.0</name> | ||
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url> | ||
<distribution>repo</distribution> | ||
<comments>A business-friendly OSS license</comments> | ||
</license> | ||
</licenses> | ||
|
||
</project> |
15 changes: 15 additions & 0 deletions
15
...engine-searxng/src/main/java/dev/langchain4j/community/web/search/searxng/SearXNGApi.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package dev.langchain4j.community.web.search.searxng; | ||
|
||
import retrofit2.Call; | ||
import retrofit2.http.GET; | ||
import retrofit2.http.Headers; | ||
import retrofit2.http.QueryMap; | ||
|
||
import java.util.Map; | ||
|
||
interface SearXNGApi { | ||
|
||
@GET("search") | ||
@Headers({"Content-Type: application/json"}) | ||
Call<SearXNGResponse> search(@QueryMap Map<String, Object> params); | ||
} |
76 changes: 76 additions & 0 deletions
76
...ine-searxng/src/main/java/dev/langchain4j/community/web/search/searxng/SearXNGClient.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package dev.langchain4j.community.web.search.searxng; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import dev.langchain4j.web.search.WebSearchRequest; | ||
import okhttp3.OkHttpClient; | ||
import retrofit2.Response; | ||
import retrofit2.Retrofit; | ||
import retrofit2.converter.jackson.JacksonConverterFactory; | ||
|
||
import java.io.IOException; | ||
import java.time.Duration; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import static com.fasterxml.jackson.databind.SerializationFeature.INDENT_OUTPUT; | ||
|
||
class SearXNGClient { | ||
|
||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().enable(INDENT_OUTPUT); | ||
private final SearXNGApi api; | ||
private final Map<String, Object> optionalParams; | ||
|
||
public SearXNGClient(String baseUrl, Duration timeout, boolean logRequests, boolean logResponses, Map<String, Object> optionalParams) { | ||
this.optionalParams = optionalParams; | ||
OkHttpClient.Builder okHttpClientBuilder = new OkHttpClient.Builder() | ||
.callTimeout(timeout) | ||
.connectTimeout(timeout) | ||
.readTimeout(timeout) | ||
.writeTimeout(timeout); | ||
if (logRequests) { | ||
okHttpClientBuilder.addInterceptor(new SearXNGRequestLoggingInterceptor()); | ||
} | ||
if (logResponses) { | ||
okHttpClientBuilder.addInterceptor(new SearXNGResponseLoggingInterceptor()); | ||
} | ||
Retrofit retrofit = new Retrofit.Builder() | ||
.baseUrl(baseUrl) | ||
.client(okHttpClientBuilder.build()) | ||
.addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) | ||
.build(); | ||
this.api = retrofit.create(SearXNGApi.class); | ||
} | ||
|
||
SearXNGResponse search(WebSearchRequest request) { | ||
try { | ||
final Map<String, Object> args = new HashMap<>(); | ||
if (optionalParams != null) { | ||
args.putAll(optionalParams); | ||
} | ||
if (request.additionalParams() != null) { | ||
args.putAll(request.additionalParams()); | ||
} | ||
args.put("q", request.searchTerms()); | ||
args.put("format", "json"); | ||
// Only consider explicit safesearch requests, otherwise keep the default | ||
if (request.safeSearch() != null) { | ||
if (request.safeSearch()) { | ||
// Set to strict as opposed to moderate | ||
args.put("safesearch", 2); | ||
} else { | ||
args.put("safesearch", 0); | ||
} | ||
} | ||
if (request.startPage() != null) { | ||
args.put("pageno", request.startPage()); | ||
} | ||
if (request.language() != null) { | ||
args.put("language", request.language()); | ||
} | ||
final Response<SearXNGResponse> response = api.search(args).execute(); | ||
return response.body(); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
80 changes: 80 additions & 0 deletions
80
...n/java/dev/langchain4j/community/web/search/searxng/SearXNGRequestLoggingInterceptor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package dev.langchain4j.community.web.search.searxng; | ||
|
||
import okhttp3.Headers; | ||
import okhttp3.Interceptor; | ||
import okhttp3.Request; | ||
import okhttp3.Response; | ||
import okio.Buffer; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
import java.util.stream.StreamSupport; | ||
|
||
import static dev.langchain4j.internal.Utils.isNullOrBlank; | ||
import static java.util.Arrays.asList; | ||
import static java.util.stream.Collectors.joining; | ||
|
||
class SearXNGRequestLoggingInterceptor implements Interceptor { | ||
|
||
private static final Logger log = LoggerFactory.getLogger(SearXNGRequestLoggingInterceptor.class); | ||
|
||
private static final Set<String> COMMON_SECRET_HEADERS = | ||
new HashSet<>(asList("authorization", "x-api-key", "x-auth-token")); | ||
|
||
@Override | ||
public Response intercept(Chain chain) throws IOException { | ||
Request request = chain.request(); | ||
this.log(request); | ||
return chain.proceed(request); | ||
} | ||
|
||
private void log(Request request) { | ||
try { | ||
log.debug("Request:\n- method: {}\n- url: {}\n- headers: {}\n- body: {}", | ||
request.method(), request.url(), getHeaders(request.headers()), getBody(request)); | ||
} catch (Exception e) { | ||
log.warn("Error while logging request: {}", e.getMessage()); | ||
} | ||
} | ||
|
||
private static String getBody(Request request) { | ||
try (Buffer buffer = new Buffer()) { | ||
if (request.body() == null) { | ||
return ""; | ||
} | ||
request.body().writeTo(buffer); | ||
return buffer.readUtf8(); | ||
} catch (Exception e) { | ||
log.warn("Exception while getting body", e); | ||
return "Exception while getting body: " + e.getMessage(); | ||
} | ||
} | ||
|
||
private static String getHeaders(Headers headers) { | ||
return StreamSupport.stream(headers.spliterator(), false) | ||
.map(header -> formatHeader(header.component1(), header.component2())) | ||
.collect(joining(", ")); | ||
} | ||
|
||
private static String formatHeader(String headerKey, String headerValue) { | ||
if (COMMON_SECRET_HEADERS.contains(headerKey.toLowerCase())) { | ||
headerValue = maskSecretKey(headerValue); | ||
} | ||
return String.format("[%s: %s]", headerKey, headerValue); | ||
} | ||
|
||
private static String maskSecretKey(String key) { | ||
if (isNullOrBlank(key)) { | ||
return key; | ||
} | ||
|
||
if (key.length() >= 7) { | ||
return key.substring(0, 5) + "..." + key.substring(key.length() - 2); | ||
} else { | ||
return "..."; // to short to be masked | ||
} | ||
} | ||
} |
50 changes: 50 additions & 0 deletions
50
...e-searxng/src/main/java/dev/langchain4j/community/web/search/searxng/SearXNGResponse.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package dev.langchain4j.community.web.search.searxng; | ||
|
||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; | ||
import com.fasterxml.jackson.databind.PropertyNamingStrategies.SnakeCaseStrategy; | ||
import com.fasterxml.jackson.databind.annotation.JsonNaming; | ||
|
||
import java.util.List; | ||
|
||
@JsonNaming(SnakeCaseStrategy.class) | ||
@JsonIgnoreProperties(ignoreUnknown = true) | ||
class SearXNGResponse { | ||
|
||
private String query; | ||
private long numberOfResults; | ||
private List<SearXNGResult> results; | ||
private List<String> answers; | ||
private List<String> corrections; | ||
private List<String> suggestions; | ||
private List<List<String>> unresponsiveEngines; | ||
// Skipping other returned fields like infoboxes for now | ||
|
||
public String getQuery() { | ||
return query; | ||
} | ||
|
||
public long getNumberOfResults() { | ||
return numberOfResults; | ||
} | ||
|
||
public List<SearXNGResult> getResults() { | ||
return results; | ||
} | ||
|
||
public List<String> getAnswers() { | ||
return answers; | ||
} | ||
|
||
public List<String> getCorrections() { | ||
return corrections; | ||
} | ||
|
||
public List<String> getSuggestions() { | ||
return suggestions; | ||
} | ||
|
||
public List<List<String>> getUnresponsiveEngines() { | ||
return unresponsiveEngines; | ||
} | ||
} | ||
|
42 changes: 42 additions & 0 deletions
42
.../java/dev/langchain4j/community/web/search/searxng/SearXNGResponseLoggingInterceptor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package dev.langchain4j.community.web.search.searxng; | ||
|
||
import okhttp3.Interceptor; | ||
import okhttp3.Request; | ||
import okhttp3.Response; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
|
||
class SearXNGResponseLoggingInterceptor implements Interceptor { | ||
|
||
private static final Logger log = LoggerFactory.getLogger(SearXNGResponseLoggingInterceptor.class); | ||
|
||
@Override | ||
public Response intercept(Chain chain) throws IOException { | ||
Request request = chain.request(); | ||
Response response = chain.proceed(request); | ||
this.log(response); | ||
return response; | ||
} | ||
|
||
private void log(Response response) { | ||
try { | ||
log.debug("Response:\n- status code: {}\n- headers: {}\n- body: {}", | ||
response.code(), response.headers(), this.getBody(response)); | ||
} catch (Exception e) { | ||
log.warn("Error while logging response: {}", e.getMessage()); | ||
} | ||
} | ||
|
||
private String getBody(Response response) throws IOException { | ||
return isEventStream(response) | ||
? "[skipping response body due to streaming]" | ||
: response.peekBody(Long.MAX_VALUE).string(); | ||
} | ||
|
||
private static boolean isEventStream(Response response) { | ||
String contentType = response.header("Content-Type"); | ||
return contentType != null && contentType.contains("event-stream"); | ||
} | ||
} |
Oops, something went wrong.