Skip to content

Commit

Permalink
[Feature] Integration with Searxng (#10)
Browse files Browse the repository at this point in the history
* Initial migration from langchain4j

* Removed lombok dependency

* Removed baseUrl method and added baseUrl argument to builder method as it is mandatory

* Some javadoc

* Removed unused constructors

* Added module to the bom

* Added support for language, safeSearch and startPage parameters

* Several changes due to PR discussion

* Changed back to no-arg builder()

* Moved ensureNotNull from build to the constructor. Also fixed a missed issue in the integration test

* Use testcontainers for integration tests

* Added INDENT_OUTPUT

* Support for optionalParameters and additionalParams

* Renamed to optionalParams

* Testing for various additional parameter scenarios and some initial metadata result values added

* Removed some debugging code

* Added some additional metadata fields

* Support for logging requests and responses

* Less brittle check in should_search_with_additional_params() unit test

* reformat

---------

Co-authored-by: Martin7-1 <[email protected]>
  • Loading branch information
bnayfeh and Martin7-1 authored Nov 7, 2024
1 parent 9f4596e commit 1cb1967
Show file tree
Hide file tree
Showing 12 changed files with 3,256 additions and 0 deletions.
6 changes: 6 additions & 0 deletions langchain4j-community-bom/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
<artifactId>langchain4j-community-clickhouse</artifactId>
<version>${project.version}</version>
</dependency>
<!-- web searchers -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-community-web-search-engine-searxng</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

Expand Down
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
<!-- LangChain4j Community integration of embedding store -->
<module>langchain4j-community-clickhouse</module>

<!-- LangChain4j Community integration of web search engine -->
<module>web-search-engines/langchain4j-community-web-search-engine-searxng</module>

<!-- LangChain4j Community integration of model provider -->

</modules>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven module descriptor for the SearXNG web search engine integration. -->
<modelVersion>4.0.0</modelVersion>

<!-- Inherits from the langchain4j-community root POM located two directories up. -->
<parent>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-community</artifactId>
<version>0.36.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<artifactId>langchain4j-community-web-search-engine-searxng</artifactId>
<packaging>jar</packaging>

<name>LangChain4j :: Web Search Engine :: SearXNG</name>

<!-- No dependency below declares a version; they are presumably supplied by dependency management
     inherited from the parent POM (confirm against the parent before changing). -->
<dependencies>

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</dependency>

<!-- HTTP layer: Retrofit with the Jackson converter for JSON bodies -->
<dependency>
<groupId>com.squareup.retrofit2</groupId>
<artifactId>retrofit</artifactId>
</dependency>

<dependency>
<groupId>com.squareup.retrofit2</groupId>
<artifactId>converter-jackson</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>

<!-- DEPENDENCY CONFLICT RESOLUTION FOR OKHTTP (START) -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
</dependency>

<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib-jdk8</artifactId>
</dependency>
<!-- DEPENDENCY CONFLICT RESOLUTION FOR OKHTTP (END) -->

<!-- Everything from here on is test scope only -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

<!-- Visibility for WebSearchEngineIT -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<!-- Testcontainers core plus its JUnit Jupiter integration -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>

</dependencies>

<licenses>
<license>
<name>Apache-2.0</name>
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
<comments>A business-friendly OSS license</comments>
</license>
</licenses>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package dev.langchain4j.community.web.search.searxng;

import retrofit2.Call;
import retrofit2.http.GET;
import retrofit2.http.Headers;
import retrofit2.http.QueryMap;

import java.util.Map;

/**
 * Retrofit service declaration for the single SearXNG endpoint used by this module.
 */
interface SearXNGApi {

    /**
     * Declares a {@code GET} call to the {@code search} path with a fixed
     * {@code Content-Type: application/json} header.
     *
     * @param params entries to send as URL query parameters
     * @return the call which, once executed, yields a {@link SearXNGResponse} body
     */
    @GET("search")
    @Headers("Content-Type: application/json")
    Call<SearXNGResponse> search(@QueryMap Map<String, Object> params);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package dev.langchain4j.community.web.search.searxng;

import com.fasterxml.jackson.databind.ObjectMapper;
import dev.langchain4j.web.search.WebSearchRequest;
import okhttp3.OkHttpClient;
import retrofit2.Response;
import retrofit2.Retrofit;
import retrofit2.converter.jackson.JacksonConverterFactory;

import java.io.IOException;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;

import static com.fasterxml.jackson.databind.SerializationFeature.INDENT_OUTPUT;

/**
 * Package-private client that issues search requests to a SearXNG instance through
 * {@link SearXNGApi} and returns the parsed {@link SearXNGResponse}.
 */
class SearXNGClient {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().enable(INDENT_OUTPUT);
    private final SearXNGApi api;
    private final Map<String, Object> optionalParams;

    /**
     * Builds the underlying OkHttp/Retrofit stack.
     *
     * @param baseUrl        base URL handed to Retrofit
     * @param timeout        value applied to the call, connect, read and write timeouts alike
     * @param logRequests    when {@code true}, installs {@link SearXNGRequestLoggingInterceptor}
     * @param logResponses   when {@code true}, installs {@link SearXNGResponseLoggingInterceptor}
     * @param optionalParams query parameters added to every search; may be {@code null}
     */
    public SearXNGClient(String baseUrl, Duration timeout, boolean logRequests, boolean logResponses, Map<String, Object> optionalParams) {
        this.optionalParams = optionalParams;
        OkHttpClient.Builder okHttpClientBuilder = new OkHttpClient.Builder()
                .callTimeout(timeout)
                .connectTimeout(timeout)
                .readTimeout(timeout)
                .writeTimeout(timeout);
        if (logRequests) {
            okHttpClientBuilder.addInterceptor(new SearXNGRequestLoggingInterceptor());
        }
        if (logResponses) {
            okHttpClientBuilder.addInterceptor(new SearXNGResponseLoggingInterceptor());
        }
        Retrofit retrofit = new Retrofit.Builder()
                .baseUrl(baseUrl)
                .client(okHttpClientBuilder.build())
                .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER))
                .build();
        this.api = retrofit.create(SearXNGApi.class);
    }

    /**
     * Executes a synchronous search.
     *
     * <p>Query parameters are layered in this order, later entries overriding earlier ones:
     * the client-wide {@code optionalParams}, the request's {@code additionalParams}, and finally
     * the values derived from the request itself ({@code q}, {@code format}, {@code safesearch},
     * {@code pageno}, {@code language}).
     *
     * @param request the search request to translate into query parameters
     * @return the response body as converted by Retrofit
     * @throws RuntimeException if the call fails with an {@link IOException} (wrapped as the cause)
     *                          or the server answers with a non-2xx HTTP status
     */
    SearXNGResponse search(WebSearchRequest request) {
        final Map<String, Object> args = new HashMap<>();
        if (optionalParams != null) {
            args.putAll(optionalParams);
        }
        if (request.additionalParams() != null) {
            args.putAll(request.additionalParams());
        }
        args.put("q", request.searchTerms());
        args.put("format", "json");
        // Only consider explicit safesearch requests, otherwise keep the default
        if (request.safeSearch() != null) {
            // true maps to strict (2) as opposed to moderate; false switches filtering off (0)
            args.put("safesearch", request.safeSearch() ? 2 : 0);
        }
        if (request.startPage() != null) {
            args.put("pageno", request.startPage());
        }
        if (request.language() != null) {
            args.put("language", request.language());
        }

        try {
            final Response<SearXNGResponse> response = api.search(args).execute();
            if (!response.isSuccessful()) {
                // body() is null for unsuccessful responses; fail here with the status instead of
                // handing a null back to the caller.
                throw new RuntimeException("SearXNG search request failed with HTTP status "
                        + response.code() + " " + response.message());
            }
            return response.body();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package dev.langchain4j.community.web.search.searxng;

import okhttp3.Headers;
import okhttp3.Interceptor;
import okhttp3.Request;
import okhttp3.Response;
import okio.Buffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.StreamSupport;

import static dev.langchain4j.internal.Utils.isNullOrBlank;
import static java.util.Arrays.asList;
import static java.util.stream.Collectors.joining;

/**
 * OkHttp interceptor that writes each outgoing request (method, URL, headers and body) to the
 * debug log, masking the values of well-known secret headers.
 */
class SearXNGRequestLoggingInterceptor implements Interceptor {

    private static final Logger log = LoggerFactory.getLogger(SearXNGRequestLoggingInterceptor.class);

    /** Lower-cased names of headers whose values must not appear in full in the log. */
    private static final Set<String> COMMON_SECRET_HEADERS =
            new HashSet<>(asList("authorization", "x-api-key", "x-auth-token"));

    /**
     * Logs the request and then lets the chain proceed with it unchanged.
     *
     * @param chain the interceptor chain
     * @return the response produced by the rest of the chain
     * @throws IOException if proceeding with the chain fails
     */
    @Override
    public Response intercept(Chain chain) throws IOException {
        Request request = chain.request();
        this.log(request);
        return chain.proceed(request);
    }

    /** Best-effort debug logging; a failure here is reported as a warning and never propagated. */
    private void log(Request request) {
        if (!log.isDebugEnabled()) {
            // Skip serialising headers and body when the output would be discarded anyway.
            return;
        }
        try {
            log.debug("Request:\n- method: {}\n- url: {}\n- headers: {}\n- body: {}",
                    request.method(), request.url(), getHeaders(request.headers()), getBody(request));
        } catch (Exception e) {
            // Trailing throwable keeps the stack trace alongside the message.
            log.warn("Error while logging request: {}", e.getMessage(), e);
        }
    }

    /**
     * Returns the request body as UTF-8 text, an empty string when there is no body, or a
     * description of the failure if the body cannot be written out.
     */
    private static String getBody(Request request) {
        try (Buffer buffer = new Buffer()) {
            if (request.body() == null) {
                return "";
            }
            request.body().writeTo(buffer);
            return buffer.readUtf8();
        } catch (Exception e) {
            log.warn("Exception while getting body", e);
            return "Exception while getting body: " + e.getMessage();
        }
    }

    /** Renders all headers as a comma-separated list of {@code [name: value]} entries. */
    private static String getHeaders(Headers headers) {
        return StreamSupport.stream(headers.spliterator(), false)
                .map(header -> formatHeader(header.component1(), header.component2()))
                .collect(joining(", "));
    }

    /** Formats one header, masking its value when the name is a known secret header. */
    private static String formatHeader(String headerKey, String headerValue) {
        // Locale.ROOT keeps the lookup stable under default locales with special casing rules
        // (e.g. Turkish, where "I" lower-cases to a dotless "ı" and "Authorization" would not match).
        if (COMMON_SECRET_HEADERS.contains(headerKey.toLowerCase(java.util.Locale.ROOT))) {
            headerValue = maskSecretKey(headerValue);
        }
        return String.format("[%s: %s]", headerKey, headerValue);
    }

    /**
     * Masks a secret by keeping its first five and last two characters around {@code "..."}.
     * Null or blank input is returned unchanged; values shorter than seven characters collapse
     * to {@code "..."}.
     */
    private static String maskSecretKey(String key) {
        if (isNullOrBlank(key)) {
            return key;
        }

        // NOTE(review): a value of exactly 7 characters is shown in full (5 + 2), only split by "...".
        if (key.length() >= 7) {
            return key.substring(0, 5) + "..." + key.substring(key.length() - 2);
        } else {
            return "..."; // too short to be masked
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package dev.langchain4j.community.web.search.searxng;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.PropertyNamingStrategies.SnakeCaseStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;

import java.util.List;

/**
 * JSON model of a SearXNG search response.
 *
 * <p>Property names are mapped with the snake_case naming strategy and properties that have no
 * matching field are ignored.
 */
@JsonNaming(SnakeCaseStrategy.class)
@JsonIgnoreProperties(ignoreUnknown = true)
class SearXNGResponse {

    private String query;
    private long numberOfResults;
    private List<SearXNGResult> results;
    private List<String> answers;
    private List<String> corrections;
    private List<String> suggestions;
    private List<List<String>> unresponsiveEngines;
    // Skipping other returned fields like infoboxes for now

    /** @return the {@code query} property */
    public String getQuery() {
        return this.query;
    }

    /** @return the {@code number_of_results} property */
    public long getNumberOfResults() {
        return this.numberOfResults;
    }

    /** @return the {@code results} property */
    public List<SearXNGResult> getResults() {
        return this.results;
    }

    /** @return the {@code answers} property */
    public List<String> getAnswers() {
        return this.answers;
    }

    /** @return the {@code corrections} property */
    public List<String> getCorrections() {
        return this.corrections;
    }

    /** @return the {@code suggestions} property */
    public List<String> getSuggestions() {
        return this.suggestions;
    }

    /** @return the {@code unresponsive_engines} property */
    public List<List<String>> getUnresponsiveEngines() {
        return this.unresponsiveEngines;
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package dev.langchain4j.community.web.search.searxng;

import okhttp3.Interceptor;
import okhttp3.Request;
import okhttp3.Response;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * OkHttp interceptor that writes each response (status code, headers and body) to the debug log.
 */
class SearXNGResponseLoggingInterceptor implements Interceptor {

    private static final Logger log = LoggerFactory.getLogger(SearXNGResponseLoggingInterceptor.class);

    /**
     * Proceeds with the request, logs the resulting response and returns it.
     *
     * @param chain the interceptor chain
     * @return the response produced by the rest of the chain
     * @throws IOException if proceeding with the chain fails
     */
    @Override
    public Response intercept(Chain chain) throws IOException {
        Request request = chain.request();
        Response response = chain.proceed(request);
        this.log(response);
        return response;
    }

    /** Best-effort debug logging; a failure here is reported as a warning and never propagated. */
    private void log(Response response) {
        if (!log.isDebugEnabled()) {
            // Avoid buffering the whole response body when the output would be discarded anyway.
            return;
        }
        try {
            log.debug("Response:\n- status code: {}\n- headers: {}\n- body: {}",
                    response.code(), response.headers(), this.getBody(response));
        } catch (Exception e) {
            // Trailing throwable keeps the stack trace alongside the message.
            log.warn("Error while logging response: {}", e.getMessage(), e);
        }
    }

    /**
     * Returns the body text via {@code peekBody}, or a placeholder for event-stream responses.
     *
     * @throws IOException if the body cannot be read
     */
    private String getBody(Response response) throws IOException {
        return isEventStream(response)
                ? "[skipping response body due to streaming]"
                : response.peekBody(Long.MAX_VALUE).string();
    }

    /** Tells whether the {@code Content-Type} header is present and contains {@code event-stream}. */
    private static boolean isEventStream(Response response) {
        String contentType = response.header("Content-Type");
        return contentType != null && contentType.contains("event-stream");
    }
}
Loading

0 comments on commit 1cb1967

Please sign in to comment.