diff --git a/README.md b/README.md index fe8aeaaa..b705275b 100644 --- a/README.md +++ b/README.md @@ -60,9 +60,9 @@ gzcat < gse.sql.gz | mysql -u gseadmin -pLugano2020 gse ### Server -Before attempting to run the server, I advise you generate your own GitHub personal access token (PAT). -Said token should include the `repo` scope, in order for it to effectively crawl the GitHub API. -While the token is not mandatory, the impact its presence has on the mining speed can not be understated. +Before attempting to run the server, you must generate your own GitHub personal access token (PAT). +GHS relies on the GraphQL API, which is inaccessible without authentication. +The token must include the `repo` scope, in order for it to access the information present in the GitHub API. Once that is done, you can run the server locally using Maven: diff --git a/pom.xml b/pom.xml index 6237856d..583de70a 100644 --- a/pom.xml +++ b/pom.xml @@ -115,6 +115,14 @@ org.springframework.boot spring-boot-starter-web + + org.springframework.boot + spring-boot-starter-webflux + + + org.springframework.boot + spring-boot-starter-graphql + org.springframework.boot spring-boot-starter-aop diff --git a/src/main/java/usi/si/seart/config/GraphQlConfig.java b/src/main/java/usi/si/seart/config/GraphQlConfig.java new file mode 100644 index 00000000..661185c9 --- /dev/null +++ b/src/main/java/usi/si/seart/config/GraphQlConfig.java @@ -0,0 +1,57 @@ +package usi.si.seart.config; + +import lombok.AllArgsConstructor; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.graphql.client.GraphQlClient; +import org.springframework.graphql.client.HttpGraphQlClient; +import org.springframework.web.reactive.function.client.ClientRequest; +import org.springframework.web.reactive.function.client.ClientResponse; +import org.springframework.web.reactive.function.client.ExchangeFilterFunction; +import org.springframework.web.reactive.function.client.ExchangeFunction; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.core.publisher.Mono; +import usi.si.seart.github.Endpoint; +import usi.si.seart.github.GitHubTokenManager; + +@Configuration +@AllArgsConstructor(onConstructor_ = @Autowired) +public class GraphQlConfig { + + GitHubTokenManager gitHubTokenManager; + + @Bean + public GraphQlClient graphQlClient() { + return HttpGraphQlClient.create(webClient()); + } + + @Bean + public WebClient webClient() { + return WebClient.builder() + .baseUrl(Endpoint.GRAPH_QL.toString()) + .defaultHeader("X-GitHub-Api-Version", "2022-11-28") + .filter(exchangeFilterFunction()) + .build(); + } + + @Bean + public ExchangeFilterFunction exchangeFilterFunction() { + return new ExchangeFilterFunction() { + + @NotNull + @Override + public Mono filter(@NotNull ClientRequest original, @NotNull ExchangeFunction next) { + ClientRequest modified = ClientRequest.from(original) + .headers(headers -> { + String token = gitHubTokenManager.getCurrentToken(); + if (token != null) + headers.setBearerAuth(token); + }) + .build(); + return next.exchange(modified); + } + }; + } +} diff --git a/src/main/java/usi/si/seart/converter/JsonObjectToErrorResponseConverter.java b/src/main/java/usi/si/seart/converter/JsonObjectToErrorResponseConverter.java index 561bc4da..7afcc905 100644 --- a/src/main/java/usi/si/seart/converter/JsonObjectToErrorResponseConverter.java +++ b/src/main/java/usi/si/seart/converter/JsonObjectToErrorResponseConverter.java @@ -4,19 +4,19 @@ import com.google.gson.JsonObject; import org.springframework.core.convert.converter.Converter; import org.springframework.lang.NonNull; -import usi.si.seart.github.ErrorResponse; +import usi.si.seart.github.RestErrorResponse; import java.net.MalformedURLException; import java.net.URL; import java.util.Optional; import java.util.stream.StreamSupport; -public class JsonObjectToErrorResponseConverter implements Converter { +public class JsonObjectToErrorResponseConverter implements Converter { @Override @NonNull - public ErrorResponse convert(@NonNull JsonObject source) { - ErrorResponse.ErrorResponseBuilder builder = ErrorResponse.builder(); + public RestErrorResponse convert(@NonNull JsonObject source) { + RestErrorResponse.RestErrorResponseBuilder builder = RestErrorResponse.builder(); builder.message(source.get("message").getAsString()); @@ -48,7 +48,7 @@ public ErrorResponse convert(@NonNull JsonObject source) { String codeName = Optional.ofNullable(object.get("code")) .map(JsonElement::getAsString) .orElse(null); - return new ErrorResponse.Error(resource, field, codeName, message); + return new RestErrorResponse.Error(resource, field, codeName, message); }).forEach(builder::error); return builder.build(); diff --git a/src/main/java/usi/si/seart/converter/JsonObjectToGraphQlErrorResponse.java b/src/main/java/usi/si/seart/converter/JsonObjectToGraphQlErrorResponse.java new file mode 100644 index 00000000..dc2eb5e0 --- /dev/null +++ b/src/main/java/usi/si/seart/converter/JsonObjectToGraphQlErrorResponse.java @@ -0,0 +1,66 @@ +package usi.si.seart.converter; + +import com.google.common.reflect.TypeToken; +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import graphql.ErrorClassification; +import graphql.language.SourceLocation; +import lombok.AllArgsConstructor; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.convert.converter.Converter; +import org.springframework.stereotype.Component; +import usi.si.seart.github.GraphQlErrorResponse; + +import java.lang.reflect.Type; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +@Component +@AllArgsConstructor(onConstructor_ = @Autowired) +public class JsonObjectToGraphQlErrorResponse implements Converter { + + Gson gson; + JsonObjectToSourceLocationConverter sourceLocationConverter; + StringToGraphQlErrorResponseErrorTypeConverter errorTypeConverter; + + @Override + @NotNull + public GraphQlErrorResponse convert(@NotNull JsonObject source) { + GraphQlErrorResponse.GraphQlErrorResponseBuilder builder = GraphQlErrorResponse.builder(); + + String message = source.getAsJsonPrimitive("message").getAsString(); + builder.message(message); + ErrorClassification errorType = errorTypeConverter.convert(message); + builder.errorType(errorType); + + if (source.has("path")) { + JsonArray array = source.getAsJsonArray("path"); + Type type = new TypeToken>() { }.getType(); + List parsedPath = gson.fromJson(array, type); + builder.parsedPath(parsedPath); + } + + if (source.has("locations")) { + JsonArray array = source.getAsJsonArray("locations"); + List locations = StreamSupport.stream(array.spliterator(), true) + .map(JsonElement::getAsJsonObject) + .map(sourceLocationConverter::convert) + .collect(Collectors.toList()); + builder.locations(locations); + } + + if (source.has("extensions")) { + JsonObject object = source.getAsJsonObject("extensions"); + Type type = new TypeToken>() { }.getType(); + Map extensions = gson.fromJson(object, type); + builder.extensions(extensions); + } + + return builder.build(); + } +} diff --git a/src/main/java/usi/si/seart/converter/JsonObjectToSourceLocationConverter.java b/src/main/java/usi/si/seart/converter/JsonObjectToSourceLocationConverter.java new file mode 100644 index 00000000..0df061c4 --- /dev/null +++ b/src/main/java/usi/si/seart/converter/JsonObjectToSourceLocationConverter.java @@ -0,0 +1,23 @@ +package usi.si.seart.converter; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import graphql.language.SourceLocation; +import lombok.AllArgsConstructor; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.convert.converter.Converter; +import org.springframework.stereotype.Component; + +@Component +@AllArgsConstructor(onConstructor_ = @Autowired) +public class JsonObjectToSourceLocationConverter implements Converter { + + private final Gson gson; + + @Override + @NotNull + public SourceLocation convert(@NotNull JsonObject source) { + return gson.fromJson(source, SourceLocation.class); + } +} diff --git a/src/main/java/usi/si/seart/converter/ListToJsonArrayConverter.java b/src/main/java/usi/si/seart/converter/ListToJsonArrayConverter.java new file mode 100644 index 00000000..6fcc1b1a --- /dev/null +++ b/src/main/java/usi/si/seart/converter/ListToJsonArrayConverter.java @@ -0,0 +1,25 @@ +package usi.si.seart.converter; + +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import lombok.AllArgsConstructor; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.convert.converter.Converter; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component +@AllArgsConstructor(onConstructor_ = @Autowired) +public class ListToJsonArrayConverter implements Converter, JsonArray> { + + private final Gson gson; + + @Override + @NotNull + public JsonArray convert(@NotNull List source) { + String string = gson.toJson(source); + return gson.fromJson(string, JsonArray.class); + } +} diff --git a/src/main/java/usi/si/seart/converter/MapToJsonObjectConverter.java b/src/main/java/usi/si/seart/converter/MapToJsonObjectConverter.java new file mode 100644 index 00000000..3cef4895 --- /dev/null +++ b/src/main/java/usi/si/seart/converter/MapToJsonObjectConverter.java @@ -0,0 +1,25 @@ +package usi.si.seart.converter; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import lombok.AllArgsConstructor; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.convert.converter.Converter; +import org.springframework.stereotype.Component; + +import java.util.Map; + +@Component +@AllArgsConstructor(onConstructor_ = @Autowired) +public class MapToJsonObjectConverter implements Converter, JsonObject> { + + private final Gson gson; + + @Override + @NotNull + public JsonObject convert(@NotNull Map source) { + String string = gson.toJson(source); + return gson.fromJson(string, JsonObject.class); + } +} diff --git a/src/main/java/usi/si/seart/converter/StringToGraphQlErrorResponseErrorTypeConverter.java b/src/main/java/usi/si/seart/converter/StringToGraphQlErrorResponseErrorTypeConverter.java new file mode 100644 index 00000000..a5894bbf --- /dev/null +++ b/src/main/java/usi/si/seart/converter/StringToGraphQlErrorResponseErrorTypeConverter.java @@ -0,0 +1,81 @@ +package usi.si.seart.converter; + +import org.jetbrains.annotations.NotNull; +import org.springframework.core.convert.converter.Converter; +import org.springframework.lang.Nullable; +import org.springframework.stereotype.Component; +import usi.si.seart.github.GraphQlErrorResponse; + +import java.util.regex.Pattern; + +@Component +public class StringToGraphQlErrorResponseErrorTypeConverter implements Converter +{ + // https://www.debuggex.com/r/kzZ6PfNvkQLAg3qG + private static final Pattern RATE_LIMITED_PATTERN = Pattern.compile( + "^API rate limit exceeded for user ID (\\d+)\\.$" + ); + + // https://www.debuggex.com/r/1unTNcRjYb3M8TrH + private static final Pattern FIELD_ERROR_PATTERN = Pattern.compile( + "^Field '([^']+)' doesn't exist on type '([^']+)'$" + ); + + // https://www.debuggex.com/r/yK-tNA559lGPF0aL + private static final Pattern NOT_FOUND_PATTERN = Pattern.compile( + "^Could not resolve to an? (\\w+) with the (\\w+) (?:of )?'([^']+)'\\.$" + ); + + // https://www.debuggex.com/r/6BjTNEeyleQXfBjx + private static final Pattern ARGUMENT_TYPE_ERROR_PATTERN = Pattern.compile( + "^Argument '([^']+)' on Field '([^']+)' has an invalid value \\(([^)]+)\\). Expected type '([^']+)'\\.$" + ); + + // https://www.debuggex.com/r/mcZ2rcy61FSqVpBO + private static final Pattern ARGUMENT_MISSING_ERROR_PATTERN = Pattern.compile( + "^Field '([^']+)' is missing required arguments?: (\\w+(?:,\\s*\\w+)*)$" + ); + + // https://www.debuggex.com/r/emiQbdZlfCYhAlhi + private static final Pattern ARGUMENT_UNKNOWN_ERROR_PATTERN = Pattern.compile( + "^Field '([^']+)' doesn't accept argument '([^']+)'$" + ); + + // https://www.debuggex.com/r/_WAiksCCoBiSq9nw + private static final Pattern VARIABLE_VALUE_ERROR_PATTERN = Pattern.compile( + "^Variable \\$(\\w+) of type (\\w+!?) was provided invalid value$" + ); + + // https://www.debuggex.com/r/OqQixKpVA8MogqMH + private static final Pattern VARIABLE_UNUSED_ERROR_PATTERN = Pattern.compile( + "^Variable \\$(\\w+) is declared by (\\w+)(?: query)? but not used$" + ); + + @Override + @Nullable + public GraphQlErrorResponse.ErrorType convert(@NotNull String source) { + if (source.equals("A query attribute must be specified and must be a string.")) + return GraphQlErrorResponse.ErrorType.EMPTY_QUERY; + if (source.equals("Unexpected end of document")) + return GraphQlErrorResponse.ErrorType.EARLY_EOF; + if (source.startsWith("Parse error")) + return GraphQlErrorResponse.ErrorType.PARSE_ERROR; + if (RATE_LIMITED_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.RATE_LIMITED; + if (FIELD_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.FIELD_ERROR; + if (NOT_FOUND_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.NOT_FOUND; + if (ARGUMENT_TYPE_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.ARGUMENT_TYPE_ERROR; + if (ARGUMENT_MISSING_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.ARGUMENT_MISSING_ERROR; + if (ARGUMENT_UNKNOWN_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.ARGUMENT_UNKNOWN_ERROR; + if (VARIABLE_VALUE_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.VARIABLE_VALUE_ERROR; + if (VARIABLE_UNUSED_ERROR_PATTERN.matcher(source).matches()) + return GraphQlErrorResponse.ErrorType.VARIABLE_UNUSED_ERROR; + return null; + } +} diff --git a/src/main/java/usi/si/seart/exception/github/GitHubAPIException.java b/src/main/java/usi/si/seart/exception/github/GitHubAPIException.java index edbdc00a..60aa19d5 100644 --- a/src/main/java/usi/si/seart/exception/github/GitHubAPIException.java +++ b/src/main/java/usi/si/seart/exception/github/GitHubAPIException.java @@ -1,12 +1,12 @@ package usi.si.seart.exception.github; import lombok.experimental.StandardException; -import usi.si.seart.github.ErrorResponse; +import usi.si.seart.github.RestErrorResponse; @StandardException public class GitHubAPIException extends RuntimeException { - public GitHubAPIException(ErrorResponse errorResponse) { + public GitHubAPIException(RestErrorResponse errorResponse) { this(errorResponse.toString()); } } diff --git a/src/main/java/usi/si/seart/github/GitHubAPIConnector.java b/src/main/java/usi/si/seart/github/GitHubAPIConnector.java index 2fd88e9a..3808ccef 100644 --- a/src/main/java/usi/si/seart/github/GitHubAPIConnector.java +++ b/src/main/java/usi/si/seart/github/GitHubAPIConnector.java @@ -22,6 +22,8 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.convert.ConversionService; +import org.springframework.graphql.GraphQlResponse; +import org.springframework.graphql.client.GraphQlClient; import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; import org.springframework.retry.RetryCallback; @@ -38,9 +40,12 @@ import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Date; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.stream.StreamSupport; @SuppressWarnings("ConstantConditions") @Slf4j @@ -53,7 +58,9 @@ public class GitHubAPIConnector { @Value("${app.crawl.minimum-stars}") Integer minimumStars; - OkHttpClient client; + OkHttpClient httpClient; + + GraphQlClient graphQlClient; RetryTemplate retryTemplate; @@ -86,10 +93,8 @@ public JsonObject searchRepositories(String language, Range range, Integer return result.getJsonObject(); } - public JsonObject fetchRepoInfo(String name) { - URL url = Endpoint.REPOSITORY.toURL(name.split("/")); - FetchCallback.Result result = fetch(url); + GraphQLCallback.Result result = fetch(name, "repository"); return result.getJsonObject(); } @@ -291,6 +296,19 @@ public JsonArray fetchRepoTopics(String name) { return array; } + private GraphQLCallback.Result fetch(String name, String document) { + String[] args = name.split("/"); + if (args.length != 2) + throw new IllegalArgumentException("Invalid repository name: " + name); + Map variables = Map.of("owner", args[0], "name", args[1]); + try { + return retryTemplate.execute(new GraphQLCallback(document, variables)); + } catch (Exception ex) { + String message = String.format("GraphQL request to %s failed", name); + throw new GitHubAPIException(message, ex); + } + } + private FetchCallback.Result fetch(URL url) { try { return retryTemplate.execute(new FetchCallback(url)); @@ -300,6 +318,76 @@ private FetchCallback.Result fetch(URL url) { } } + @Getter + @AllArgsConstructor(access = AccessLevel.PROTECTED) + @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) + private abstract static class Result { + + JsonElement jsonElement; + + public JsonObject getJsonObject() { + return jsonElement.getAsJsonObject(); + } + + public JsonArray getJsonArray() { + return jsonElement.getAsJsonArray(); + } + + public Optional size() { + if (jsonElement.isJsonArray()) { + return Optional.of(jsonElement.getAsJsonArray().size()); + } else if (jsonElement.isJsonObject()) { + return Optional.of(jsonElement.getAsJsonObject().size()); + } else { + return Optional.empty(); + } + } + } + + @AllArgsConstructor(access = AccessLevel.PRIVATE) + @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) + private class GraphQLCallback implements RetryCallback { + + String document; + Map variables; + + @Getter + @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) + private class Result extends GitHubAPIConnector.Result { + + private Result(JsonElement jsonElement) { + super(jsonElement); + } + } + + @Override + public GraphQLCallback.Result doWithRetry(RetryContext context) { + GraphQlResponse response = graphQlClient.documentName(document) + .variables(variables) + .execute() + .block(); + Map map = response.toMap(); + JsonObject data = conversionService.convert(map.getOrDefault("data", Map.of()), JsonObject.class); + JsonArray errors = conversionService.convert(map.getOrDefault("errors", List.of()), JsonArray.class); + StreamSupport.stream(errors.spliterator(), true) + .map(JsonElement::getAsJsonObject) + .findFirst() + .map(json -> conversionService.convert(json, GraphQlErrorResponse.class)) + .ifPresent(errorResponse -> { + String name = Objects.toString(errorResponse.getErrorType(), null); + try { + GraphQlErrorResponse.ErrorType errorType = GraphQlErrorResponse.ErrorType.valueOf(name); + if (GraphQlErrorResponse.ErrorType.RATE_LIMITED.equals(errorType)) + gitHubTokenManager.replaceTokenIfExpired(); + } catch (RuntimeException ignored) { + } + throw errorResponse.asException(); + }); + JsonObject repository = data.getAsJsonObject("repository"); + return new Result(repository); + } + } + @AllArgsConstructor(access = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) private class FetchCallback implements RetryCallback { @@ -307,43 +395,29 @@ private class FetchCallback implements RetryCallback size() { - if (jsonElement.isJsonArray()) { - return Optional.of(jsonElement.getAsJsonArray().size()); - } else if (jsonElement.isJsonObject()) { - return Optional.of(jsonElement.getAsJsonObject().size()); - } else { - return Optional.empty(); - } + private Result(JsonElement jsonElement, HttpStatus status, Headers headers) { + super(jsonElement); + this.status = status; + this.headers = headers; } } @Override @SuppressWarnings("resource") - public FetchCallback.Result doWithRetry(RetryContext context) throws Exception { + public Result doWithRetry(RetryContext context) throws Exception { Request.Builder builder = new Request.Builder(); builder.url(url); String currentToken = gitHubTokenManager.getCurrentToken(); if (currentToken != null) builder.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + currentToken); Request request = builder.build(); - Response response = client.newCall(request).execute(); + Response response = httpClient.newCall(request).execute(); HttpStatus status = HttpStatus.valueOf(response.code()); HttpStatus.Series series = status.series(); @@ -353,10 +427,10 @@ public FetchCallback.Result doWithRetry(RetryContext context) throws Exception { switch (series) { case SUCCESSFUL: - return new Result(status, headers, element); + return new Result(element, status, headers); case INFORMATIONAL: case REDIRECTION: - return new Result(status, headers, JsonNull.INSTANCE); + return new Result(JsonNull.INSTANCE, status, headers); case CLIENT_ERROR: return handleClientError(status, headers, element.getAsJsonObject()); case SERVER_ERROR: @@ -367,17 +441,17 @@ public FetchCallback.Result doWithRetry(RetryContext context) throws Exception { throw new IllegalStateException("This line should never be reached"); } - private FetchCallback.Result handleServerError(HttpStatus status, JsonObject json) { + private Result handleServerError(HttpStatus status, JsonObject json) { GitHubAPIConnector.log.error("Server Error: {} [{}]", status.value(), status.getReasonPhrase()); - ErrorResponse errorResponse = conversionService.convert(json, ErrorResponse.class); + RestErrorResponse errorResponse = conversionService.convert(json, RestErrorResponse.class); throw new HttpServerErrorException(status, errorResponse.getMessage()); } @SuppressWarnings("java:S128") - private FetchCallback.Result handleClientError( + private Result handleClientError( HttpStatus status, Headers headers, JsonObject json ) throws InterruptedException { - ErrorResponse errorResponse = conversionService.convert(json, ErrorResponse.class); + RestErrorResponse errorResponse = conversionService.convert(json, RestErrorResponse.class); switch (status) { case UNAUTHORIZED: /* @@ -415,7 +489,7 @@ private FetchCallback.Result handleClientError( * Case (2) encountered, so we propagate error upwards * @see fetchLastPageNumberFromHeader */ - return new Result(status, headers, json); + return new Result(json, status, headers); } default: // TODO: 30.07.23 Add any other special logic here diff --git a/src/main/java/usi/si/seart/github/GitHubTokenManager.java b/src/main/java/usi/si/seart/github/GitHubTokenManager.java index 33da6976..e6e8b7fd 100644 --- a/src/main/java/usi/si/seart/github/GitHubTokenManager.java +++ b/src/main/java/usi/si/seart/github/GitHubTokenManager.java @@ -71,7 +71,7 @@ void postConstruct() { int size = tokens.size(); switch (size) { case 0: - log.warn("Access tokens not specified, GitHub API mining will be performed at a much slower rate!"); + log.warn("Access tokens not specified, can not mine the GitHub API!"); log.info( "Generate a new access token on https://github.com/settings/tokens " + "and add it to the `app.crawl.tokens` property in `application.properties`!" diff --git a/src/main/java/usi/si/seart/github/GraphQlErrorResponse.java b/src/main/java/usi/si/seart/github/GraphQlErrorResponse.java new file mode 100644 index 00000000..25e16284 --- /dev/null +++ b/src/main/java/usi/si/seart/github/GraphQlErrorResponse.java @@ -0,0 +1,127 @@ +package usi.si.seart.github; + +import graphql.ErrorClassification; +import graphql.GraphqlErrorException; +import graphql.language.SourceLocation; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.experimental.FieldDefaults; +import org.jetbrains.annotations.NotNull; +import org.springframework.graphql.ResponseError; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This class is designed to handle GraphQL error information + * and provide utility methods to convert and manipulate error data. + * + * @see ResponseError + * @author Ozren Dabić + */ +@Builder +@Getter(onMethod_ = @Override) +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) +public class GraphQlErrorResponse implements ResponseError { + + String message; + ErrorClassification errorType; + List parsedPath; + List locations; + Map extensions; + + @NotNull + @Override + public String getPath() { + return Stream.ofNullable(parsedPath) + .flatMap(Collection::stream) + .map(Object::toString) + .collect(Collectors.joining(".")); + } + + /** + * Convert this GraphQlErrorResponse into a generic {@link GraphqlErrorException}. + * + * @return a new instance of {@link GraphqlErrorException}. + */ + public GraphqlErrorException asException() { + return GraphqlErrorException.newErrorException() + .errorClassification(errorType) + .sourceLocations(locations) + .extensions(extensions) + .path(parsedPath) + .message(message) + .build(); + } + + /** + * Represents error types specific to GitHub. + * This list is not exhaustive, and consists only + * of cases that we have manually inspected. + * + * @see GitHub GraphQL Error Response Examples + */ + public enum ErrorType implements ErrorClassification { + + /** + * @see Example + */ + RATE_LIMITED, + + /** + * @see Example + */ + EMPTY_QUERY, + + /** + * @see Example + */ + EARLY_EOF, + + /** + * @see Example + */ + PARSE_ERROR, + + /** + * @see Example + */ + FIELD_ERROR, + + /** + * @see Example + */ + NOT_FOUND, + + /** + * @see Example + */ + ARGUMENT_TYPE_ERROR, + + /** + * @see Example + */ + ARGUMENT_MISSING_ERROR, + + /** + * @see Example + */ + ARGUMENT_UNKNOWN_ERROR, + + /** + * @see Example + */ + VARIABLE_VALUE_ERROR, + + /** + * @see Example + */ + VARIABLE_UNUSED_ERROR, + } +} diff --git a/src/main/java/usi/si/seart/github/ErrorResponse.java b/src/main/java/usi/si/seart/github/RestErrorResponse.java similarity index 98% rename from src/main/java/usi/si/seart/github/ErrorResponse.java rename to src/main/java/usi/si/seart/github/RestErrorResponse.java index cfb7ccc2..70ef02f7 100644 --- a/src/main/java/usi/si/seart/github/ErrorResponse.java +++ b/src/main/java/usi/si/seart/github/RestErrorResponse.java @@ -18,7 +18,7 @@ @NoArgsConstructor(access = AccessLevel.PRIVATE) @AllArgsConstructor(access = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE) -public class ErrorResponse { +public class RestErrorResponse { String message; diff --git a/src/main/java/usi/si/seart/job/CrawlProjectsJob.java b/src/main/java/usi/si/seart/job/CrawlProjectsJob.java index 0a47610e..996483c3 100644 --- a/src/main/java/usi/si/seart/job/CrawlProjectsJob.java +++ b/src/main/java/usi/si/seart/job/CrawlProjectsJob.java @@ -20,7 +20,6 @@ import usi.si.seart.collection.Ranges; import usi.si.seart.exception.MetadataCrawlingException; import usi.si.seart.exception.UnsplittableRangeException; -import usi.si.seart.git.Commit; import usi.si.seart.github.GitHubAPIConnector; import usi.si.seart.model.GitRepo; import usi.si.seart.model.Label; @@ -50,7 +49,9 @@ @Job @Slf4j @DependsOn("LanguageInitializationBean") -@ConditionalOnExpression(value = "${app.crawl.enabled:false} and not '${app.crawl.languages}'.isBlank()") +@ConditionalOnExpression(value = + "${app.crawl.enabled:false} and not '${app.crawl.languages}'.isBlank() and not '${app.crawl.tokens}'.isBlank()" +) @RequiredArgsConstructor(onConstructor_ = @Autowired) @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) public class CrawlProjectsJob implements Runnable { @@ -218,11 +219,22 @@ private void saveRetrievedRepo(JsonObject result, String language, int lowerInde : "Saving: "; log.info("{}{} [{}/{}]", action, name, lowerIndex, total); + gitRepo.setCreatedAt(createdAt); + gitRepo.setPushedAt(pushedAt); + gitRepo.setUpdatedAt(updatedAt); + try { JsonObject json = gitHubApiConnector.fetchRepoInfo(name); - String defaultBranch = json.get("default_branch").getAsString(); - gitRepo.setDefaultBranch(defaultBranch); + Long size = json.getAsJsonPrimitive("size").getAsLong(); + gitRepo.setSize(size); + + String homepage = (!json.get("homepage").isJsonNull()) + ? json.getAsJsonPrimitive("homepage") + .getAsString() + .trim() + : null; + gitRepo.setHomepage(Strings.emptyToNull(homepage)); String license = (!json.get("license").isJsonNull()) ? json.getAsJsonObject("license") @@ -232,83 +244,119 @@ private void saveRetrievedRepo(JsonObject result, String language, int lowerInde : null; gitRepo.setLicense(license); - String homepage = (!json.get("homepage").isJsonNull()) - ? json.getAsJsonPrimitive("homepage") - .getAsString() - .trim() - : null; - gitRepo.setHomepage(Strings.emptyToNull(homepage)); - - Long stargazers = json.getAsJsonPrimitive("stargazers_count").getAsLong(); - gitRepo.setStargazers(stargazers); - Long forks = json.getAsJsonPrimitive("forks_count").getAsLong(); + Long forks = json.getAsJsonPrimitive("forks").getAsLong(); gitRepo.setForks(forks); - Long watchers = json.getAsJsonPrimitive("subscribers_count").getAsLong(); - gitRepo.setWatchers(watchers); - Long size = json.getAsJsonPrimitive("size").getAsLong(); - gitRepo.setSize(size); - gitRepo.setCreatedAt(createdAt); - gitRepo.setPushedAt(pushedAt); - gitRepo.setUpdatedAt(updatedAt); - - boolean hasWiki = json.getAsJsonPrimitive("has_wiki").getAsBoolean(); + Boolean hasWiki = json.getAsJsonPrimitive("has_wiki").getAsBoolean(); gitRepo.setHasWiki(hasWiki); - Boolean isFork = json.getAsJsonPrimitive("fork").getAsBoolean(); + Boolean isFork = json.getAsJsonPrimitive("is_fork").getAsBoolean(); gitRepo.setIsFork(isFork); - Boolean isArchived = json.getAsJsonPrimitive("archived").getAsBoolean(); + Boolean isArchived = json.getAsJsonPrimitive("is_archived").getAsBoolean(); gitRepo.setIsArchived(isArchived); - Long commits = gitHubApiConnector.fetchNumberOfCommits(name); - Long branches = gitHubApiConnector.fetchNumberOfBranches(name); - Long releases = gitHubApiConnector.fetchNumberOfReleases(name); - Long contributors = gitHubApiConnector.fetchNumberOfContributors(name); - gitRepo.setCommits(commits); + Long stargazers = json.getAsJsonObject("stars") + .getAsJsonPrimitive("count") + .getAsLong(); + gitRepo.setStargazers(stargazers); + + Long branches = json.getAsJsonObject("branches") + .getAsJsonPrimitive("count") + .getAsLong(); gitRepo.setBranches(branches); + + Long releases = json.getAsJsonObject("releases") + .getAsJsonPrimitive("count") + .getAsLong(); gitRepo.setReleases(releases); - gitRepo.setContributors(contributors); - Long totalPullRequests = gitHubApiConnector.fetchNumberOfAllPulls(name); - Long openPullRequests = gitHubApiConnector.fetchNumberOfOpenPulls(name); + Long watchers = json.getAsJsonObject("watchers") + .getAsJsonPrimitive("count") + .getAsLong(); + gitRepo.setWatchers(watchers); + + Long totalPullRequests = json.getAsJsonObject("total_pull_requests") + .getAsJsonPrimitive("count") + .getAsLong(); gitRepo.setTotalPullRequests(totalPullRequests); + + Long openPullRequests = json.getAsJsonObject("open_pull_requests") + .getAsJsonPrimitive("count") + .getAsLong(); gitRepo.setOpenPullRequests(openPullRequests); - boolean hasIssues = json.getAsJsonPrimitive("has_issues").getAsBoolean(); - if (hasIssues) { - Long totalIssues = gitHubApiConnector.fetchNumberOfAllIssuesAndPulls(name) - totalPullRequests; - Long openIssues = gitHubApiConnector.fetchNumberOfOpenIssuesAndPulls(name) - openPullRequests; - gitRepo.setTotalIssues(totalIssues); - gitRepo.setOpenIssues(openIssues); + Long totalIssues = json.getAsJsonObject("total_issues") + .getAsJsonPrimitive("count") + .getAsLong(); + gitRepo.setTotalIssues(totalIssues); + + Long openIssues = json.getAsJsonObject("open_issues") + .getAsJsonPrimitive("count") + .getAsLong(); + gitRepo.setOpenIssues(openIssues); + + JsonElement defaultBranch = json.get("default_branch"); + if (!defaultBranch.isJsonNull()) { + /* + * This can technically happen for uninitialized repositories + * (e.g. https://github.com/dabico/dl4se-empty). + * While these should typically never be encountered while mining, + * it's better to be safe than sorry... + */ + String branchName = defaultBranch.getAsJsonObject() + .getAsJsonPrimitive("name") + .getAsString(); + gitRepo.setDefaultBranch(branchName); + JsonObject history = defaultBranch.getAsJsonObject() + .getAsJsonObject("history"); + Long commits = history.getAsJsonObject("commits") + .getAsJsonPrimitive("count") + .getAsLong(); + gitRepo.setCommits(commits); + JsonObject commit = history.getAsJsonObject("commits") + .getAsJsonArray("items") + .get(0) + .getAsJsonObject() + .getAsJsonObject("commit"); + Date lastCommit = Dates.fromGitDateString(commit.getAsJsonPrimitive("date").getAsString()); + gitRepo.setLastCommit(lastCommit); + String lastCommitSHA = commit.getAsJsonPrimitive("sha").getAsString(); + gitRepo.setLastCommitSHA(lastCommitSHA); } else { - gitRepo.setTotalIssues(0L); - gitRepo.setOpenIssues(0L); + gitRepo.setCommits(0L); } - Commit commit = gitHubApiConnector.fetchLastCommitInfo(name); - Date lastCommit = commit.getDate(); - String lastCommitSHA = commit.getSha(); - gitRepo.setLastCommit(lastCommit); - gitRepo.setLastCommitSHA(lastCommitSHA); + // Not available on GraphQL, so we have to keep using the page hack + Long contributors = gitHubApiConnector.fetchNumberOfContributors(name); + gitRepo.setContributors(contributors); Language mainLanguage = languageService.getOrCreate(language); gitRepo.setMainLanguage(mainLanguage); gitRepo = gitRepoService.createOrUpdate(gitRepo); - Set