From bff82a15f5417931ffe4af9512181525912fd8fe Mon Sep 17 00:00:00 2001 From: Armin Stanitzok <21990230+GODrums@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:20:54 +0200 Subject: [PATCH] Efficient Repository Updates instead of Refetching (#103) --- .../base/BaseGitServiceEntityConverter.java | 3 + .../hephaestus/codereview/base/Comment.java | 1 + .../comment/IssueCommentConverter.java | 10 +- .../PullRequestReviewCommentConverter.java | 11 +- .../codereview/pullrequest/PullRequest.java | 2 +- .../pullrequest/PullRequestConverter.java | 13 +- .../pullrequest/PullRequestRepository.java | 11 + .../pullrequest/review/PullRequestReview.java | 2 +- .../review/PullRequestReviewConverter.java | 6 +- .../codereview/repository/Repository.java | 6 +- .../repository/RepositoryConverter.java | 14 +- .../codereview/user/UserConverter.java | 17 +- .../scheduler/GitHubDataSyncService.java | 218 ++++++++++-------- 13 files changed, 203 insertions(+), 111 deletions(-) diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/base/BaseGitServiceEntityConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/base/BaseGitServiceEntityConverter.java index ac1a1c61..5ba3e8b8 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/base/BaseGitServiceEntityConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/base/BaseGitServiceEntityConverter.java @@ -3,6 +3,7 @@ import org.kohsuke.github.GHObject; import org.springframework.core.convert.converter.Converter; import org.springframework.data.convert.ReadingConverter; +import org.springframework.lang.NonNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; @@ -16,6 +17,8 @@ public abstract class BaseGitServiceEntityConverter { + protected static final Logger logger = LoggerFactory.getLogger(IssueCommentConverter.class); + @Override public IssueComment convert(@NonNull GHIssueComment source) { - IssueComment comment = new IssueComment(); + return update(source, new IssueComment()); + } + + @Override + public IssueComment update(@NonNull GHIssueComment source, @NonNull IssueComment comment) { convertBaseFields(source, comment); comment.setBody(source.getBody()); return comment; diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/comment/review/PullRequestReviewCommentConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/comment/review/PullRequestReviewCommentConverter.java index 6eb03c59..03982fae 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/comment/review/PullRequestReviewCommentConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/comment/review/PullRequestReviewCommentConverter.java @@ -1,6 +1,8 @@ package de.tum.in.www1.hephaestus.codereview.comment.review; import org.kohsuke.github.GHPullRequestReviewComment; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.lang.NonNull; import org.springframework.stereotype.Component; @@ -10,9 +12,16 @@ public class PullRequestReviewCommentConverter extends BaseGitServiceEntityConverter { + protected static final Logger logger = LoggerFactory.getLogger(PullRequestReviewCommentConverter.class); + @Override public PullRequestReviewComment convert(@NonNull GHPullRequestReviewComment source) { - PullRequestReviewComment comment = new PullRequestReviewComment(); + return update(source, new PullRequestReviewComment()); + } + + @Override + public PullRequestReviewComment update(@NonNull GHPullRequestReviewComment source, + @NonNull PullRequestReviewComment comment) { convertBaseFields(source, comment); comment.setBody(source.getBody()); comment.setCommit(source.getCommitId()); diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequest.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequest.java index 4e0f4dde..9ad5c321 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequest.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequest.java @@ -59,7 +59,7 @@ public class PullRequest extends BaseGitServiceEntity { @ToString.Exclude private Set comments = new HashSet<>(); - @OneToMany(cascade = CascadeType.ALL, mappedBy = "pullRequest") + @OneToMany(cascade = CascadeType.REFRESH, mappedBy = "pullRequest") @ToString.Exclude private Set reviews = new HashSet<>(); diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequestConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequestConverter.java index 855152d1..f065f597 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequestConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/PullRequestConverter.java @@ -21,14 +21,19 @@ public class PullRequestConverter extends BaseGitServiceEntityConverter { Set findByAuthor_Login(String authorLogin); + + @Query(""" + SELECT p + FROM PullRequest p + JOIN FETCH p.comments + JOIN FETCH p.reviews + WHERE p.id = :id + """) + Optional findByIdWithEagerRelations(Long id); } \ No newline at end of file diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReview.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReview.java index daf1a197..74bc73da 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReview.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReview.java @@ -40,7 +40,7 @@ public class PullRequestReview extends BaseGitServiceEntity { private OffsetDateTime submittedAt; - @OneToMany(cascade = CascadeType.ALL, mappedBy = "review") + @OneToMany(cascade = CascadeType.REFRESH, mappedBy = "review") @ToString.Exclude private Set comments = new HashSet<>(); diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReviewConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReviewConverter.java index e9396ed1..4f382f45 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReviewConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/pullrequest/review/PullRequestReviewConverter.java @@ -17,7 +17,11 @@ public class PullRequestReviewConverter extends BaseGitServiceEntityConverter pullRequests = new HashSet<>(); + + public void addPullRequest(PullRequest pullRequest) { + pullRequests.add(pullRequest); + } } diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/repository/RepositoryConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/repository/RepositoryConverter.java index 2482b505..7cb69e9d 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/repository/RepositoryConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/repository/RepositoryConverter.java @@ -2,8 +2,9 @@ import org.kohsuke.github.GHRepository; import org.kohsuke.github.GHRepository.Visibility; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.lang.NonNull; -import org.springframework.lang.Nullable; import org.springframework.stereotype.Component; import de.tum.in.www1.hephaestus.codereview.base.BaseGitServiceEntityConverter; @@ -11,15 +12,20 @@ @Component public class RepositoryConverter extends BaseGitServiceEntityConverter { + protected static final Logger logger = LoggerFactory.getLogger(RepositoryConverter.class); + @Override - @Nullable public Repository convert(@NonNull GHRepository source) { - Repository repository = new Repository(); + return update(source, new Repository()); + } + + @Override + public Repository update(@NonNull GHRepository source, @NonNull Repository repository) { convertBaseFields(source, repository); repository.setName(source.getName()); repository.setNameWithOwner(source.getFullName()); - repository.setDescription(source.getDescription()); repository.setUrl(source.getHtmlUrl().toString()); + repository.setDescription(source.getDescription()); repository.setDefaultBranch(source.getDefaultBranch()); repository.setVisibility(convertVisibility(source.getVisibility())); repository.setHomepage(source.getHomepage()); diff --git a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/user/UserConverter.java b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/user/UserConverter.java index 3b9cf461..3f2dadbb 100644 --- a/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/user/UserConverter.java +++ b/server/application-server/src/main/java/de/tum/in/www1/hephaestus/codereview/user/UserConverter.java @@ -2,6 +2,7 @@ import java.io.IOException; +import org.kohsuke.github.GHUser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.lang.NonNull; @@ -15,21 +16,25 @@ public class UserConverter extends BaseGitServiceEntityConverter users = new HashSet<>(); private Set reviews = new HashSet<>(); @@ -89,8 +94,6 @@ public GitHubDataSyncService(RepositoryRepository repositoryRepository, PullRequ PullRequestReviewRepository prReviewRepository, IssueCommentRepository commentRepository, PullRequestReviewCommentRepository reviewCommentRepository, UserRepository userRepository) { - logger.info("Hello from GitHubDataSyncService!"); - this.repositoryRepository = repositoryRepository; this.pullRequestRepository = pullRequestRepository; this.prReviewRepository = prReviewRepository; @@ -100,12 +103,14 @@ public GitHubDataSyncService(RepositoryRepository repositoryRepository, PullRequ } public void syncData() { + if (!initGithubClient()) { + logger.error("Aborted GitHub data sync due to error during initialization of GitHub client."); + return; + } int successfullySyncedRepositories = 0; - this.cutOffTime = new Date(System.currentTimeMillis() - 1000 * 60 * 60 * 24 * timeframe); - logger.info("Cut-off time for the data sync: " + cutOffTime); for (String repositoryName : repositoriesToMonitor) { try { - syncRepository(repositoryName); + this.fetchRepository(repositoryName); logger.info("GitHub data sync completed successfully for repository: " + repositoryName); successfullySyncedRepositories++; } catch (Exception e) { @@ -117,58 +122,63 @@ public void syncData() { + repositoriesToMonitor.length + " repositories for the last " + timeframe + " day(s)."); } - private void syncRepository(String repositoryName) throws IOException { + private boolean initGithubClient() { if (ghAuthToken == null || ghAuthToken.isEmpty() || ghAuthToken.equals("null")) { logger.error("No GitHub auth token provided!"); - return; + return false; } if (github == null) { - github = new GitHubBuilder().withOAuthToken(ghAuthToken).build(); + try { + github = new GitHubBuilder().withOAuthToken(ghAuthToken).build(); + } catch (IOException e) { + logger.error("Error while initializing GitHub client: " + e.getMessage()); + return false; + } } - Repository repo = this.fetchRepository(repositoryName); - logger.info("Synced repository until " + repo.getUpdatedAt()); + return github.isCredentialValid(); } /** * Rest API implementation of fetching a Github repository. * - * @return The repository corresponding to the given nameWithOwner. * @throws IOException */ - public Repository fetchRepository(String nameWithOwner) throws IOException { - if (github == null) { - logger.error("GitHub client not initialized correctly!"); - return null; - } - - // Avoid double fetching of the same repository - Repository existingRepository = repositoryRepository.findByNameWithOwnerWithEagerPullRequests(nameWithOwner); - if (existingRepository != null) { - logger.info("Found existing repository: " + existingRepository); - return existingRepository; - } - + public void fetchRepository(String nameWithOwner) throws IOException { GHRepository ghRepo = github.getRepository(nameWithOwner); - Repository repository = repositoryConverter.convert(ghRepo); + // Avoid double fetching of already stored repositories + Repository repository = repositoryRepository.findByNameWithOwnerWithEagerPullRequests(nameWithOwner); if (repository == null) { - logger.error("Error while fetching repository!"); - return null; + logger.info("Creating new repository: " + nameWithOwner); + repository = repositoryRepository.save(repositoryConverter.convert(ghRepo)); + this.cutOffTime = new Date(System.currentTimeMillis() - 1000 * 60 * 60 * 24 * timeframe).toInstant() + .atOffset(ZoneOffset.UTC); + } else { + // Update cut-off time to avoid refetching stored data. + // The updatedAt field of the repository does not change with every PR update, + // so it will always be earlier + this.cutOffTime = repository.getPullRequests().stream().map(PullRequest::getUpdatedAt) + .max(OffsetDateTime::compareTo).orElse(repository.getUpdatedAt()); } - // preliminary save to make it referenceable - repository = repositoryRepository.save(repository); + logger.info("Cut-off time for the repository data sync: " + this.cutOffTime); - Set prs = getPullRequestsFromGHRepository(ghRepo, repository); + Set prs = fetchPullRequestsFromGHRepository(ghRepo, repository); logger.info("Found total of " + prs.size() + " PRs"); - repository.setPullRequests(prs); + for (PullRequest pr : prs) { + if (repository.getPullRequests().stream().noneMatch(p -> p.getId().equals(pr.getId()))) { + repository.addPullRequest(pr); + } + } pullRequestRepository.saveAll(prs); userRepository.saveAll(users); repositoryRepository.save(repository); prReviewRepository.saveAll(reviews); - return repository; + + users = new HashSet<>(); + reviews = new HashSet<>(); } - private Set getPullRequestsFromGHRepository(GHRepository ghRepo, Repository repository) + private Set fetchPullRequestsFromGHRepository(GHRepository ghRepo, Repository repository) throws IOException { // Iterator allows us to handle pullrequests without knowing the next ones PagedIterator pullRequests = ghRepo.queryPullRequests() @@ -180,7 +190,6 @@ private Set getPullRequestsFromGHRepository(GHRepository ghRepo, Re // Only fetch next page if all PRs are still within the timeframe AtomicBoolean fetchStillInTimeframe = new AtomicBoolean(true); - while (fetchStillInTimeframe.get() && pullRequests.hasNext()) { List nextPage = pullRequests.nextPage(); logger.info("Fetched " + nextPage.size() + " PRs from Github"); @@ -189,34 +198,18 @@ private Set getPullRequestsFromGHRepository(GHRepository ghRepo, Re fetchStillInTimeframe.set(false); return null; } - PullRequest pullRequest = pullRequestRepository.save(pullRequestConverter.convert(pr)); - pullRequest.setRepository(repository); - try { - User prAuthor = getUserFromGHUser(pr.getUser()); - prAuthor.addPullRequest(pullRequest); - pullRequest.setAuthor(prAuthor); - } catch (IOException e) { - // Dont mind this error as it occurs only for bots - pullRequest.setAuthor(null); - } + + PullRequest pullRequest = createOrUpdatePullRequest(pr, repository); try { - Set newReviews = pr.listReviews().toList().stream().map(review -> { - PullRequestReview prReview = prReviewRepository - .save(reviewConverter.convert(review)); - try { - User reviewAuthor = getUserFromGHUser(review.getUser()); - reviewAuthor.addReview(prReview); - prReview.setAuthor(reviewAuthor); - } catch (IOException e) { - // Dont mind this error as it occurs only for bots - } - prReview.setPullRequest(pullRequest); - return prReview; - }).collect(Collectors.toSet()); + Set newReviews = pr.listReviews().toList().stream() + .map(review -> createOrUpdatePullRequestReview(review, pullRequest)) + .collect(Collectors.toSet()); for (PullRequestReview prReview : newReviews) { - pullRequest.addReview(prReview); - reviews.add(prReview); + if (reviews.stream().noneMatch(r -> r.getId().equals(prReview.getId()))) { + pullRequest.addReview(prReview); + reviews.add(prReview); + } } logger.info("Found " + newReviews.size() + " reviews for PR #" + pullRequest.getNumber()); } catch (IOException e) { @@ -233,15 +226,22 @@ private Set getPullRequestsFromGHRepository(GHRepository ghRepo, Re logger.error("Error while fetching PR review comments!"); } - try { - Collection comments = getCommentsFromGHPullRequest(pr, pullRequest); - // comments = commentRepository.saveAll(comments); - for (IssueComment c : comments) { - pullRequest.addComment(c); + PagedIterator commentIterator = pr.queryComments().list().withPageSize(100) + .iterator(); + Set comments = pullRequest.getComments(); + while (commentIterator.hasNext()) { + List nextCommentPage = commentIterator.nextPage(); + try { + IssueComment nextComment = createOrUpdateIssueComment(nextCommentPage.getFirst(), pullRequest); + if (comments.stream().noneMatch(c -> c.getId().equals(nextComment.getId()))) { + comments.add(nextComment); + pullRequest.addComment(nextComment); + } + } catch (NoSuchElementException e) { + break; } - } catch (IOException e) { - logger.error("Error while fetching PR comments!"); } + commentRepository.saveAll(comments); return pullRequest; }).filter(Objects::nonNull).collect(Collectors.toSet())); @@ -268,30 +268,66 @@ private PullRequestReviewComment handleSinglePullRequestReviewComment(GHPullRequ } /** - * Retrieves the comments of a given pull request. + * Creates new PullRequest or updates instance if it already exists. * - * @param pr The GH pull request. - * @param pullRequest Stored PR to which the comments belong. - * @return The comments of the given pull request. - * @throws IOException when fetching the comments fails + * @param pr Source GitHub PullRequest + * @param repository Repository the PR belongs to + * @return PullRequest entity */ - private Set getCommentsFromGHPullRequest(GHPullRequest pr, PullRequest pullRequest) - throws IOException { - return pr.queryComments().list().withPageSize(100).toList().stream() - .map(comment -> { - IssueComment c = commentRepository.save(commentConverter.convert(comment)); - c.setPullRequest(pullRequest); - User commentAuthor; - try { - commentAuthor = getUserFromGHUser(comment.getUser()); - commentAuthor.addIssueComment(c); - } catch (IOException e) { - // Dont mind this error as it occurs only for bots - commentAuthor = null; - } - c.setAuthor(commentAuthor); - return c; - }).collect(Collectors.toSet()); + private PullRequest createOrUpdatePullRequest(GHPullRequest pr, Repository repository) { + Optional pullRequest = pullRequestRepository.findByIdWithEagerRelations(pr.getId()); + if (pullRequest.isPresent()) { + return pullRequestConverter.update(pr, pullRequest.get()); + } + + PullRequest newPullRequest = pullRequestRepository.save(pullRequestConverter.convert(pr)); + newPullRequest.setRepository(repository); + repository.addPullRequest(newPullRequest); + try { + User prAuthor = getUserFromGHUser(pr.getUser()); + prAuthor.addPullRequest(newPullRequest); + newPullRequest.setAuthor(prAuthor); + } catch (IOException e) { + // Dont mind this error as it occurs only for bots + newPullRequest.setAuthor(null); + } + return newPullRequest; + } + + private PullRequestReview createOrUpdatePullRequestReview(GHPullRequestReview review, PullRequest pullRequest) { + Optional pullRequestReview = prReviewRepository.findByIdWithEagerComments(review.getId()); + if (pullRequestReview.isPresent()) { + return reviewConverter.update(review, pullRequestReview.get()); + } + PullRequestReview newPullRequestReview = prReviewRepository.save(reviewConverter.convert(review)); + try { + User reviewAuthor = getUserFromGHUser(review.getUser()); + reviewAuthor.addReview(newPullRequestReview); + newPullRequestReview.setAuthor(reviewAuthor); + } catch (IOException e) { + // Dont mind this error as it occurs only for bots + } + newPullRequestReview.setPullRequest(pullRequest); + return newPullRequestReview; + } + + private IssueComment createOrUpdateIssueComment(GHIssueComment comment, PullRequest pullRequest) { + Optional issueComment = commentRepository.findById(comment.getId()); + if (issueComment.isPresent()) { + return commentConverter.update(comment, issueComment.get()); + } + IssueComment newIssueComment = commentRepository.save(commentConverter.convert(comment)); + newIssueComment.setPullRequest(pullRequest); + User commentAuthor; + try { + commentAuthor = getUserFromGHUser(comment.getUser()); + commentAuthor.addIssueComment(newIssueComment); + } catch (IOException e) { + // Dont mind this error as it occurs only for bots + commentAuthor = null; + } + newIssueComment.setAuthor(commentAuthor); + return newIssueComment; } /** @@ -347,7 +383,7 @@ private PullRequestReview getPRRFromReviewId(Long reviewId) { private boolean isResourceRecent(GHObject obj) { try { return obj.getUpdatedAt() != null - && obj.getUpdatedAt().after(cutOffTime); + && obj.getUpdatedAt().toInstant().atOffset(ZoneOffset.UTC).isAfter(cutOffTime); } catch (IOException e) { logger.error("Error while fetching createdAt! Resource ID: " + obj.getId()); return false;