Skip to content

Commit

Permalink
Merge pull request #343 from seart-group/enhancement/scheduling
Browse files Browse the repository at this point in the history
  • Loading branch information
dabico authored Apr 17, 2024
2 parents 0facda9 + e70a5b5 commit aab541f
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 17 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ Here's a list of project-specific arguments supported by the application that yo
| `ghs.crawler.minimum-stars` | int | 10 | Inclusive lower bound for the number of stars a project needs to have in order to be picked up by the crawler. Must not be negative. |
| `ghs.crawler.languages` | List<String> | See [application.properties](src/main/resources/application.properties) | List of language names that will be targeted during crawling. Must not contain blank strings. To ensure proper operation, the names must match those specified in [linguist](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml). |
| `ghs.crawler.start-date` | Date | 2008-01-01T00:00:00Z | Default crawler start date: the earliest date for repository crawling in the absence of prior crawl jobs. Value format: `yyyy-MM-dd'T'HH:mm:ss'Z'`. |
| `ghs.crawler.delay-between-runs` | Duration | PT6H | Delay between successive crawler runs, expressed as a duration string. |
| `ghs.analysis.enabled` | Boolean | true | Specifies if the analysis job is enabled. |
| `ghs.analysis.delay-between-runs` | Duration | PT6H | Delay between successive analysis runs, expressed as a duration string. |
| `ghs.analysis.max-pool-threads` | int | 3 | Maximum number of live threads dedicated to concurrently analyzing repositories. Must be positive. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import org.springframework.format.annotation.DateTimeFormat;

import javax.validation.constraints.NotBlank;
import javax.validation.constraints.NotNull;
import javax.validation.constraints.PastOrPresent;
import javax.validation.constraints.PositiveOrZero;
import java.time.Duration;
import java.util.Date;
import java.util.List;

Expand All @@ -30,22 +28,17 @@ public class CrawlerProperties {
@PastOrPresent
Date startDate;

@NotNull
Duration delayBetweenRuns;

@ConstructorBinding
public CrawlerProperties(
Boolean enabled,
int minimumStars,
List<String> languages,
@DateTimeFormat(pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'")
Date startDate,
Duration delayBetweenRuns
Date startDate
) {
this.enabled = enabled;
this.minimumStars = minimumStars;
this.languages = languages;
this.startDate = startDate;
this.delayBetweenRuns = delayBetweenRuns;
}
}
9 changes: 2 additions & 7 deletions src/main/java/ch/usi/si/seart/job/CrawlProjectsJob.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package ch.usi.si.seart.job;

import ch.usi.si.seart.config.properties.CrawlerProperties;
import ch.usi.si.seart.exception.MetadataCrawlingException;
import ch.usi.si.seart.exception.UnsplittableRangeException;
import ch.usi.si.seart.github.GitHubGraphQlConnector;
Expand Down Expand Up @@ -48,6 +47,7 @@
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
Expand All @@ -69,12 +69,10 @@ public class CrawlProjectsJob implements Runnable {
GitHubRestConnector gitHubRestConnector;
GitHubGraphQlConnector gitHubGraphQlConnector;

CrawlerProperties crawlerProperties;

Ranges.Printer<Date> rangePrinter;
Ranges.Splitter<Date> rangeSplitter;

@Scheduled(fixedDelayString = "${ghs.crawler.delay-between-runs}")
@Scheduled(fixedDelay = 1, timeUnit = TimeUnit.SECONDS)
public void run() {
log.info("Initializing language queue...");
Collection<Language> languages = languageService.getTargetedLanguages();
Expand All @@ -86,9 +84,6 @@ public void run() {
Language.Progress progress = languageService.getProgress(language);
new LanguageCrawler(language, progress).run();
}
Duration delay = crawlerProperties.getDelayBetweenRuns();
Instant instant = Instant.now().plus(delay);
log.info("Next crawl scheduled for: {}", Date.from(instant));
}

@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ ghs.crawler.languages[40]=F#
ghs.crawler.languages[41]=Elm
ghs.crawler.languages[42]=Zig
ghs.crawler.start-date=2008-01-01T00:00:00Z
ghs.crawler.delay-between-runs=PT6H

# Analysis Configuration
ghs.analysis.enabled=true
Expand Down

0 comments on commit aab541f

Please sign in to comment.