From 64e793a152fbf51e580cd9f97cb1195dafd72d25 Mon Sep 17 00:00:00 2001 From: Gustavo Pinto Date: Mon, 5 Aug 2013 20:48:09 -0300 Subject: [PATCH] fixing some troubles when downloading projects. r #6, #56 --- .../cin/groundhog/GroundhogException.java | 4 ++ .../cin/groundhog/crawler/CrawlGitHub.java | 16 ++++--- .../groundhog/crawler/DownloadExecption.java | 23 ++++++++++ .../cin/groundhog/crawler/ForgeCrawler.java | 11 +---- .../parser/NotAProjectException.java | 27 ++++++++++++ .../parser/java/NotAJavaProjectException.java | 6 +++ .../parser/license/LicenseParser.java | 44 +++++++++++++------ .../cin/groundhog/scmclient/GitClient.java | 21 ++++----- .../cin/groundhog/search/SearchGitHub.java | 23 ++++++---- 9 files changed, 123 insertions(+), 52 deletions(-) create mode 100644 src/java/main/br/ufpe/cin/groundhog/crawler/DownloadExecption.java create mode 100644 src/java/main/br/ufpe/cin/groundhog/parser/NotAProjectException.java diff --git a/src/java/main/br/ufpe/cin/groundhog/GroundhogException.java b/src/java/main/br/ufpe/cin/groundhog/GroundhogException.java index e086f91..5ee68c2 100644 --- a/src/java/main/br/ufpe/cin/groundhog/GroundhogException.java +++ b/src/java/main/br/ufpe/cin/groundhog/GroundhogException.java @@ -10,6 +10,10 @@ public class GroundhogException extends RuntimeException { private static final long serialVersionUID = -3563928567447310893L; + public GroundhogException() { + super(); + } + public GroundhogException(String msg) { super(msg); } diff --git a/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java b/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java index 7dbecd0..e59b4ce 100644 --- a/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java +++ b/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java @@ -3,9 +3,6 @@ import java.io.File; import java.util.Random; -import org.eclipse.jgit.api.errors.GitAPIException; -import org.eclipse.jgit.api.errors.InvalidRemoteException; -import 
org.eclipse.jgit.api.errors.TransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,15 +31,20 @@ public CrawlGitHub(GitClient gitClient, File destinationFolder) { } @Override - protected File downloadProject(Project project) - throws InvalidRemoteException, TransportException, GitAPIException { + protected File downloadProject(Project project) { String projectName = project.getName() + "_" + new Random().nextInt(); String cloneUrl = project.getScmURL(); File projectFolder = new File(destinationFolder, projectName); logger.info(String.format("Downloading %s project..", project.getName())); - this.gitClient.clone(cloneUrl, projectFolder); - return projectFolder; + try { + this.gitClient.clone(cloneUrl, projectFolder); + return projectFolder; + } catch (Exception e) { + String error = String.format("Unable to download %s", project.getName()); + logger.error(error); + throw new DownloadExecption(error); + } } } \ No newline at end of file diff --git a/src/java/main/br/ufpe/cin/groundhog/crawler/DownloadExecption.java b/src/java/main/br/ufpe/cin/groundhog/crawler/DownloadExecption.java new file mode 100644 index 0000000..2ffab40 --- /dev/null +++ b/src/java/main/br/ufpe/cin/groundhog/crawler/DownloadExecption.java @@ -0,0 +1,23 @@ +package br.ufpe.cin.groundhog.crawler; + +import br.ufpe.cin.groundhog.GroundhogException; + +public class DownloadExecption extends GroundhogException { + + /** + * + */ + private static final long serialVersionUID = 3008366459405858621L; + + public DownloadExecption(String msg) { + super(msg); + } + + public DownloadExecption(String msg, Throwable cause) { + super(msg, cause); + } + + public DownloadExecption(Throwable cause) { + super(cause); + } +} diff --git a/src/java/main/br/ufpe/cin/groundhog/crawler/ForgeCrawler.java b/src/java/main/br/ufpe/cin/groundhog/crawler/ForgeCrawler.java index 2b64e3b..c4b5cd2 100644 --- a/src/java/main/br/ufpe/cin/groundhog/crawler/ForgeCrawler.java +++ 
b/src/java/main/br/ufpe/cin/groundhog/crawler/ForgeCrawler.java @@ -61,17 +61,8 @@ public File call() throws Exception { fs.add(f); } - shutdown(); + ex.shutdownNow(); return fs; } - - /** - * Guarantees downloads to be executed, but no new downloads will be accepted. - * Should be called after downloadProjects. - */ - private void shutdown() { - ex.shutdownNow(); - } - } \ No newline at end of file diff --git a/src/java/main/br/ufpe/cin/groundhog/parser/NotAProjectException.java b/src/java/main/br/ufpe/cin/groundhog/parser/NotAProjectException.java new file mode 100644 index 0000000..a71a9e2 --- /dev/null +++ b/src/java/main/br/ufpe/cin/groundhog/parser/NotAProjectException.java @@ -0,0 +1,27 @@ +package br.ufpe.cin.groundhog.parser; + +import br.ufpe.cin.groundhog.GroundhogException; + +public class NotAProjectException extends GroundhogException { + + /** + * + */ + private static final long serialVersionUID = 2115582632203381099L; + + public NotAProjectException(String msg, Throwable cause) { + super(msg, cause); + } + + public NotAProjectException() { + super(); + } + + public NotAProjectException(String msg) { + super(msg); + } + + public NotAProjectException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/main/br/ufpe/cin/groundhog/parser/java/NotAJavaProjectException.java b/src/java/main/br/ufpe/cin/groundhog/parser/java/NotAJavaProjectException.java index 415b2d1..a649895 100644 --- a/src/java/main/br/ufpe/cin/groundhog/parser/java/NotAJavaProjectException.java +++ b/src/java/main/br/ufpe/cin/groundhog/parser/java/NotAJavaProjectException.java @@ -2,6 +2,12 @@ import br.ufpe.cin.groundhog.GroundhogException; +/** + * Raise an exception when no source code is found during parsing + * + * @author ghlp + * @since 0.1.0 + */ public class NotAJavaProjectException extends GroundhogException { /** diff --git a/src/java/main/br/ufpe/cin/groundhog/parser/license/LicenseParser.java 
b/src/java/main/br/ufpe/cin/groundhog/parser/license/LicenseParser.java index f9ee95e..658fbc8 100644 --- a/src/java/main/br/ufpe/cin/groundhog/parser/license/LicenseParser.java +++ b/src/java/main/br/ufpe/cin/groundhog/parser/license/LicenseParser.java @@ -6,10 +6,18 @@ import org.slf4j.LoggerFactory; import br.ufpe.cin.groundhog.License; +import br.ufpe.cin.groundhog.parser.NotAProjectException; import br.ufpe.cin.groundhog.util.FileUtil; import com.google.common.collect.Lists; +/** + * This class tries to find which license is in use. It raises an exception + * if no source code is found in the root dir. + * + * @author ghlp + * @since 0.1.0 + */ public class LicenseParser { private static Logger logger = LoggerFactory.getLogger(LicenseParser.class); @@ -18,10 +26,17 @@ public class LicenseParser { private final File root; public LicenseParser(File project) { + checkIfIsProject(project); this.files = project.listFiles(); this.root = project; } + private void checkIfIsProject(File project) { + if (files.length == 0) { + throw new NotAProjectException(); + } + } + /** * Parses the top level folder looking for licenses files */ @@ -29,26 +44,28 @@ public License parser() { logger.info("Running license parser.."); FileUtil filesUtils = FileUtil.getInstance(); - - for(File file: files) { - if(filesUtils.isTextFile(file)) { + + for (File file : files) { + if (filesUtils.isTextFile(file)) { String content = filesUtils.readAllLines(file); - - if(containsLicenseWord(content)) { + + if (containsLicenseWord(content)) { return extractLicense(content); } } } - - logger.info(String.format("No license found for project %s", root.getName())); + + logger.info(String.format("No license found for project %s", + root.getName())); return new License("unlincesed"); } private License extractLicense(String content) { - - for(String license: Licenses.names()) { - if(content.contains(license)) { - logger.info(String.format("License found!
%s uses %s.", root.getName(), license)); + + for (String license : Licenses.names()) { + if (content.contains(license)) { + logger.info(String.format("License found! %s uses %s license.", + root.getName(), license)); return new License(license); } } @@ -57,8 +74,9 @@ private License extractLicense(String content) { private boolean containsLicenseWord(String content) { - for (String licenseKeyword : Lists.newArrayList("License")) { - if (content.contains(licenseKeyword)) { + for (String licenseKeyword : Lists.newArrayList("license", "copyright", + "permission")) { + if (content.toLowerCase().contains(licenseKeyword)) { return true; } } diff --git a/src/java/main/br/ufpe/cin/groundhog/scmclient/GitClient.java b/src/java/main/br/ufpe/cin/groundhog/scmclient/GitClient.java index 0900e78..1466b36 100644 --- a/src/java/main/br/ufpe/cin/groundhog/scmclient/GitClient.java +++ b/src/java/main/br/ufpe/cin/groundhog/scmclient/GitClient.java @@ -14,7 +14,9 @@ import org.eclipse.jgit.api.errors.CheckoutConflictException; import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.api.errors.InvalidRefNameException; +import org.eclipse.jgit.api.errors.InvalidRemoteException; import org.eclipse.jgit.api.errors.RefAlreadyExistsException; +import org.eclipse.jgit.api.errors.TransportException; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; @@ -25,24 +27,17 @@ import br.ufpe.cin.groundhog.util.Dates; public class GitClient { - + /** * Performs a clone operation for the given project URL and places the fetched code * into the destination directory. 
* @param url the project's URL - * @param destination + * @param destination */ - public void clone(String url, File destination) { - try { - Git git = Git.cloneRepository() - .setURI(url) - .setDirectory(destination) - .call(); - - git.getRepository().close(); - } catch (Exception e) { - e.printStackTrace(); - } + public void clone(String url, File destination) + throws InvalidRemoteException, TransportException, GitAPIException { + Git git = Git.cloneRepository().setURI(url).setDirectory(destination).call(); + git.getRepository().close(); } /** diff --git a/src/java/main/br/ufpe/cin/groundhog/search/SearchGitHub.java b/src/java/main/br/ufpe/cin/groundhog/search/SearchGitHub.java index 3b39ae4..5746470 100644 --- a/src/java/main/br/ufpe/cin/groundhog/search/SearchGitHub.java +++ b/src/java/main/br/ufpe/cin/groundhog/search/SearchGitHub.java @@ -155,9 +155,7 @@ public List getAllProjects(int start, int limit) throws SearchException String searchUrl = String.format("%s/repositories?since=%s%s", ROOT, since, this.oauthToken); String response = requests.get(searchUrl); - JsonElement jsonElement = parser.parse(response); - - JsonArray jsonArray = jsonElement.getAsJsonArray(); + JsonArray jsonArray = parser.parse(response).getAsJsonArray(); int counter = 0; @@ -170,9 +168,15 @@ public List getAllProjects(int start, int limit) throws SearchException String searchUrlLegacy = String.format("%s/legacy/repos/search/%s?%s", ROOT , repoName, this.oauthToken); String jsonLegacy = requests.get(searchUrlLegacy); - jsonElement = parser.parse(jsonLegacy); - - JsonObject jsonObject = parser.parse(jsonLegacy).getAsJsonObject(); + JsonElement jsonElement = parser.parse(jsonLegacy); + + JsonObject jsonObject = null; + try { + jsonObject = jsonElement.getAsJsonObject(); + } catch (Exception e) { + System.out.println(jsonLegacy); + continue; + } JsonArray jsonArrayLegacy = jsonObject.get("repositories").getAsJsonArray(); if(jsonArrayLegacy.size() > 0) { @@ -197,14 +201,15 @@ public List 
getAllProjects(int start, int limit) throws SearchException JsonElement lastPagesRepository = jsonArray.get(jsonArray.size() -1); since = lastPagesRepository.getAsJsonObject().get("id").getAsInt(); } + return projects; } catch (GroundhogException e) { - + e.printStackTrace(); + throw new SearchException(e); + } catch (Exception e) { e.printStackTrace(); throw new SearchException(e); } - - return projects; } @Override