Skip to content

Commit

Permalink
Taxonomy: added rejection of name types
Browse files Browse the repository at this point in the history
  • Loading branch information
rbossy committed Aug 17, 2021
1 parent 850f494 commit f9b16d9
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import fr.inra.maiage.bibliome.util.taxonomy.Taxon;
import fr.inra.maiage.bibliome.util.taxonomy.reject.RejectDisjunction;
import fr.inra.maiage.bibliome.util.taxonomy.reject.RejectName;
import fr.inra.maiage.bibliome.util.taxonomy.reject.RejectNone;
import fr.inra.maiage.bibliome.util.taxonomy.reject.RejectNameType;
import fr.inra.maiage.bibliome.util.taxonomy.saturate.Saturate;

/**
Expand All @@ -50,6 +50,7 @@
public class BuildDictionary extends CLIOParser {
private final Collection<File> nodesFiles = new ArrayList<File>();
private final Collection<File> namesFiles = new ArrayList<File>();
private final Collection<String> rejectedNameTypes = new ArrayList<String>();
private File saturationFile;
private File rejectionFile;
private final List<TaxonNamePattern> pattern = new ArrayList<TaxonNamePattern>(Arrays.asList(
Expand All @@ -73,6 +74,11 @@ protected boolean processArgument(String arg) throws CLIOException {
return false;
}

/**
 * Records one more name type to be rejected.
 * <p>
 * Bound to the {@code -rejectNameType} command-line option. Every call appends
 * to the collection of rejected types, so the option can presumably be given
 * several times (depends on how the CLI parser dispatches options; to be confirmed).
 *
 * @param nameType the name type to reject
 */
@CLIOption("-rejectNameType")
public void addRejectedNameType(String nameType) {
    rejectedNameTypes.add(nameType);
}

@CLIOption("-prefix")
public void setIdPrefix(String idPrefix) {
this.idPrefix = idPrefix;
Expand Down Expand Up @@ -226,9 +232,14 @@ public static void main(String[] args) throws CLIOException, IOException, Invali
}

/* Name filter and synonym generation */
RejectName reject = RejectNone.INSTANCE;
if (inst.rejectionFile != null)
reject = new RejectDisjunction(taxonomy.readReject(logger, inst.rejectionFile));
RejectDisjunction rejects = new RejectDisjunction();
if (inst.rejectionFile != null) {
rejects.add(taxonomy.readReject(logger, inst.rejectionFile));
}
if (!inst.rejectedNameTypes.isEmpty()) {
rejects.add(new RejectNameType(inst.rejectedNameTypes));
}
RejectName reject = rejects.simplify();
for (File f : inst.namesFiles) {
taxonomy.readNames(logger, f, reject);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,17 @@ public RejectConjunction(RejectName first, RejectName second) {
public boolean reject(String taxid, Name name) {
    // A conjunction rejects a name only when both operands reject it.
    // The second operand is not consulted when the first one accepts the name.
    if (!first.reject(taxid, name)) {
        return false;
    }
    return second.reject(taxid, name);
}

/**
 * Simplifies both operands, then the conjunction itself.
 * <p>
 * A conjunction rejects only what both operands reject, so as soon as one
 * simplified operand is {@link RejectNone#INSTANCE} the whole conjunction
 * can never reject anything.
 *
 * @return {@link RejectNone#INSTANCE} if either simplified operand is
 *         {@link RejectNone#INSTANCE}, otherwise a new conjunction of the
 *         two simplified operands
 */
@Override
public RejectName simplify() {
    final RejectName left = this.first.simplify();
    final RejectName right = this.second.simplify();
    if (left.equals(RejectNone.INSTANCE) || right.equals(RejectNone.INSTANCE)) {
        return RejectNone.INSTANCE;
    }
    return new RejectConjunction(left, right);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@

package fr.inra.maiage.bibliome.util.taxonomy.reject;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import fr.inra.maiage.bibliome.util.taxonomy.Name;

public class RejectDisjunction implements RejectName {
private final Collection<RejectName> rejects;


/**
 * Creates a disjunction with no member filters, backed by a fresh
 * {@link ArrayList}.
 */
public RejectDisjunction() {
this(new ArrayList<RejectName>());
}

public RejectDisjunction(Collection<RejectName> rejects) {
super();
this.rejects = rejects;
Expand All @@ -35,4 +41,30 @@ public boolean reject(String taxid, Name name) {
return true;
return false;
}

/**
 * Appends a single filter to the members of this disjunction.
 *
 * @param reject the filter to add
 */
public void add(RejectName reject) {
    this.rejects.add(reject);
}

/**
 * Appends every filter of the given collection to the members of this
 * disjunction.
 * <p>
 * The parameter is a bounded wildcard so that collections of any filter
 * subtype are accepted, not only {@code Collection<RejectName>}. The erased
 * signature is unchanged, so existing callers are unaffected.
 *
 * @param rejects the filters to add
 */
public void add(Collection<? extends RejectName> rejects) {
    this.rejects.addAll(rejects);
}

/**
 * Simplifies every member, then the disjunction itself.
 * <p>
 * Members whose simplified form is {@link RejectNone#INSTANCE} are dropped.
 * What is returned depends on how many members remain.
 *
 * @return {@link RejectNone#INSTANCE} if no member remains, the sole
 *         remaining member if there is exactly one, otherwise a new
 *         disjunction of the remaining simplified members
 */
@Override
public RejectName simplify() {
    List<RejectName> kept = new ArrayList<RejectName>(this.rejects.size());
    for (RejectName member : this.rejects) {
        RejectName simplified = member.simplify();
        if (simplified.equals(RejectNone.INSTANCE)) {
            continue;
        }
        kept.add(simplified);
    }
    switch (kept.size()) {
        case 0:
            return RejectNone.INSTANCE;
        case 1:
            return kept.get(0);
        default:
            return new RejectDisjunction(kept);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ public interface RejectName {
* @param name
*/
boolean reject(String taxid, Name name);

/**
 * Returns a filter intended to reject the same names as this one, in a
 * possibly simpler form.
 * <p>
 * Implementations may return this filter itself when no simplification
 * applies, or {@code RejectNone.INSTANCE} when the filter can never reject
 * a name.
 *
 * @return the simplified filter
 */
RejectName simplify();
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,9 @@ public RejectNamePattern(Pattern pattern) {
public boolean reject(String taxid, Name name) {
// Matcher.matches() requires the whole name string to match the pattern,
// not just a substring; the taxon identifier is not inspected.
return pattern.matcher(name.name).matches();
}

/**
 * Returns this filter unchanged: no simplification is attempted for a
 * pattern filter.
 *
 * @return this filter
 */
@Override
public RejectName simplify() {
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package fr.inra.maiage.bibliome.util.taxonomy.reject;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;

import fr.inra.maiage.bibliome.util.taxonomy.Name;

/**
 * Name filter that rejects a name when its type belongs to a given set of
 * types. The taxon identifier plays no part in the decision.
 */
public class RejectNameType implements RejectName {
    /** Rejected name types; a private copy of the types given at construction. */
    private final Collection<String> nameTypes;

    /**
     * Creates a filter rejecting the given name types.
     *
     * @param nameTypes the name types to reject
     */
    public RejectNameType(String... nameTypes) {
        this(Arrays.asList(nameTypes));
    }

    /**
     * Creates a filter rejecting the given name types. The collection is
     * copied, so later changes to it do not affect this filter.
     *
     * @param nameTypes the name types to reject
     */
    public RejectNameType(Collection<String> nameTypes) {
        this.nameTypes = new HashSet<String>(nameTypes);
    }

    /**
     * Tells whether the type of the given name is one of the rejected types.
     *
     * @param taxid identifier of the taxon the name belongs to (unused)
     * @param name the name to test
     * @return {@code true} if {@code name.type} is a rejected type
     */
    @Override
    public boolean reject(String taxid, Name name) {
        return nameTypes.contains(name.type);
    }

    /**
     * A filter without any rejected type never rejects anything, so it
     * simplifies to {@link RejectNone#INSTANCE}; otherwise the filter is
     * already in its simplest form.
     *
     * @return {@link RejectNone#INSTANCE} if there is no rejected type,
     *         this filter otherwise
     */
    @Override
    public RejectName simplify() {
        return nameTypes.isEmpty() ? RejectNone.INSTANCE : this;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@

import fr.inra.maiage.bibliome.util.taxonomy.Name;

public class RejectNone implements RejectName {
public static final RejectNone INSTANCE = new RejectNone();

private RejectNone() {
super();
}
/**
 * Name filter that never rejects any name. Implemented as a single-constant
 * enum, so {@link #INSTANCE} is the only instance.
 */
public enum RejectNone implements RejectName {
/** The unique instance of this filter. */
INSTANCE;

/**
 * Always accepts the name.
 *
 * @param taxid identifier of the taxon (unused)
 * @param name the name (unused)
 * @return always {@code false}
 */
@Override
public boolean reject(String taxid, Name name) {
return false;
}

/**
 * Returns this filter unchanged.
 *
 * @return this filter
 */
@Override
public RejectName simplify() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,9 @@ public RejectTaxid(String taxid) {
public boolean reject(String taxid, Name name) {
// Rejects every name of the taxon whose identifier equals the one this
// filter was built with; the name itself is not inspected.
return this.taxid.equals(taxid);
}

/**
 * Returns this filter unchanged: no simplification is attempted for a
 * taxon identifier filter.
 *
 * @return this filter
 */
@Override
public RejectName simplify() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ options = Options:
-saturationFile.help = path to the saturation patterns file
-rejectionFile.args = FILE
-rejectionFile.help = path to rejected names file
-rejectNameType.args = TYPE
-rejectNameType.help = reject synonyms of type TYPE
-pathSeparator.args = STRING
-pathSeparator.help = separator between nodes in node path output (default: '/')
-pattern.args = PATTERN
Expand Down

0 comments on commit f9b16d9

Please sign in to comment.