Skip to content
This repository has been archived by the owner on Jul 3, 2023. It is now read-only.

ANY23-321 Add openie toggle functionality to service #56

Merged
merged 18 commits into from
Feb 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<artifactId>apache-any23</artifactId>
<groupId>org.apache.any23</groupId>
<version>2.3-SNAPSHOT</version>
<version>2.2-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public ExtractorGroup filterByMIMEType(MIMEType mimeType) {
return new ExtractorGroup(matching);
}

/**
 * Exposes the extractor factories held by this object for iteration.
 *
 * @return an iterator obtained from the underlying {@code factories} collection.
 */
@Override
public Iterator<ExtractorFactory<?>> iterator() {
    final Iterator<ExtractorFactory<?>> factoryIterator = factories.iterator();
    return factoryIterator;
}
Expand Down
23 changes: 13 additions & 10 deletions api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public synchronized boolean loadJAR(File jar) {
* @return list of exceptions raised during the loading.
*/
public synchronized Throwable[] loadJARs(File... jars) {
final List<Throwable> result = new ArrayList<Throwable>();
final List<Throwable> result = new ArrayList<>();
for (File jar : jars) {
try {
loadJAR(jar);
Expand Down Expand Up @@ -158,7 +158,7 @@ public synchronized boolean loadClassDir(File classDir) {
* @return list of exceptions raised during the loading.
*/
public synchronized Throwable[] loadClassDirs(File... classDirs) {
final List<Throwable> result = new ArrayList<Throwable>();
final List<Throwable> result = new ArrayList<>();
for (File classDir : classDirs) {
try {
loadClassDir(classDir);
Expand All @@ -178,14 +178,15 @@ public synchronized Throwable[] loadClassDirs(File... classDirs) {
* Loads all the JARs detected in a given directory.
*
* @param jarDir directory containing the JARs to be loaded.
* Example '/usr/local/apache-tomcat-7.0.72/webapps/apache-any23-service-2.2-SNAPSHOT/WEB-INF/lib/apache-any23-openie'
* @return <code>true</code> if all JARs in dir are loaded.
*/
public synchronized boolean loadJARDir(File jarDir) {
if(jarDir == null)
throw new NullPointerException("JAR dir must be not null.");
if( ! jarDir.exists() )
if(!jarDir.exists() )
throw new IllegalArgumentException("Given directory doesn't exist:" + jarDir.getAbsolutePath());
if(! jarDir.isDirectory() )
if(!jarDir.isDirectory() )
throw new IllegalArgumentException(
"given file exists and it is not a directory: " + jarDir.getAbsolutePath()
);
Expand All @@ -210,7 +211,7 @@ public boolean accept(File dir, String name) {
* @return list of errors that occurred during loading.
*/
public synchronized Throwable[] loadFiles(File... files) {
final List<Throwable> errors = new ArrayList<Throwable>();
final List<Throwable> errors = new ArrayList<>();
for(File file : files) {
try {
if (file.isFile() && file.getName().endsWith(".jar")) {
Expand Down Expand Up @@ -263,6 +264,7 @@ public synchronized Iterator<Tool> getTools() throws IOException {
* @return not <code>null</code> list of plugin classes.
* @throws IOException if there is an error obtaining Extractors.
*/
@SuppressWarnings("rawtypes")
public synchronized Iterator<ExtractorFactory> getExtractors() throws IOException {
    // Delegate to the generic plugin lookup, keyed by the raw ExtractorFactory type.
    final Iterator<ExtractorFactory> extractorFactories = getPlugins(ExtractorFactory.class);
    return extractorFactories;
}
Expand Down Expand Up @@ -312,7 +314,8 @@ public synchronized ExtractorGroup configureExtractors(

final StringBuilder report = new StringBuilder();
try {
final List<ExtractorFactory<?>> newFactoryList = new ArrayList<ExtractorFactory<?>>();
final List<ExtractorFactory<?>> newFactoryList = new ArrayList<>();
@SuppressWarnings("rawtypes")
Iterator<ExtractorFactory> extractors = getExtractors();
while (extractors.hasNext()) {
ExtractorFactory<?> factory = extractors.next();
Expand Down Expand Up @@ -386,7 +389,7 @@ public synchronized Iterator<Tool> getApplicableTools(File... pluginLocations) t
*/
private File[] getPluginLocations(String pluginDirsList) {
final String[] locationsStr = pluginDirsList.split(PLUGIN_DIRS_LIST_SEPARATOR);
final List<File> locations = new ArrayList<File>();
final List<File> locations = new ArrayList<>();
for(String locationStr : locationsStr) {
final File location = new File(locationStr);
if( ! location.exists()) {
Expand All @@ -404,16 +407,16 @@ private File[] getPluginLocations(String pluginDirsList) {
*/
private static final class DynamicClassLoader extends URLClassLoader {

private final Set<String> addedURLs = new HashSet<String>();
private final Set<String> addedURLs = new HashSet<>();

private final List<File> jars;

private final List<File> dirs;

public DynamicClassLoader(URL[] urls) {
super(urls, Any23PluginManager.class.getClassLoader());
jars = new ArrayList<File>();
dirs = new ArrayList<File>();
jars = new ArrayList<>();
dirs = new ArrayList<>();
}

public DynamicClassLoader() {
Expand Down
2 changes: 1 addition & 1 deletion cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
<version>2.3-SNAPSHOT</version>
<version>2.2-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
<version>2.3-SNAPSHOT</version>
<version>2.2-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>

Expand Down
8 changes: 5 additions & 3 deletions core/src/main/java/org/apache/any23/Any23.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ public class Any23 {
* @param extractorGroup the group of extractors to be applied.
*/
public Any23(Configuration configuration, ExtractorGroup extractorGroup) {
if(configuration == null) throw new NullPointerException("configuration must be not null.");
if(configuration == null)
throw new NullPointerException("configuration must be not null.");
this.configuration = configuration;
logger.debug( configuration.getConfigurationDump() );

Expand Down Expand Up @@ -259,7 +260,8 @@ public void setMIMETypeDetector(MIMETypeDetector detector) {
* @throws IOException if an error occurs while initializing the internal {@link org.apache.any23.http.HTTPClient}.
*/
public DocumentSource createDocumentSource(String documentIRI) throws URISyntaxException, IOException {
if(documentIRI == null) throw new NullPointerException("documentIRI cannot be null.");
if(documentIRI == null)
throw new NullPointerException("documentIRI cannot be null.");
if (documentIRI.toLowerCase().startsWith("file:")) {
return new FileDocumentSource( new File(new URI(documentIRI)) );
}
Expand Down Expand Up @@ -453,7 +455,7 @@ public ExtractionReport extract(ExtractionParameters eps, DocumentSource in, Tri
}

private String getAcceptHeader() {
Collection<MIMEType> mimeTypes = new ArrayList<MIMEType>();
Collection<MIMEType> mimeTypes = new ArrayList<>();
for (ExtractorFactory<?> factory : factories) {
mimeTypes.addAll(factory.getSupportedMIMETypes());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,21 @@
* Singleton class acting as a registry for all the various
* {@link Extractor}s.
*/
@SuppressWarnings("rawtypes")
public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {

/**
* The instance.
*/
private static ExtractorRegistry instance = null;

/**
 * Public constructor for ExtractorRegistryImpl.
 * Callers should normally obtain the registry through getInstance rather
 * than constructing a new one directly.
 */
public ExtractorRegistryImpl() {
// Hand the service type to the ServiceRegistry superclass constructor.
super(ExtractorFactory.class);
}

/**
* The instance.
*/
private static ExtractorRegistry instance = null;

/**
* @return returns the {@link ExtractorRegistry} instance.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.CSV;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
Expand All @@ -38,6 +39,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.StringTokenizer;
import java.util.Iterator;

/**
* This extractor produces <i>RDF</i> from a <i>CSV file</i>.
Expand Down Expand Up @@ -77,17 +79,18 @@ public void run(

// build the parser
csvParser = CSVReaderBuilder.build(in);
Iterator<CSVRecord> rows = csvParser.iterator();

// get the header and generate the IRIs for column names
String[] header = csvParser.getLine();
CSVRecord header = rows.hasNext() ? rows.next() : null;
headerIRIs = processHeader(header, documentIRI);

// write triples to describe properties
writeHeaderPropertiesMetadata(header, out);

String[] nextLine;
int index = 0;
while ((nextLine = csvParser.getLine()) != null) {
while (rows.hasNext()) {
CSVRecord nextLine = rows.next();
IRI rowSubject = RDFUtils.iri(
documentIRI.toString(),
"row/" + index
Expand Down Expand Up @@ -151,17 +154,18 @@ private boolean isFloat(String number) {
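* @param header the CSV header row; each cell is read by index to label the corresponding column IRI.
* @param out the extraction result to which the header triples are written.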
*/
private void writeHeaderPropertiesMetadata(String[] header, ExtractionResult out) {
private void writeHeaderPropertiesMetadata(CSVRecord header, ExtractionResult out) {
int index = 0;
for (IRI singleHeader : headerIRIs) {
if (index > headerIRIs.length) {
break;
}
if (!RDFUtils.isAbsoluteIRI(header[index])) {
String headerString = header.get(index);
if (!RDFUtils.isAbsoluteIRI(headerString)) {
out.writeTriple(
singleHeader,
RDFS.LABEL,
SimpleValueFactory.getInstance().createLiteral(header[index])
SimpleValueFactory.getInstance().createLiteral(headerString)
);
}
out.writeTriple(
Expand All @@ -181,8 +185,11 @@ private void writeHeaderPropertiesMetadata(String[] header, ExtractionResult out
* @param header
* @return an array of {@link IRI}s identifying the column names.
*/
private IRI[] processHeader(String[] header, IRI documentIRI) {
IRI[] result = new IRI[header.length];
private IRI[] processHeader(CSVRecord header, IRI documentIRI) {
if (header == null)
return new IRI[0];

IRI[] result = new IRI[header.size()];
int index = 0;
for (String h : header) {
String candidate = h.trim();
Expand Down Expand Up @@ -222,7 +229,7 @@ private IRI normalize(String toBeNormalized, IRI documentIRI) {
*/
private void produceRowStatements(
IRI rowSubject,
String[] values,
CSVRecord values,
ExtractionResult out
) {
int index = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@
import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.apache.commons.io.IOUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -145,8 +147,7 @@ private Set<JSONLDScript> extractJSONLDScript(Document in,
for (int i = 0; i < attributes.getLength(); i++) {
if ("application/ld+json".equalsIgnoreCase(attributes.item(i).getTextContent())) {
extractor.run(extractionParameters, extractionContext,
DomUtils.nodeToInputStream(jsonldNode
.getFirstChild()), out);
IOUtils.toInputStream(jsonldNode.getTextContent(), StandardCharsets.UTF_8), out);
}
}
Node nameAttribute = attributes.getNamedItem("name");
Expand Down
Loading