Skip to content
This repository has been archived by the owner on Dec 11, 2019. It is now read-only.

Commit

Permalink
Recreated Geonames database
Browse files: browse the repository at this point in the history
  • Loading branch information
ymamakis committed Mar 18, 2016
1 parent 796af89 commit dcb76af
Show file tree
Hide file tree
Showing 32 changed files with 499 additions and 345 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@
: gmamakis, cesare Description: Map DBPedia to EDM-Agent -->

<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:edm="http://www.europeana.eu/schemas/edm/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdaGr2="http://rdvocab.info/ElementsGr2/"
xmlns:dbpedia-owl="http://dbpedia.org/ontology/"
xmlns:dbpprop="http://dbpedia.org/property/"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dbpedia-owl="http://dbpedia.org/ontology/"
xmlns:dbpprop="http://dbpedia.org/property/"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
<xsl:output indent="no"/>

<xsl:param name="rdf_about">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
<groupId>org.apache.jena</groupId>
<artifactId>apache-jena-libs</artifactId>
<version>2.11.1</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
Expand Down Expand Up @@ -49,43 +48,32 @@
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>2.7.2</version>
<scope>compile</scope>
<version>2.12.1</version>
</dependency>
<dependency>
<groupId>eu.europeana.enrichment</groupId>
<artifactId>enrichment-framework-common</artifactId>
<version>0.1-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>eu.europeana.corelib</groupId>
<artifactId>corelib-storage</artifactId>
<version>2.2</version>
<type>pom</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>cglib</groupId>
<artifactId>cglib-nodep</artifactId>
<version>3.1</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.thoughtworks.proxytoys</groupId>
<artifactId>proxytoys</artifactId>
<version>1.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.google.code.morphia</groupId>
<artifactId>morphia</artifactId>
<version>0.99.1-patched</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.mongojack</groupId>
Expand All @@ -111,8 +99,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,53 +1,36 @@
package eu.europeana.enrichment.harvester.database;

import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.FileHandler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import java.util.regex.Pattern;


import com.google.code.morphia.query.Query;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.update.UpdateLog;
import org.jibx.runtime.JiBXException;

import com.google.code.morphia.Datastore;
import com.google.code.morphia.Morphia;

import com.google.code.morphia.query.Query;
import com.google.code.morphia.query.UpdateOperations;
import com.google.code.morphia.query.UpdateResults;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
import com.mongodb.*;
import com.mongodb.util.JSON;

import eu.europeana.corelib.solr.entity.AgentImpl;
import eu.europeana.corelib.solr.entity.ConceptImpl;
import eu.europeana.enrichment.api.internal.AgentTermList;
import eu.europeana.enrichment.api.internal.ConceptTermList;
import eu.europeana.enrichment.api.internal.MongoTerm;
import eu.europeana.enrichment.api.internal.PlaceTermList;
import eu.europeana.enrichment.api.internal.TimespanTermList;
import eu.europeana.enrichment.api.internal.*;
import eu.europeana.enrichment.harvester.api.AgentMap;
import eu.europeana.enrichment.converters.*;
import org.apache.commons.lang.StringUtils;
import org.jibx.runtime.JiBXException;
import org.mongojack.DBQuery;
import org.mongojack.DBRef;
import org.mongojack.JacksonDBCollection;
import org.mongojack.WriteResult;

import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.FileHandler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import java.util.regex.Pattern;

public class DataManager {

private static final Logger log = Logger.getLogger(DataManager.class.getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
package eu.europeana.enrichment.harvester.dbpedia;

import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;

import java.util.logging.Logger;

import eu.europeana.enrichment.harvester.database.DataManager;
import eu.europeana.enrichment.harvester.api.AgentMap;
import eu.europeana.enrichment.harvester.database.DataManager;
import org.apache.commons.lang.StringUtils;

import java.net.URISyntaxException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.logging.Level;

import org.apache.commons.lang.StringUtils;
import java.util.logging.Logger;

public class DbPediaAgentsCollector {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,58 +1,29 @@
package eu.europeana.enrichment.harvester.dbpedia;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.swing.text.html.HTMLDocument.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;



import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFWriter;
import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;

import eu.europeana.corelib.solr.entity.AgentImpl;
import eu.europeana.enrichment.converters.ContextualEntityToXmlConverter;
import eu.europeana.enrichment.harvester.api.AgentMap;
import eu.europeana.enrichment.harvester.database.DataManager;
import eu.europeana.enrichment.harvester.transform.edm.agent.AgentTransformer;
import eu.europeana.enrichment.harvester.transform.edm.concept.ConceptTransformer;
import eu.europeana.enrichment.harvester.util.MongoDataSerializer;
import eu.europeana.enrichment.converters.ContextualEntityToXmlConverter;
import org.w3c.dom.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

public class DbPediaCollector {

Expand Down Expand Up @@ -327,7 +298,7 @@ public void printDBPediaConcepts() {


doc.appendChild(mainRootElement);
Set <String> conceptsSet = new HashSet<String>();
Set<String> conceptsSet = new HashSet<String>();

for(String line; (line = br.readLine()) != null; ) {
if (line!=null && line.trim().startsWith("http://dbpedia")){
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
package eu.europeana.enrichment.harvester.dbpedia;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.*;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,17 @@
package eu.europeana.enrichment.harvester.dbpedia;

import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.*;
import com.hp.hpl.jena.rdf.model.Model;

import com.hp.hpl.jena.tdb.TDBFactory;

import java.util.logging.Logger;

import eu.europeana.enrichment.harvester.database.DataManager;
import eu.europeana.enrichment.harvester.api.AgentMap;
import eu.europeana.enrichment.harvester.database.DataManager;
import org.apache.commons.lang.StringUtils;

import java.net.URISyntaxException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.logging.Level;

import org.apache.commons.lang.StringUtils;
import java.util.logging.Logger;

public class DbPediaLocalAgentsCollector {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,30 @@
package eu.europeana.enrichment.harvester.freebase;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.httpclient.HttpException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFormatter;
import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelExtract;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFWriter;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.query.*;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.ReasonerRegistry;
import com.hp.hpl.jena.reasoner.ValidityReport;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
import com.hp.hpl.jena.tdb.TDBFactory;
import com.hp.hpl.jena.util.FileManager;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

import eu.europeana.enrichment.harvester.api.AgentMap;
import eu.europeana.enrichment.harvester.database.DataManager;
import eu.europeana.enrichment.harvester.transform.edm.agent.AgentTransformer;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

public class FreeBaseCollector {

Expand Down
Loading

0 comments on commit dcb76af

Please sign in to comment.