-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #47 from kbss-cvut/41-rd4j-repository-not-initialized
[New] Migrate from rdf4j to graphdb + initialize repository
- Loading branch information
Showing
6 changed files
with
374 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
FROM ontotext/graphdb:10.6.3

# Override the parent entrypoint so that the CMD below fully controls start-up
ENTRYPOINT []

ENV GRAPHDB_HOME=/opt/graphdb/home
ENV GRAPHDB_INSTALL_DIR=/opt/graphdb/dist

WORKDIR ${GRAPHDB_HOME}

# Install libs related to RDF processing.
# The package manager available in the base image differs between image
# variants (the apt branch was originally labelled "for arm64" and the apk
# branch "for amd64"), so use whichever one is present.
# apt-get is used instead of apt because apt has no stable CLI for scripts;
# package indexes are removed afterwards to keep the layer small.
RUN if command -v apt-get >/dev/null; then \
        apt-get update && \
        apt-get install -y python3-rdflib liburi-perl && \
        rm -rf /var/lib/apt/lists/*; \
    elif command -v apk >/dev/null; then \
        apk add --no-cache py3-rdflib perl-uri; \
    fi

# Copy scripts
COPY bin/* ${GRAPHDB_INSTALL_DIR}/bin/

EXPOSE 7200

# Assuming following input directories:
# - /repo-config -- configuration ttl files (*-config.ttl) used to create repositories
# - /root/graphdb-import -- files to import data to specific repositories
#
# The repository initializer runs in the background while GraphDB starts.
# `exec` replaces the wrapping shell with the GraphDB launcher so that signals
# sent to the container (e.g. SIGTERM on `docker stop`) are not swallowed by
# the shell that would otherwise remain PID 1.
CMD ${GRAPHDB_INSTALL_DIR}/bin/repo-init.sh /repo-config ${GRAPHDB_HOME} & exec ${GRAPHDB_INSTALL_DIR}/bin/graphdb -Dgraphdb.home=${GRAPHDB_HOME} -Dgraphdb.logback=${GRAPHDB_INSTALL_DIR}/conf/logback.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/python3 | ||
|
||
import sys | ||
from rdflib import Graph, URIRef | ||
|
||
def log(message):
    """Write *message* to standard error as a single ``ERROR: ``-prefixed line."""
    sys.stderr.write("ERROR: " + message + "\n")
|
||
def check_params():
    """Validate the command line; print usage and exit with status 1 on misuse.

    Exactly two arguments are expected after the program name: the path of
    the RDF file and the URI of the property to look up.
    """
    if len(sys.argv) == 3:
        return

    program = sys.argv[0]
    log(f"""Illegal number of parameters.
Script returns single value of <rdf-property-uri> from file specified by <rdf-file-path>.
Usage: {program} <rdf-file-path> <rdf-property-uri>
Example: {program} "./init-config/repo-config.ttl" "http://www.openrdf.org/config/repository#repositoryID"
""")
    sys.exit(1)
|
||
|
||
def check_property_has_single_value(results, rdf_property):
    """Ensure the query produced exactly one (subject, value) row.

    Returns normally when ``results`` holds a single row. Otherwise an error
    is logged and the process exits: status 2 when there is no row at all,
    status 3 when there are several (every matching triple is listed in the
    error message).
    """
    match_count = len(results)
    if match_count == 1:
        return

    if match_count == 0:
        log(f"No values found for the specified property {rdf_property}.")
        sys.exit(2)

    header = f"Multiple values found for the property {rdf_property}. Triple that match pattern '?s <{rdf_property}> ?o' are:\n"
    triple_lines = [
        f" {subject} {rdf_property} {value} .\n" for subject, value in results
    ]
    log(header + "".join(triple_lines))
    sys.exit(3)
|
||
def load_rdf_graph(file_path):
    """Parse the RDF file at *file_path* into a new rdflib ``Graph``.

    The serialization is chosen from the file extension: ``.ttl`` is read as
    Turtle and ``.rdf`` as RDF/XML. Any other extension is reported as an
    error and the process exits with status 1.
    """
    graph = Graph()

    # Supported file suffixes and the parser format each one maps to
    formats_by_suffix = ((".ttl", "turtle"), (".rdf", "xml"))
    rdf_format = next(
        (fmt for suffix, fmt in formats_by_suffix if file_path.endswith(suffix)),
        None,
    )

    if rdf_format is None:
        log(f"Unsupported RDF file format of {file_path}.")
        sys.exit(1)

    graph.parse(file_path, format=rdf_format)
    return graph
|
||
def main():
    """Print the single value of an RDF property found in an RDF file.

    Reads the file path and the property URI from the command line, loads the
    file, selects every ``?subject <property> ?value`` triple and prints the
    value. The process exits with an error unless exactly one triple matches.
    """
    check_params()

    rdf_file, property_arg = sys.argv[1], sys.argv[2]
    rdf_property = URIRef(property_arg)

    graph = load_rdf_graph(rdf_file)

    # Select every subject/value pair connected by the requested property
    sparql = f"""
SELECT ?subject ?value
WHERE {{
?subject <{rdf_property}> ?value.
}}
"""
    matches = graph.query(sparql)

    check_property_has_single_value(matches, rdf_property)

    for _subject, value in matches:
        print(f"{value}")


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/sh

#
# Initializes GraphDB repositories: for every repository configuration file a
# repository is created through the GraphDB REST API, unless a non-empty
# directory for that repository already exists in the GraphDB home.
#
# Usage: repo-init.sh <source-dir> <graphdb-home>
#   <source-dir>    directory containing repository configurations (*-config.ttl)
#   <graphdb-home>  GraphDB home; repositories are looked up in
#                   <graphdb-home>/data/repositories/<repository-id>
#

SOURCE_DIR=$1
GRAPHDB_HOME=$2

SCRIPT_DIR="$(dirname "$0")"

echo "INFO: Running initializer for GraphDB repositories ..."

# Wait for GraphDB to start up
echo "INFO: Waiting for GraphDB to start up..."
sleep 15

# Iterate with a glob instead of parsing `ls` output, so that paths with
# whitespace are handled and the loop body runs in the main shell.
for REPO_CONFIG_FILE in "${SOURCE_DIR}"/*-config.ttl; do

    # When nothing matches, the pattern itself is returned; skip it.
    [ -e "$REPO_CONFIG_FILE" ] || continue

    REPO_NAME=$("$SCRIPT_DIR/get-value-of-rdf-property.py" "$REPO_CONFIG_FILE" 'http://www.openrdf.org/config/repository#repositoryID')

    if [ -z "$REPO_NAME" ]; then
        echo "ERROR: Could not parse repository name from file $REPO_CONFIG_FILE" >&2
        exit 1
    fi

    REPO_DIR="${GRAPHDB_HOME}/data/repositories/${REPO_NAME}"

    # Initialize when the repository directory is missing or empty.
    # (The emptiness test must list the repository directory itself; testing
    # a string with the path appended after the command substitution is
    # never empty.)
    if [ ! -d "$REPO_DIR" ] || [ -z "$(ls -A "$REPO_DIR")" ]; then
        echo "INFO: Initializing repository $REPO_NAME..."

        # Create repository based on configuration
        echo "INFO: Creating repository $REPO_NAME..."
        # --fail makes curl return a non-zero status on an HTTP error so that
        # success is only reported when the request was actually accepted.
        if curl --fail -X POST --header "Content-Type: multipart/form-data" -F "config=@${REPO_CONFIG_FILE}" "http://localhost:7200/rest/repositories"; then
            echo "INFO: Repository $REPO_NAME successfully initialized."
        else
            echo "ERROR: Failed to create repository $REPO_NAME from $REPO_CONFIG_FILE" >&2
        fi
    else
        echo "INFO: Repository $REPO_NAME already exists. Skipping initialization..."
    fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
<configuration debug="false" scan="true" scanPeriod="30 seconds"> | ||
<jmxConfigurator /> | ||
|
||
<!-- Try to guess the logs destination based on the application server or fallback to a default logs directory(embedded mode) | ||
NOTE: We are using a really old version of logback so we have to use p().isEmpty instead of isDefined --> | ||
|
||
<if condition='p("logDestinationDirectory").isEmpty()'> | ||
<then> | ||
<!-- catalina.base if we are running on tomcat --> | ||
<if condition='!p("catalina.base").isEmpty()'> | ||
<then> | ||
<property name="logDestinationDirectory" value="${catalina.base}/logs/graphdb"/> | ||
</then> | ||
<else> | ||
<!-- jetty.base if we are running on jetty --> | ||
<if condition='!p("jetty.base").isEmpty()'> | ||
<then> | ||
<property name="logDestinationDirectory" value="${jetty.base}/logs/graphdb"/> | ||
</then> | ||
<else> | ||
<!-- we are running in embedded mode --> | ||
<property name="logDestinationDirectory" value="logs"/> | ||
</else> | ||
</if> | ||
</else> | ||
</if> | ||
</then> | ||
</if> | ||
|
||
<property name="defaultPattern" value="[%-5p] %d{ISO8601} [%t | %c{5}]%X{headers} %m%n%ex" /> | ||
<property name="encoding" value="UTF-8" /> | ||
|
||
<!-- Audit log. Contains security related things --> | ||
<appender name="AuditLog" class="ch.qos.logback.core.rolling.RollingFileAppender"> | ||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> | ||
<fileNamePattern>${logDestinationDirectory}/audit-log-%d{yyyy-MM-dd}.log</fileNamePattern> | ||
</rollingPolicy> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
<charset>${encoding}</charset> | ||
</encoder> | ||
</appender> | ||
|
||
<appender name="MainLog" class="ch.qos.logback.core.rolling.RollingFileAppender"> | ||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> | ||
<fileNamePattern>${logDestinationDirectory}/main-%d{yyyy-MM-dd}.log</fileNamePattern> | ||
</rollingPolicy> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
<charset>${encoding}</charset> | ||
</encoder> | ||
</appender> | ||
|
||
<appender name="ErrorLog" class="ch.qos.logback.core.rolling.RollingFileAppender"> | ||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> | ||
<fileNamePattern>${logDestinationDirectory}/error-%d{yyyy-MM-dd}.log</fileNamePattern> | ||
</rollingPolicy> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
<charset>${encoding}</charset> | ||
</encoder> | ||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> | ||
<level>ERROR</level> | ||
</filter> | ||
</appender> | ||
|
||
<appender name="QueryLog" class="ch.qos.logback.core.rolling.RollingFileAppender"> | ||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> | ||
<fileNamePattern>${logDestinationDirectory}/query-log-%d{yyyy-MM-dd}.log</fileNamePattern> | ||
</rollingPolicy> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
<charset>${encoding}</charset> | ||
</encoder> | ||
</appender> | ||
|
||
<appender name="SlowQueryLog" class="ch.qos.logback.core.rolling.RollingFileAppender"> | ||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> | ||
<fileNamePattern>${logDestinationDirectory}/slow-query-log-%d{yyyy-MM-dd}.log</fileNamePattern> | ||
</rollingPolicy> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
<charset>${encoding}</charset> | ||
</encoder> | ||
</appender> | ||
|
||
<if condition='!p("graphdb.foreground").isEmpty()'> | ||
<then> | ||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> | ||
<encoder> | ||
<pattern>${defaultPattern}</pattern> | ||
</encoder> | ||
</appender> | ||
</then> | ||
</if> | ||
|
||
<!-- Log all repository, user creations, modifications and deletions. Also logs successful or not attempts to | ||
login into system. Updates, queries and imports to repository. Set to "INFO" level for logging all former updates. | ||
Will log exceptions on "ERROR" level. Additivity is set to "false" and called first to prevent the messages | ||
from cluttering the other logs. --> | ||
<logger name="com.ontotext.forest.security.audit.AuditLoggingFilter" level="WARN" additivity="false"> | ||
<appender-ref ref="AuditLog"/> | ||
</logger> | ||
|
||
<!-- Log update operations on workers. Set to "INFO" level by default for logging all updates in workers' QueryLog. | ||
Will log exceptions on "ERROR" level. Additivity is set to "false" and called first to prevent the messages | ||
from cluttering the other logs.--> | ||
<logger name="com.ontotext.trree.monitorRepository.MonitorRepositoryConnection" level="WARN" additivity="false"> | ||
<appender-ref ref="QueryLog"/> | ||
</logger> | ||
|
||
<!-- Log query operations on the repository. Set to "DEBUG" level for logging all queries. Will log exceptions on "ERROR" | ||
level. Additivity is set to "false" to prevent the messages from cluttering the other logs. --> | ||
<logger name="com.ontotext.trree.query.LoggingClosableIteration" level="WARN" additivity="false"> | ||
<appender-ref ref="QueryLog"/> | ||
</logger> | ||
|
||
<!-- Log slow queries on "INFO" level. Queries are deemed "slow" if they take more than "SlowOpThresholdMs" from the | ||
RepositorySettings property. Set the level to "OFF" to stop this log. Additivity is set to "false" to prevent the messages | ||
from cluttering the other logs. --> | ||
<logger name="slow-queries" level="ERROR" additivity="false"> | ||
<appender-ref ref="SlowQueryLog"/> | ||
</logger> | ||
|
||
<root> | ||
<level value="${graphdb.logger.root.level:-WARN}"/> | ||
<appender-ref ref="MainLog"/> | ||
<appender-ref ref="ErrorLog" /> | ||
<if condition='!p("graphdb.foreground").isEmpty()'> | ||
<then> | ||
<appender-ref ref="STDOUT"/> | ||
</then> | ||
</if> | ||
|
||
</root> | ||
|
||
<!-- Make some of the more verbose loggers less chatty --> | ||
<logger name="org.springframework" level="WARN"/> | ||
<logger name="org.apache" level="WARN"/> | ||
<logger name="com.github.ziplet" level="WARN"/> | ||
<logger name="springfox.documentation" level="WARN"/> | ||
|
||
<!-- OpenRefine loggers be less verbose --> | ||
<logger name="CsvExporter" level="WARN"/> | ||
<logger name="FileProjectManager" level="WARN"/> | ||
<logger name="HistoryEntry" level="WARN"/> | ||
<logger name="ImportingParserBase" level="WARN"/> | ||
<logger name="JsonParser" level="WARN"/> | ||
<logger name="ProjectManager" level="WARN"/> | ||
<logger name="RecordModel" level="WARN"/> | ||
<logger name="TreeImportUtilities" level="WARN"/> | ||
<logger name="velocity" level="WARN"/> | ||
<logger name="Velocity.*" level="WARN"/> | ||
<logger name="XmlImportUtilities" level="WARN"/> | ||
<logger name="binning_clusterer" level="WARN"/> | ||
<logger name="butterfly" level="WARN"/> | ||
<logger name="butterfly.*" level="WARN"/> | ||
<logger name="command" level="WARN"/> | ||
<logger name="compute-clusters_command" level="WARN"/> | ||
<logger name="create-import-job_command" level="WARN"/> | ||
<logger name="create-project_command" level="WARN"/> | ||
<logger name="get-scatterplot_command" level="WARN"/> | ||
<logger name="import-project_command" level="WARN"/> | ||
<logger name="importing" level="WARN"/> | ||
<logger name="importing-controller_command" level="WARN"/> | ||
<logger name="importing-utilities" level="WARN"/> | ||
<logger name="javascript" level="WARN"/> | ||
<logger name="kNN_clusterer" level="WARN"/> | ||
<logger name="open" level="WARN"/> | ||
<logger name="office" level="WARN"/> | ||
<logger name="project" level="WARN"/> | ||
<logger name="project_metadata" level="WARN"/> | ||
<logger name="project_metadata_utilities" level="WARN"/> | ||
<logger name="project_utilities" level="WARN"/> | ||
<logger name="recon-config" level="WARN"/> | ||
<logger name="recon-operation" level="WARN"/> | ||
<logger name="refine" level="WARN"/> | ||
<logger name="refine-standard-recon" level="WARN"/> | ||
<logger name="refine_clientSideResourceManager" level="WARN"/> | ||
<logger name="scatterplot_facet" level="WARN"/> | ||
|
||
<!-- GeoSPARQL related deps be less verbose --> | ||
<logger name="hsqldb.*" level="WARN"/> | ||
<logger name="org.geotoolkit.*" level="WARN"/> | ||
|
||
<!-- SemanticVectors related logger be less verbose --> | ||
<logger name="pitt.search.semanticvectors.DocVectors" level="WARN"/> | ||
</configuration> |
33 changes: 33 additions & 0 deletions
33
deploy/db-server/init-config/s-pipes-hello-world-config.ttl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . | ||
@prefix rep: <http://www.openrdf.org/config/repository#> . | ||
@prefix sail: <http://www.openrdf.org/config/sail#> . | ||
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . | ||
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>. | ||
|
||
<#s-pipes-hello-world> a rep:Repository; | ||
rep:repositoryID "s-pipes-hello-world"; | ||
rep:repositoryImpl [ | ||
rep:repositoryType "graphdb:SailRepository"; | ||
<http://www.openrdf.org/config/repository/sail#sailImpl> [ | ||
graphdb:base-URL "http://example.org/owlim#"; | ||
graphdb:check-for-inconsistencies "false"; | ||
graphdb:defaultNS ""; | ||
graphdb:disable-sameAs "true"; | ||
graphdb:enable-context-index "true"; | ||
graphdb:enable-literal-index "true"; | ||
graphdb:enablePredicateList "true"; | ||
graphdb:entity-id-size "32"; | ||
graphdb:entity-index-size "10000000"; | ||
graphdb:imports ""; | ||
graphdb:in-memory-literal-properties "true"; | ||
graphdb:owlim-license ""; | ||
graphdb:query-limit-results "0"; | ||
graphdb:query-timeout "0"; | ||
graphdb:read-only "false"; | ||
graphdb:repository-type "file-repository"; | ||
graphdb:storage-folder "storage"; | ||
graphdb:throw-QueryEvaluationException-on-timeout "false"; | ||
sail:sailType "graphdb:Sail" | ||
] | ||
]; | ||
rdfs:label "SPipes Hello World repository" . |
Oops, something went wrong.