Skip to content

Commit

Permalink
[New] Migrate from rdf4j to graphdb + initialize repository
Browse files Browse the repository at this point in the history
  • Loading branch information
blcham committed Aug 9, 2024
1 parent 8f94dee commit e8ae31a
Show file tree
Hide file tree
Showing 6 changed files with 374 additions and 22 deletions.
33 changes: 33 additions & 0 deletions deploy/db-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# GraphDB server image that additionally creates the configured repositories
# once the server is up (see bin/repo-init.sh).
FROM ontotext/graphdb:10.6.3

# Override parent entrypoint so that the CMD below fully controls what is started
ENTRYPOINT []

ENV GRAPHDB_HOME=/opt/graphdb/home
ENV GRAPHDB_INSTALL_DIR=/opt/graphdb/dist

WORKDIR ${GRAPHDB_HOME}

# Install libs related to RDF processing (rdflib is imported by
# bin/get-value-of-rdf-property.py). Each RUN below only acts when its package
# manager is present in the base image, so exactly one of them does real work.

### Debian/Ubuntu-style base image (originally noted as: for arm64)
# apt-get is used instead of apt because apt's CLI is not meant for scripts;
# the package lists are removed in the same layer to keep the image small.
RUN if command -v apt-get >/dev/null; then \
        apt-get update && \
        apt-get install -y python3-rdflib liburi-perl && \
        rm -rf /var/lib/apt/lists/*; \
    fi

### Alpine-style base image (originally noted as: for amd64)
# --no-cache avoids leaving the apk index in the layer.
RUN if command -v apk >/dev/null; then \
        apk add --no-cache py3-rdflib perl-uri; \
    fi

# Copy scripts
COPY bin/* ${GRAPHDB_INSTALL_DIR}/bin/

EXPOSE 7200

# Assuming following input directories:
# - /repo-config and data -- configuration ttl files to create repositories
# - /root/graphdb-import -- files to import data to specific repositories
#
# The repository initializer runs in the background while GraphDB starts.
# GraphDB is started with `exec` so that it replaces the wrapping `sh -c`
# process and receives the stop signal from `docker stop` directly instead of
# being killed after the grace period.
CMD ${GRAPHDB_INSTALL_DIR}/bin/repo-init.sh /repo-config ${GRAPHDB_HOME} & exec ${GRAPHDB_INSTALL_DIR}/bin/graphdb -Dgraphdb.home=${GRAPHDB_HOME} -Dgraphdb.logback=${GRAPHDB_INSTALL_DIR}/conf/logback.xml
73 changes: 73 additions & 0 deletions deploy/db-server/bin/get-value-of-rdf-property.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/python3

import sys
from rdflib import Graph, URIRef

def log(message):
    """Print ``message`` to standard error, prefixed with ``ERROR: ``."""
    error_line = "ERROR: " + message
    print(error_line, file=sys.stderr)

def check_params():
    """Validate the command line.

    The script expects exactly two arguments (RDF file path and property URI).
    Otherwise a usage message is logged and the process exits with status 1.
    """
    # sys.argv = [script name, <rdf-file-path>, <rdf-property-uri>]
    if len(sys.argv) == 3:
        return

    script_name = sys.argv[0]
    log(f"""Illegal number of parameters.
Script returns single value of <rdf-property-uri> from file specified by <rdf-file-path>.
Usage: {script_name} <rdf-file-path> <rdf-property-uri>
Example: {script_name} "./init-config/repo-config.ttl" "http://www.openrdf.org/config/repository#repositoryID"
""")
    sys.exit(1)


def check_property_has_single_value(results, rdf_property):
    """Ensure that exactly one (subject, value) pair was found.

    Args:
        results: sized iterable of (subject, value) rows.
        rdf_property: the property the rows were matched on (used in messages).

    Exits with status 2 when there is no row and with status 3 when there is
    more than one row; returns normally for exactly one row.
    """
    match_count = len(results)
    if match_count == 1:
        return

    if match_count == 0:
        log(f"No values found for the specified property {rdf_property}.")
        sys.exit(2)

    # More than one match: list every offending triple in the error message.
    triple_lines = [
        f" {subject} {rdf_property} {value} .\n" for subject, value in results
    ]
    header = f"Multiple values found for the property {rdf_property}. Triple that match pattern '?s <{rdf_property}> ?o' are:\n"
    log(header + "".join(triple_lines))
    sys.exit(3)

def load_rdf_graph(file_path):
    """Parse the RDF file at ``file_path`` into a new rdflib ``Graph``.

    The serialization is chosen from the file extension: ``.ttl`` is parsed as
    Turtle and ``.rdf`` as RDF/XML. Any other extension is logged as an error
    and the process exits with status 1.
    """
    graph = Graph()

    # Extension -> rdflib parser format name.
    formats_by_suffix = ((".ttl", "turtle"), (".rdf", "xml"))
    rdf_format = next(
        (fmt for suffix, fmt in formats_by_suffix if file_path.endswith(suffix)),
        None,
    )

    if rdf_format is None:
        log(f"Unsupported RDF file format of {file_path}.")
        sys.exit(1)

    graph.parse(file_path, format=rdf_format)
    return graph

def main():
    """Print the single value of an RDF property found in an RDF file.

    Reads the file path and the property URI from the command line, loads the
    file and looks up all triples matching '?s <property> ?o'. Exactly one
    match is required; its object is printed to standard output. The exit
    codes of the validation helpers apply otherwise.
    """
    check_params()

    file_path = sys.argv[1]
    rdf_property = URIRef(sys.argv[2])

    g = load_rdf_graph(file_path)

    # Look the triples up through the graph API instead of interpolating the
    # command-line argument into SPARQL text: a URI containing '>', braces or
    # whitespace would otherwise produce a malformed or different query.
    # The result is materialized because it is measured with len() and then
    # iterated again.
    results = list(g.subject_objects(rdf_property))

    check_property_has_single_value(results, rdf_property)

    for _subject, value in results:
        print(f"{value}")


if __name__ == "__main__":
    main()

37 changes: 37 additions & 0 deletions deploy/db-server/bin/repo-init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/sh

#
# Initializes GraphDB repositories (the repositories are created if they do not exist yet and some of the data are replaced)
#
# Usage: repo-init.sh <source-dir> <graphdb-home>
#   <source-dir>    directory with repository configuration files named *-config.ttl
#   <graphdb-home>  GraphDB home directory (repositories live in <graphdb-home>/data/repositories)
#
# Exit status: 0 on success, 1 if the arguments are missing, a repository name
# cannot be parsed, or the creation of any repository fails.
#

if [ "$#" -lt 2 ]; then
    echo "ERROR: Usage: $0 <source-dir> <graphdb-home>" >&2
    exit 1
fi

SOURCE_DIR=$1
GRAPHDB_HOME=$2

SCRIPT_DIR="$(dirname "$0")"
GRAPHDB_URL="http://localhost:7200"

echo "INFO: Running initializer for GraphDB repositories ..."

# Wait for GraphDB to start up: poll the REST API instead of sleeping for a
# fixed time, so a slow start does not make the repository creation fail and
# a fast start is not delayed. Gives up after roughly 120 seconds.
echo "INFO: Waiting for GraphDB to start up..."
WAIT_ATTEMPTS=60
until curl --silent --fail --output /dev/null "${GRAPHDB_URL}/rest/repositories"; do
    WAIT_ATTEMPTS=$((WAIT_ATTEMPTS - 1))
    if [ "$WAIT_ATTEMPTS" -le 0 ]; then
        echo "WARN: GraphDB did not respond at ${GRAPHDB_URL} in time; trying to continue anyway."
        break
    fi
    sleep 2
done

STATUS=0

# Iterate with a glob instead of parsing `ls` output so that paths containing
# whitespace are handled correctly.
for REPO_CONFIG_FILE in "${SOURCE_DIR}"/*-config.ttl; do

    # When nothing matches, the shell leaves the pattern unexpanded.
    if [ ! -e "$REPO_CONFIG_FILE" ]; then
        echo "WARN: No repository configuration files found in ${SOURCE_DIR}."
        continue
    fi

    REPO_NAME=$("$SCRIPT_DIR/get-value-of-rdf-property.py" "$REPO_CONFIG_FILE" 'http://www.openrdf.org/config/repository#repositoryID')

    if [ -z "$REPO_NAME" ]; then
        echo "ERROR: Could not parse repository name from file $REPO_CONFIG_FILE"
        exit 1
    fi

    REPO_DIR="${GRAPHDB_HOME}/data/repositories/${REPO_NAME}"

    # Initialize when the repository directory is missing or empty.
    # (The emptiness test must list the repository directory itself; the
    # command substitution therefore has to enclose the whole path.)
    if [ ! -d "$REPO_DIR" ] || [ -z "$(ls -A "$REPO_DIR")" ]; then
        echo "INFO: Initializing repository $REPO_NAME..."

        # Create repository based on configuration
        echo "INFO: Creating repository $REPO_NAME..."
        # --fail makes curl return non-zero on HTTP errors so that success is
        # only reported when the server actually accepted the configuration.
        if curl --fail --silent --show-error -X POST --header "Content-Type: multipart/form-data" -F "config=@${REPO_CONFIG_FILE}" "${GRAPHDB_URL}/rest/repositories"; then
            echo "INFO: Repository $REPO_NAME successfully initialized."
        else
            echo "ERROR: Failed to create repository $REPO_NAME from $REPO_CONFIG_FILE"
            STATUS=1
        fi
    else
        echo "INFO: Repository $REPO_NAME already exists. Skipping initialization..."
    fi
done

exit "$STATUS"
188 changes: 188 additions & 0 deletions deploy/db-server/conf/logback.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
<configuration debug="false" scan="true" scanPeriod="30 seconds">
<jmxConfigurator />

<!-- Try to guess the logs destination based on the application server or fallback to a default logs directory(embedded mode)
NOTE: We are using a really old version of logback so we have to use p().isEmpty instead of isDefined -->

<if condition='p("logDestinationDirectory").isEmpty()'>
<then>
<!-- catalina.base if we are running on tomcat -->
<if condition='!p("catalina.base").isEmpty()'>
<then>
<property name="logDestinationDirectory" value="${catalina.base}/logs/graphdb"/>
</then>
<else>
<!-- jetty.base if we are running on jetty -->
<if condition='!p("jetty.base").isEmpty()'>
<then>
<property name="logDestinationDirectory" value="${jetty.base}/logs/graphdb"/>
</then>
<else>
<!-- we are running in embedded mode -->
<property name="logDestinationDirectory" value="logs"/>
</else>
</if>
</else>
</if>
</then>
</if>

<property name="defaultPattern" value="[%-5p] %d{ISO8601} [%t | %c{5}]%X{headers} %m%n%ex" />
<property name="encoding" value="UTF-8" />

<!-- Audit log. Contains security related things -->
<appender name="AuditLog" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${logDestinationDirectory}/audit-log-%d{yyyy-MM-dd}.log</fileNamePattern>
</rollingPolicy>
<encoder>
<pattern>${defaultPattern}</pattern>
<charset>${encoding}</charset>
</encoder>
</appender>

<appender name="MainLog" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${logDestinationDirectory}/main-%d{yyyy-MM-dd}.log</fileNamePattern>
</rollingPolicy>
<encoder>
<pattern>${defaultPattern}</pattern>
<charset>${encoding}</charset>
</encoder>
</appender>

<appender name="ErrorLog" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${logDestinationDirectory}/error-%d{yyyy-MM-dd}.log</fileNamePattern>
</rollingPolicy>
<encoder>
<pattern>${defaultPattern}</pattern>
<charset>${encoding}</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>ERROR</level>
</filter>
</appender>

<appender name="QueryLog" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${logDestinationDirectory}/query-log-%d{yyyy-MM-dd}.log</fileNamePattern>
</rollingPolicy>
<encoder>
<pattern>${defaultPattern}</pattern>
<charset>${encoding}</charset>
</encoder>
</appender>

<appender name="SlowQueryLog" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${logDestinationDirectory}/slow-query-log-%d{yyyy-MM-dd}.log</fileNamePattern>
</rollingPolicy>
<encoder>
<pattern>${defaultPattern}</pattern>
<charset>${encoding}</charset>
</encoder>
</appender>

<if condition='!p("graphdb.foreground").isEmpty()'>
<then>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>${defaultPattern}</pattern>
</encoder>
</appender>
</then>
</if>

<!-- Log all repository, user creations, modifications and deletions. Also logs successful or not attempts to
login into system. Updates, queries and imports to repository. Set to "INFO" level for logging all former updates.
Will log exceptions on "ERROR" level. Additivity is set to "false" and called first to prevent the messages
from cluttering the other logs. -->
<logger name="com.ontotext.forest.security.audit.AuditLoggingFilter" level="WARN" additivity="false">
<appender-ref ref="AuditLog"/>
</logger>

<!-- Log update operations on workers. Set to "INFO" level by default for logging all updates in workers' QueryLog.
Will log exceptions on "ERROR" level. Additivity is set to "false" and called first to prevent the messages
from cluttering the other logs.-->
<logger name="com.ontotext.trree.monitorRepository.MonitorRepositoryConnection" level="WARN" additivity="false">
<appender-ref ref="QueryLog"/>
</logger>

<!-- Log query operations on the repository. Set to "DEBUG" level for logging all queries. Will log exceptions on "ERROR"
level. Additivity is set to "false" to prevent the messages from cluttering the other logs. -->
<logger name="com.ontotext.trree.query.LoggingClosableIteration" level="WARN" additivity="false">
<appender-ref ref="QueryLog"/>
</logger>

<!-- Log slow queries on "INFO" level. Queries are deemed "slow" if they take more than "SlowOpThresholdMs" from the
RepositorySettings property. Set the level to "OFF" to stop this log. Additivity is set to "false" to prevent the messages
from cluttering the other logs. -->
<logger name="slow-queries" level="ERROR" additivity="false">
<appender-ref ref="SlowQueryLog"/>
</logger>

<root>
<level value="${graphdb.logger.root.level:-WARN}"/>
<appender-ref ref="MainLog"/>
<appender-ref ref="ErrorLog" />
<if condition='!p("graphdb.foreground").isEmpty()'>
<then>
<appender-ref ref="STDOUT"/>
</then>
</if>

</root>

<!-- Make some of the more verbose loggers less chatty -->
<logger name="org.springframework" level="WARN"/>
<logger name="org.apache" level="WARN"/>
<logger name="com.github.ziplet" level="WARN"/>
<logger name="springfox.documentation" level="WARN"/>

<!-- OpenRefine loggers be less verbose -->
<logger name="CsvExporter" level="WARN"/>
<logger name="FileProjectManager" level="WARN"/>
<logger name="HistoryEntry" level="WARN"/>
<logger name="ImportingParserBase" level="WARN"/>
<logger name="JsonParser" level="WARN"/>
<logger name="ProjectManager" level="WARN"/>
<logger name="RecordModel" level="WARN"/>
<logger name="TreeImportUtilities" level="WARN"/>
<logger name="velocity" level="WARN"/>
<logger name="Velocity.*" level="WARN"/>
<logger name="XmlImportUtilities" level="WARN"/>
<logger name="binning_clusterer" level="WARN"/>
<logger name="butterfly" level="WARN"/>
<logger name="butterfly.*" level="WARN"/>
<logger name="command" level="WARN"/>
<logger name="compute-clusters_command" level="WARN"/>
<logger name="create-import-job_command" level="WARN"/>
<logger name="create-project_command" level="WARN"/>
<logger name="get-scatterplot_command" level="WARN"/>
<logger name="import-project_command" level="WARN"/>
<logger name="importing" level="WARN"/>
<logger name="importing-controller_command" level="WARN"/>
<logger name="importing-utilities" level="WARN"/>
<logger name="javascript" level="WARN"/>
<logger name="kNN_clusterer" level="WARN"/>
<logger name="open" level="WARN"/>
<logger name="office" level="WARN"/>
<logger name="project" level="WARN"/>
<logger name="project_metadata" level="WARN"/>
<logger name="project_metadata_utilities" level="WARN"/>
<logger name="project_utilities" level="WARN"/>
<logger name="recon-config" level="WARN"/>
<logger name="recon-operation" level="WARN"/>
<logger name="refine" level="WARN"/>
<logger name="refine-standard-recon" level="WARN"/>
<logger name="refine_clientSideResourceManager" level="WARN"/>
<logger name="scatterplot_facet" level="WARN"/>

<!-- GeoSPARQL related deps be less verbose -->
<logger name="hsqldb.*" level="WARN"/>
<logger name="org.geotoolkit.*" level="WARN"/>

<!-- SemanticVectors related logger be less verbose -->
<logger name="pitt.search.semanticvectors.DocVectors" level="WARN"/>
</configuration>
33 changes: 33 additions & 0 deletions deploy/db-server/init-config/s-pipes-hello-world-config.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# GraphDB repository configuration for the repository "s-pipes-hello-world".
# bin/repo-init.sh processes files named *-config.ttl from its source directory
# and POSTs each of them to GraphDB's /rest/repositories endpoint.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rep: <http://www.openrdf.org/config/repository#> .
@prefix sail: <http://www.openrdf.org/config/sail#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.

<#s-pipes-hello-world> a rep:Repository;
# repo-init.sh extracts this value (rep:repositoryID) with
# get-value-of-rdf-property.py, which requires it to occur exactly once in the
# file, and uses it as the directory name under <graphdb-home>/data/repositories.
rep:repositoryID "s-pipes-hello-world";
rep:repositoryImpl [
rep:repositoryType "graphdb:SailRepository";
<http://www.openrdf.org/config/repository/sail#sailImpl> [
# GraphDB sail parameters. NOTE(review): the meaning of individual values
# (e.g. "0" for query-timeout / query-limit-results) is defined by GraphDB
# and should be confirmed against its configuration documentation.
graphdb:base-URL "http://example.org/owlim#";
graphdb:check-for-inconsistencies "false";
graphdb:defaultNS "";
graphdb:disable-sameAs "true";
graphdb:enable-context-index "true";
graphdb:enable-literal-index "true";
graphdb:enablePredicateList "true";
graphdb:entity-id-size "32";
graphdb:entity-index-size "10000000";
graphdb:imports "";
graphdb:in-memory-literal-properties "true";
graphdb:owlim-license "";
graphdb:query-limit-results "0";
graphdb:query-timeout "0";
graphdb:read-only "false";
graphdb:repository-type "file-repository";
graphdb:storage-folder "storage";
graphdb:throw-QueryEvaluationException-on-timeout "false";
sail:sailType "graphdb:Sail"
]
];
rdfs:label "SPipes Hello World repository" .
Loading

0 comments on commit e8ae31a

Please sign in to comment.