Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

refactor: colocate query and stats updates #29

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 9 additions & 27 deletions src/main/java/com/neo4j/data/importer/GedcomImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,26 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
var model = loadModel(filePath);

var dateParser = new Parser();
var personExtractors = new PersonExtractors(dateParser, model);
var statistics = new Statistics();
try (Transaction tx = db.beginTx()) {

var personExtractors = new PersonExtractors(dateParser, model);
model.getPeople().forEach(person -> {
var attributes = personExtractors.get().apply(person);
var personsStats = tx.execute("CREATE (i:Person) SET i = $attributes", Map.of("attributes", attributes))
var personExtractor = personExtractors.get();
var attributes = personExtractor.apply(person);
var personsStats = tx.execute(personExtractor.query(), Map.of("attributes", attributes))
.getQueryStatistics();

statistics.addNodesCreated(personsStats.getNodesCreated());
personExtractor.updateCounters(personsStats, statistics);
});

var familyExtractors = new FamilyExtractors(dateParser);
model.getFamilies().forEach(family -> {
var attributes = familyExtractors.get().apply(family);
var stats = tx.execute(
"""
UNWIND $spouseIdPairs AS spouseInfo
MATCH (spouse1:Person {id: spouseInfo.id1}),
(spouse2:Person {id: spouseInfo.id2})
CREATE (spouse1)-[r:SPOUSE_OF]->(spouse2)
FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
CREATE (spouse1)-[r:MARRIED_TO]->(spouse2)
SET r = marriageInfo
)
FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
CREATE (spouse1)-[r:DIVORCED]->(spouse2)
SET r = divorceInfo
)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:CHILD_OF]->(spouse1)
CREATE (child)-[:CHILD_OF]->(spouse2)
""",
attributes)
var familyExtractor = familyExtractors.get();
var familyStats = tx.execute(familyExtractor.query(), familyExtractor.apply(family))
.getQueryStatistics();

statistics.addRelationshipsCreated(stats.getRelationshipsCreated());
familyExtractor.updateCounters(familyStats, statistics);
});

tx.commit();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import java.util.Map;
import java.util.function.Function;
import org.neo4j.graphdb.QueryStatistics;

public interface AttributeExtractor<T> extends Function<T, Map<String, Object>> {}
/**
 * A {@link Function} from a source object of type {@code T} to a {@code Map<String, Object>},
 * bundled with the query string that goes with that map and a hook for recording the
 * statistics of that query once it has run.
 *
 * @param <T> the type of object the map is extracted from
 */
public interface AttributeExtractor<T> extends Function<T, Map<String, Object>> {
/**
 * Returns the query string associated with this extractor.
 *
 * <p>NOTE(review): presumably a Cypher statement that the importer executes with parameters
 * derived from {@link #apply(Object)} — confirm against the caller.
 *
 * @return the query text
 */
String query();

/**
 * Transfers the figures of interest from {@code results} into {@code counters}.
 *
 * @param results statistics reported for an executed query
 * @param counters accumulator to update
 */
void updateCounters(QueryStatistics results, Statistics counters);
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import com.neo4j.data.importer.extractors.Lists.Pair;
import java.util.List;
import java.util.Map;
import org.folg.gedcom.model.Family;
import org.neo4j.graphdb.QueryStatistics;

public interface FamilyExtractor extends AttributeExtractor<Family> {

Expand All @@ -13,6 +15,28 @@ public interface FamilyExtractor extends AttributeExtractor<Family> {

List<String> childReferences(Family family);

/**
 * Returns the Cypher statement that links the members of one family.
 *
 * <p>The statement reads two parameters, {@code $spouseIdPairs} and {@code $childIds}.
 * For every entry of {@code $spouseIdPairs} it matches two {@code Person} nodes by
 * {@code id} (taken from the entry's {@code id1} and {@code id2}), creates a
 * {@code SPOUSE_OF} relationship between them, then creates one {@code MARRIED_TO}
 * relationship per element of {@code events["MARR"]} and one {@code DIVORCED} relationship
 * per element of {@code events["DIV"]}, setting each relationship's properties from that
 * element. Finally, for every id in {@code $childIds} it matches the {@code Person} with
 * that id and creates a {@code CHILD_OF} relationship from the child to each spouse.
 *
 * @return the Cypher query text
 */
default String query() {
return """
UNWIND $spouseIdPairs AS spouseInfo
MATCH (spouse1:Person {id: spouseInfo.id1}),
(spouse2:Person {id: spouseInfo.id2})
CREATE (spouse1)-[r:SPOUSE_OF]->(spouse2)
FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
CREATE (spouse1)-[r:MARRIED_TO]->(spouse2)
SET r = marriageInfo
)
FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
CREATE (spouse1)-[r:DIVORCED]->(spouse2)
SET r = divorceInfo
)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:CHILD_OF]->(spouse1)
CREATE (child)-[:CHILD_OF]->(spouse2)
""";
}

default Map<String, Object> apply(Family family) {
var familyEvents = familyEvents(family);
var spouseInfo = spouseReferences(family).stream()
Expand All @@ -23,4 +47,8 @@ default Map<String, Object> apply(Family family) {
.toList();
return Map.of("spouseIdPairs", spouseInfo, "childIds", childReferences(family));
}

/**
 * Adds the number of relationships that {@code results} reports as created to
 * {@code counters}.
 *
 * @param results statistics reported for an executed query
 * @param counters accumulator receiving the relationship count
 */
default void updateCounters(QueryStatistics results, Statistics counters) {
    var createdRelationships = results.getRelationshipsCreated();
    counters.addRelationshipsCreated(createdRelationships);
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.folg.gedcom.model.Person;
import org.neo4j.graphdb.QueryStatistics;

interface PersonExtractor extends AttributeExtractor<Person> {

Expand All @@ -22,6 +24,10 @@ default Optional<String> preferredFirstName(Person person) {
return Optional.empty();
}

/**
 * Returns the Cypher statement that creates one {@code Person} node and sets its
 * properties from the {@code $attributes} parameter.
 *
 * @return the Cypher query text
 */
default String query() {
return "CREATE (i:Person) SET i = $attributes";
}

default Map<String, Object> apply(Person person) {
Map<String, Object> attributes = new HashMap<>(facts(person));
attributes.put("id", id(person));
Expand All @@ -31,4 +37,8 @@ default Map<String, Object> apply(Person person) {
preferredFirstName(person).ifPresent(gender -> attributes.put("preferred_first_name", gender));
return attributes;
}

/**
 * Adds the number of nodes that {@code results} reports as created to {@code counters}.
 *
 * @param results statistics reported for an executed query
 * @param counters accumulator receiving the node count
 */
default void updateCounters(QueryStatistics results, Statistics counters) {
    var createdNodes = results.getNodesCreated();
    counters.addNodesCreated(createdNodes);
}
}