Skip to content

Commit

Permalink
[#2917] Modify Solr to not use auto-created fields to prevent unnecessary tokenization (#11323)
Browse files Browse the repository at this point in the history
  • Loading branch information
wkurniawan07 authored Aug 3, 2021
1 parent 236913f commit c3def1a
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 17 deletions.
8 changes: 6 additions & 2 deletions solr/solr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

# Create core to run students collection
bin/solr create -c students -s 2 -rf 2
curl -X POST -H "Content-Type: application/json" --data-binary '{"add-copy-field": {"source": "*", "dest": "_text_"}}' localhost:8983/solr/students/schema
bin/solr config -c students -p 8983 -action set-user-property -property update.autoCreateFields -value false
curl -X POST -H 'Content-type: application/json' --data-binary '{"add-field": {"name": "courseId", "type": "string"}}' localhost:8983/solr/students/schema
curl -X POST -H 'Content-type: application/json' --data-binary '{"add-field": {"name": "email", "type": "string"}}' localhost:8983/solr/students/schema

# Create core to run instructors collection
bin/solr create -c instructors -s 2 -rf 2
curl -X POST -H "Content-Type: application/json" --data-binary '{"add-copy-field": {"source": "*", "dest": "_text_"}}' localhost:8983/solr/instructors/schema
bin/solr config -c instructors -p 8983 -action set-user-property -property update.autoCreateFields -value false
curl -X POST -H 'Content-type: application/json' --data-binary '{"add-field": {"name": "courseId", "type": "string"}}' localhost:8983/solr/instructors/schema
curl -X POST -H 'Content-type: application/json' --data-binary '{"add-field": {"name": "email", "type": "string"}}' localhost:8983/solr/instructors/schema
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@ class InstructorSearchDocument extends SearchDocument<InstructorAttributes> {
Map<String, Object> getSearchableFields() {
Map<String, Object> fields = new HashMap<>();
InstructorAttributes instructor = attribute;
String[] searchableTexts = {
instructor.getName(), instructor.getEmail(), instructor.getCourseId(),
course == null ? "" : course.getName(),
instructor.getGoogleId(), instructor.getRole(), instructor.getDisplayedName(),
};

fields.put("id", instructor.getEmail() + "%" + instructor.getCourseId());
fields.put("name", instructor.getName());
fields.put("email", instructor.getEmail());
fields.put("_text_", String.join(" ", searchableTexts));
fields.put("courseId", instructor.getCourseId());
fields.put("courseName", course == null ? "" : course.getName());
fields.put("googleId", instructor.getGoogleId());
fields.put("role", instructor.getRole());
fields.put("displayedName", instructor.getDisplayedName());
fields.put("email", instructor.getEmail());

return fields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@ class StudentSearchDocument extends SearchDocument<StudentAttributes> {
Map<String, Object> getSearchableFields() {
Map<String, Object> fields = new HashMap<>();
StudentAttributes student = attribute;
String[] searchableTexts = {
student.getName(), student.getEmail(), student.getCourse(),
course == null ? "" : course.getName(),
student.getTeam(), student.getSection(),
};

fields.put("id", student.getId());
fields.put("name", student.getName());
fields.put("email", student.getEmail());
fields.put("_text_", String.join(" ", searchableTexts));
fields.put("courseId", student.getCourse());
fields.put("courseName", course == null ? "" : course.getName());
fields.put("team", student.getTeam());
fields.put("section", student.getSection());
fields.put("email", student.getEmail());

return fields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,9 @@ public List<StudentAttributes> searchStudents(String queryString, List<Instructo
QueryResponse response = performQuery(query);
SolrDocumentList documents = response.getResults();

// Even though FQ has been applied, it may still match some unwanted results,
// e.g. if a course ID specified in FQ is the substring of another valid course.
// An additional filtering is done here such that only exact match will be returned.
// TODO a better way is to modify the field type in Solr instead of doing this
// Sanity check to ensure that the course ID of each student matches exactly.
// In the ideal case, this check is not expected to do anything,
// i.e. the resulting list should be the same as the incoming list.

List<SolrDocument> filteredDocuments = documents.stream()
.filter(document -> {
Expand Down

0 comments on commit c3def1a

Please sign in to comment.