Skip to content

Commit

Permalink
Feature: Add support of multilingual search (#40)
Browse files Browse the repository at this point in the history
* update get_term_search_query to support multilanguages search

* rename var

* fix search lang suffix to use underscore not @

* add multilangual search test

---------

Co-authored-by: Syphax Bouazzouni <[email protected]>
  • Loading branch information
haddadzineddine and syphax-bouazzouni authored Sep 5, 2023
1 parent e85558a commit ff7650e
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ GEM
rack (>= 0.4)
rack-attack (6.6.1)
rack (>= 1.0, < 3)
rack-cache (1.13.0)
rack-cache (1.14.0)
rack (>= 0.4)
rack-cors (1.0.6)
rack (>= 1.6.0)
Expand Down
15 changes: 9 additions & 6 deletions helpers/search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def get_term_search_query(text, params={})
end
end

lang = params["lang"] || params["language"]
lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : ""

query = ""
params["defType"] = "edismax"
params["stopwords"] = "true"
Expand All @@ -98,25 +101,25 @@ def get_term_search_query(text, params={})

if params[EXACT_MATCH_PARAM] == "true"
query = "\"#{solr_escape(text)}\""
params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*'
text.gsub!(/\*+$/, '')
query = "\"#{solr_escape(text)}\""
params["qt"] = "/suggest_ncbo"
params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["pf"] = "prefLabelSuggest^50"
params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
else
if text.strip.empty?
query = '*'
else
query = solr_escape(text)
end

params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
end

Expand Down
44 changes: 44 additions & 0 deletions test/controllers/test_search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,48 @@ def test_search_provisional_class
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first
end

def test_multilingual_search
get "/search?q=Activity&ontologies=BROSEARCHTEST-0"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]

doc = res["collection"].select{|doc| doc["@id"].to_s.eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc

#res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main)
#refute_equal 0, res["response"]["numFound"]
#refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first



get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first


get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true"
res = MultiJson.load(last_response.body)
assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true"
res = MultiJson.load(last_response.body)
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

get "/search?q=Activity&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true"
res = MultiJson.load(last_response.body)
assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true"
res = MultiJson.load(last_response.body)
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first


end

end
3 changes: 3 additions & 0 deletions test/data/ontology_files/BRO_v3.2.owl
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,9 @@

<owl:Class rdf:about="&activity;Activity">
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel xml:lang="en">ActivityEnglish</core:prefLabel>
<core:prefLabel xml:lang="fr">Activité</core:prefLabel>
<desc:definition rdf:datatype="&xsd;string">Activity of interest that may be related to a BRO:Resource.</desc:definition>
<core:altLabel>activities</core:altLabel>
</owl:Class>
Expand Down
13 changes: 13 additions & 0 deletions test/solr/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
version: '3.8'

services:
op_solr:
image: solr:8.8
volumes:
- ./solr_configsets:/configsets:ro
ports:
- "8983:8983"
command: >
bash -c "precreate-core term_search_core1 /configsets/term_search
&& precreate-core prop_search_core1 /configsets/property_search
&& solr-foreground"
35 changes: 20 additions & 15 deletions test/solr/generate_ncbo_configsets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,23 @@
# generates solr configsets by merging _default configset with config files in config/solr
# _default is copied from sorl distribuion solr-8.10.1/server/solr/configsets/_default/

pushd solr/configsets
ld_config='../../../../ontologies_linked_data/config/solr/'
#ld_config='../../../../config/solr/'
ls -l $ld_config
pwd
[ -d property_search ] && rm -Rf property_search
[ -d term_search ] && rm -Rf property_search
[ -d $ld_config/property_search ] || echo "cant find ontologies_linked_data project"
mkdir -p property_search/conf
mkdir -p term_search/conf
cp -a _default/conf/* property_search/conf/
cp -a _default/conf/* term_search/conf/
cp -a $ld_config/property_search/* property_search/conf
cp -a $ld_config/term_search/* term_search/conf
popd
#cd solr/configsets
ld_config='config/solr'
configsets='test/solr/configsets'
[ -d ${configsets}/property_search ] && rm -Rf ${configsets}/property_search
[ -d ${configsets}/term_search ] && rm -Rf ${configsets}/term_search
if [[ ! -d ${ld_config}/property_search ]]; then
echo 'cant find ld solr config sets'
exit 1
fi
if [[ ! -d ${configsets}/_default/conf ]]; then
echo 'cant find default solr configset'
exit 1
fi
mkdir -p ${configsets}/property_search/conf
mkdir -p ${configsets}/term_search/conf
cp -a ${configsets}/_default/conf/* ${configsets}/property_search/conf/
cp -a ${configsets}/_default/conf/* ${configsets}/term_search/conf/
cp -a $ld_config/property_search/* ${configsets}/property_search/conf
cp -a $ld_config/term_search/* ${configsets}/term_search/conf

0 comments on commit ff7650e

Please sign in to comment.