Skip to content

Commit

Permalink
Feature: Add support of multilingual search (#40)
Browse files Browse the repository at this point in the history
* update get_term_search_query to support multilingual search

* rename var

* fix search lang suffix to use underscore not @

* add multilingual search test

---------

Co-authored-by: Syphax Bouazzouni <[email protected]>
  • Loading branch information
haddadzineddine and syphax-bouazzouni committed Sep 5, 2023
1 parent c0c5e0e commit 882853c
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 67 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ tmp/*
# Editor temp files
*.swp
*.swo
test/solr
53 changes: 26 additions & 27 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ncbo/ncbo_ontology_recommender.git
revision: d0ac992c88bd417f2f2137ba62934c3c41b6db7c
revision: 83e835de368bc9f19da800a477982e0ad770900d
branch: master
specs:
ncbo_ontology_recommender (0.0.1)
Expand All @@ -11,7 +11,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/goo.git
revision: b769c165906163e30a026dba511ae1069c4eed3d
revision: ddb95e427950fde3ac715aec340394208c8166fe
branch: development
specs:
goo (0.0.2)
Expand Down Expand Up @@ -53,7 +53,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
revision: 69756f8a7cff39d283065217559c68820d6d95e3
revision: 4c89c8346766d23e09b24c8e29750bf3a91e6b53
branch: development
specs:
ontologies_linked_data (0.0.1)
Expand Down Expand Up @@ -103,16 +103,16 @@ GEM
activesupport (3.2.22.5)
i18n (~> 0.6, >= 0.6.4)
multi_json (~> 1.0)
addressable (2.8.4)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
airbrussh (1.4.1)
airbrussh (1.4.2)
sshkit (>= 1.6.1, != 1.7.0)
backports (3.24.1)
bcrypt (3.1.18)
bcrypt (3.1.19)
bcrypt_pbkdf (1.1.0)
bigdecimal (1.4.2)
builder (3.2.4)
capistrano (3.17.2)
capistrano (3.17.3)
airbrussh (>= 1.0.0)
i18n
rake (>= 10.0.0)
Expand Down Expand Up @@ -162,7 +162,7 @@ GEM
ffi (~> 1.0)
google-apis-analytics_v3 (0.13.0)
google-apis-core (>= 0.11.0, < 2.a)
google-apis-core (0.11.0)
google-apis-core (0.11.1)
addressable (~> 2.5, >= 2.5.1)
googleauth (>= 0.16.2, < 2.a)
httpclient (>= 2.8.1, < 3.a)
Expand All @@ -171,7 +171,7 @@ GEM
retriable (>= 2.0, < 4.a)
rexml
webrick
googleauth (1.5.2)
googleauth (1.7.0)
faraday (>= 0.17.3, < 3.a)
jwt (>= 1.4, < 3.0)
memoist (~> 0.16)
Expand All @@ -191,9 +191,9 @@ GEM
json-schema (2.8.1)
addressable (>= 2.4)
json_pure (2.6.3)
jwt (2.7.0)
jwt (2.7.1)
kgio (2.11.4)
libxml-ruby (4.1.0)
libxml-ruby (4.1.1)
logger (1.5.3)
macaddr (1.7.2)
systemu (~> 2.6.5)
Expand All @@ -204,18 +204,18 @@ GEM
net-smtp
memoist (0.16.2)
method_source (1.0.0)
mime-types (3.4.1)
mime-types (3.5.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.0218.1)
mini_mime (1.1.2)
mime-types-data (3.2023.0808)
mini_mime (1.1.5)
minitest (4.7.5)
minitest-stub_any_instance (1.0.3)
mlanett-redis-lock (0.2.7)
redis
multi_json (1.15.0)
multipart-post (2.3.0)
net-http-persistent (2.9.4)
net-imap (0.3.4)
net-imap (0.3.7)
date
net-protocol
net-pop (0.1.2)
Expand All @@ -226,9 +226,9 @@ GEM
net-ssh (>= 2.6.5, < 8.0.0)
net-smtp (0.3.3)
net-protocol
net-ssh (7.0.1)
net-ssh (7.2.0)
netrc (0.11.0)
newrelic_rpm (9.0.0)
newrelic_rpm (9.4.2)
oj (2.18.5)
omni_logger (0.1.4)
logger
Expand All @@ -239,21 +239,21 @@ GEM
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.0.1)
public_suffix (5.0.3)
rack (1.6.13)
rack-accept (0.4.5)
rack (>= 0.4)
rack-attack (6.6.1)
rack (>= 1.0, < 3)
rack-cache (1.13.0)
rack-cache (1.14.0)
rack (>= 0.4)
rack-cors (1.0.6)
rack (>= 1.6.0)
rack-mini-profiler (3.1.0)
rack-mini-profiler (3.1.1)
rack (>= 1.2.0)
rack-protection (1.5.5)
rack
rack-test (2.0.2)
rack-test (2.1.0)
rack (>= 1.3)
rack-timeout (0.6.3)
raindrops (0.20.1)
Expand Down Expand Up @@ -282,7 +282,7 @@ GEM
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
retriable (3.1.2)
rexml (3.2.5)
rexml (3.2.6)
rsolr (2.5.0)
builder (>= 2.1.2)
faraday (>= 0.9, < 3, != 2.0.0)
Expand Down Expand Up @@ -318,13 +318,13 @@ GEM
rack-test
sinatra (~> 1.4.0)
tilt (>= 1.3, < 3)
sshkit (1.21.4)
sshkit (1.21.5)
net-scp (>= 1.1.2)
net-ssh (>= 2.8.0)
systemu (2.6.5)
temple (0.10.0)
tilt (2.1.0)
timeout (0.3.2)
temple (0.10.2)
tilt (2.2.0)
timeout (0.4.0)
trailblazer-option (0.1.2)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
Expand All @@ -346,7 +346,6 @@ PLATFORMS
x86_64-darwin-21
x86_64-linux


DEPENDENCIES
activesupport (~> 3.0)
bcrypt_pbkdf (>= 1.0, < 2.0)
Expand Down
16 changes: 15 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,14 @@ services:

redis-ut:
  image: redis
  ports:
    # Quoted so the HOST:CONTAINER mapping is always parsed as a string.
    - "6379:6379"

4store-ut:
image: bde2020/4store
#volume: fourstore:/var/lib/4store
ports:
- 9000:9000
command: >
bash -c "4s-backend-setup --segments 4 ontoportal_kb
&& 4s-backend ontoportal_kb
Expand All @@ -88,10 +92,20 @@ services:


solr-ut:
image: ontoportal/solr-ut:0.1
image: solr:8
volumes:
- ./test/solr/configsets:/configsets:ro
ports:
- "8983:8983"
command: >
bash -c "precreate-core term_search_core1 /configsets/term_search
&& precreate-core prop_search_core1 /configsets/property_search
&& solr-foreground"
mgrep-ut:
image: ontoportal/mgrep-ncbo:0.1
ports:
- "55556:55555"

agraph-ut:
image: franzinc/agraph:v7.3.0
Expand Down
16 changes: 10 additions & 6 deletions helpers/search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def get_term_search_query(text, params={})
end
end

lang = params["lang"] || params["language"]
lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : ""

query = ""
params["defType"] = "edismax"
params["stopwords"] = "true"
Expand All @@ -98,25 +101,25 @@ def get_term_search_query(text, params={})

if params[EXACT_MATCH_PARAM] == "true"
query = "\"#{solr_escape(text)}\""
params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*'
text.gsub!(/\*+$/, '')
query = "\"#{solr_escape(text)}\""
params["qt"] = "/suggest_ncbo"
params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["pf"] = "prefLabelSuggest^50"
params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
else
if text.strip.empty?
query = '*'
else
query = solr_escape(text)
end

params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
end

Expand Down Expand Up @@ -345,6 +348,7 @@ def populate_classes_from_search(classes, ontology_acronyms=nil)
doc[:submission] = old_class.submission
doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties)
instance = LinkedData::Models::Class.read_only(doc)
instance.prefLabel = instance.prefLabel.first if instance.prefLabel.is_a?(Array)
classes_hash[ont_uri_class_uri] = instance
end

Expand Down
71 changes: 63 additions & 8 deletions test/controllers/test_search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_search_ontology_filter
assert last_response.ok?
results = MultiJson.load(last_response.body)
doc = results["collection"][0]
assert_equal "cell line", doc["prefLabel"]
assert_equal "cell line", doc["prefLabel"].first
assert doc["links"]["ontology"].include? acronym
results["collection"].each do |doc|
acr = doc["links"]["ontology"].split('/')[-1]
Expand All @@ -103,7 +103,8 @@ def test_search_other_filters
get "search?q=data&require_definitions=true"
assert last_response.ok?
results = MultiJson.load(last_response.body)
assert_equal 26, results["collection"].length
assert results["collection"].all? {|doc| !doc["definition"].nil? && doc.values.flatten.join(" ").include?("data") }
#assert_equal 26, results["collection"].length

get "search?q=data&require_definitions=false"
assert last_response.ok?
Expand All @@ -115,10 +116,14 @@ def test_search_other_filters

get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}"
results = MultiJson.load(last_response.body)
assert_equal 22, results["collection"].length

assert results["collection"].all? { |x| !x["obsolete"] }
count = results["collection"].length

get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}&also_search_obsolete=false"
results = MultiJson.load(last_response.body)
assert_equal 22, results["collection"].length
assert_equal count, results["collection"].length

get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}&also_search_obsolete=true"
results = MultiJson.load(last_response.body)
assert_equal 29, results["collection"].length
Expand All @@ -134,8 +139,14 @@ def test_search_other_filters
# testing cui and semantic_types flags
get "search?q=Funding%20Resource&ontologies=#{acronym}&include=prefLabel,synonym,definition,notation,cui,semanticType"
results = MultiJson.load(last_response.body)
assert_equal 35, results["collection"].length
assert_equal "Funding Resource", results["collection"][0]["prefLabel"]
#assert_equal 35, results["collection"].length
# Braces bind the block to #all?. With do...end the block was attached to
# `assert` instead, so #all? ran without a predicate and the per-result
# check below was never evaluated.
searched_fields = ["prefLabel", "synonym", "definition", "notation", "cui", "semanticType"]
assert(results["collection"].all? { |r|
  searched_fields.map { |x| r[x] }
                 .flatten
                 .join(' ')
                 .include?("Funding Resource")
})
assert_equal "Funding Resource", results["collection"][0]["prefLabel"].first
assert_equal "T028", results["collection"][0]["semanticType"][0]
assert_equal "X123456", results["collection"][0]["cui"][0]

Expand Down Expand Up @@ -190,7 +201,7 @@ def test_search_provisional_class
assert_equal 10, results["collection"].length
provisional = results["collection"].select {|res| assert_equal ontology_type, res["ontologyType"]; res["provisional"]}
assert_equal 1, provisional.length
assert_equal @@test_pc_root.label, provisional[0]["prefLabel"]
assert_equal @@test_pc_root.label, provisional[0]["prefLabel"].first

# subtree root with provisional class test
get "search?ontology=#{acronym}&subtree_root_id=#{CGI::escape(@@cls_uri.to_s)}&also_search_provisional=true"
Expand All @@ -199,7 +210,51 @@ def test_search_provisional_class

provisional = results["collection"].select {|res| res["provisional"]}
assert_equal 1, provisional.length
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"]
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first
end

# Verifies that /search honours the "lang" parameter for the Activity class of
# the BROSEARCHTEST-0 ontology: a label must be findable in the language it is
# tagged with, and an exact-match search must not return the class when the
# queried label belongs to a different language.
# NOTE(review): the expected labels ("Activity", "ActivityEnglish" @en,
# "Activité" @fr) presumably come from test/data/ontology_files/BRO_v3.2.owl;
# confirm that this fixture backs BROSEARCHTEST-0.
def test_multilingual_search
  activity_id = 'http://bioontology.org/ontologies/Activity.owl#Activity'

  # Performs the GET, checks the HTTP status and returns the parsed JSON body.
  search = lambda do |path|
    get path
    assert last_response.ok?
    MultiJson.load(last_response.body)
  end

  # Returns the Activity document of a parsed response, or nil when absent.
  # `find` stops at the first hit instead of filtering the whole collection.
  activity_in = lambda do |res|
    res["collection"].find { |doc| doc["@id"].to_s.eql?(activity_id) }
  end

  # No language requested.
  res = search.call("/search?q=Activity&ontologies=BROSEARCHTEST-0")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  # Disabled direct index check, kept for reference:
  #res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main)
  #refute_equal 0, res["response"]["numFound"]
  #refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

  # French label searched in French ("Activit%C3%A9" is URL-encoded "Activité").
  res = search.call("/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  # English label searched in English.
  res = search.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  # Exact match: the English label must not match when French is requested...
  res = search.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true")
  assert_nil activity_in.call(res)

  # ...but must match when English is requested.
  res = search.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true")
  refute_nil activity_in.call(res)

  # Exact match: "Activity" is not the English-tagged label, so no hit is expected.
  res = search.call("/search?q=Activity&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true")
  assert_nil activity_in.call(res)

  # Exact match: the French label matches when French is requested.
  res = search.call("/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true")
  refute_nil activity_in.call(res)
end

end
3 changes: 3 additions & 0 deletions test/data/ontology_files/BRO_v3.2.owl
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,9 @@

<owl:Class rdf:about="&activity;Activity">
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel xml:lang="en">ActivityEnglish</core:prefLabel>
<core:prefLabel xml:lang="fr">Activité</core:prefLabel>
<desc:definition rdf:datatype="&xsd;string">Activity of interest that may be related to a BRO:Resource.</desc:definition>
<core:altLabel>activities</core:altLabel>
</owl:Class>
Expand Down
2 changes: 1 addition & 1 deletion test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<owl:NamedIndividual rdf:about="http://aims.fao.org/aos/agrovoc/xl_tr_1331561625299">
<rdf:type rdf:resource="http://www.w3.org/2008/05/skos-xl#Label"/>
<skos:notation rdf:datatype="http://aims.fao.org/aos/agrovoc/AgrovocCode">1331561625299</skos:notation>
<skos-xl:literalForm xml:lang="tr">aktivite</skos-xl:literalForm>
<skos-xl:literalForm >aktivite</skos-xl:literalForm>
<terms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-03-12T22:13:45Z</terms:created>
<terms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-09-22T14:09:06Z</terms:modified>
<void:inDataset rdf:resource="http://aims.fao.org/aos/agrovoc/void.ttl#Agrovoc"/>
Expand Down
Loading

0 comments on commit 882853c

Please sign in to comment.