diff --git a/.dockerignore b/.dockerignore index cf76ed57..3b15d33c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ tmp/* # Editor temp files *.swp *.swo +test/solr diff --git a/Gemfile.lock b/Gemfile.lock index 1f315067..6bd8d01c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/ncbo_ontology_recommender.git - revision: d0ac992c88bd417f2f2137ba62934c3c41b6db7c + revision: 83e835de368bc9f19da800a477982e0ad770900d branch: master specs: ncbo_ontology_recommender (0.0.1) @@ -11,7 +11,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: b769c165906163e30a026dba511ae1069c4eed3d + revision: ddb95e427950fde3ac715aec340394208c8166fe branch: development specs: goo (0.0.2) @@ -53,7 +53,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: 69756f8a7cff39d283065217559c68820d6d95e3 + revision: 4c89c8346766d23e09b24c8e29750bf3a91e6b53 branch: development specs: ontologies_linked_data (0.0.1) @@ -103,16 +103,16 @@ GEM activesupport (3.2.22.5) i18n (~> 0.6, >= 0.6.4) multi_json (~> 1.0) - addressable (2.8.4) + addressable (2.8.5) public_suffix (>= 2.0.2, < 6.0) - airbrussh (1.4.1) + airbrussh (1.4.2) sshkit (>= 1.6.1, != 1.7.0) backports (3.24.1) - bcrypt (3.1.18) + bcrypt (3.1.19) bcrypt_pbkdf (1.1.0) bigdecimal (1.4.2) builder (3.2.4) - capistrano (3.17.2) + capistrano (3.17.3) airbrussh (>= 1.0.0) i18n rake (>= 10.0.0) @@ -162,7 +162,7 @@ GEM ffi (~> 1.0) google-apis-analytics_v3 (0.13.0) google-apis-core (>= 0.11.0, < 2.a) - google-apis-core (0.11.0) + google-apis-core (0.11.1) addressable (~> 2.5, >= 2.5.1) googleauth (>= 0.16.2, < 2.a) httpclient (>= 2.8.1, < 3.a) @@ -171,7 +171,7 @@ GEM retriable (>= 2.0, < 4.a) rexml webrick - googleauth (1.5.2) + googleauth (1.7.0) faraday (>= 0.17.3, < 3.a) jwt (>= 1.4, < 3.0) memoist (~> 0.16) @@ -191,9 +191,9 @@ GEM json-schema (2.8.1) addressable (>= 2.4) json_pure (2.6.3) - jwt (2.7.0) + jwt (2.7.1) kgio (2.11.4) - libxml-ruby (4.1.0) + libxml-ruby (4.1.1) logger (1.5.3) macaddr (1.7.2) systemu (~> 2.6.5) @@ -204,10 +204,10 @@ GEM net-smtp memoist (0.16.2) method_source (1.0.0) - mime-types (3.4.1) + mime-types (3.5.1) mime-types-data (~> 3.2015) - mime-types-data (3.2023.0218.1) - mini_mime (1.1.2) + mime-types-data (3.2023.0808) + mini_mime (1.1.5) minitest (4.7.5) minitest-stub_any_instance (1.0.3) mlanett-redis-lock (0.2.7) @@ -215,7 +215,7 @@ GEM multi_json (1.15.0) multipart-post (2.3.0) net-http-persistent (2.9.4) - net-imap (0.3.4) + net-imap (0.3.7) date net-protocol net-pop (0.1.2) @@ -226,9 +226,9 @@ GEM net-ssh (>= 2.6.5, < 8.0.0) net-smtp (0.3.3) net-protocol - net-ssh (7.0.1) + net-ssh (7.2.0) netrc (0.11.0) - newrelic_rpm (9.0.0) + newrelic_rpm (9.4.2) oj (2.18.5) omni_logger (0.1.4) logger @@ -239,21 +239,21 @@ GEM pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.1) + public_suffix (5.0.3) rack (1.6.13) rack-accept (0.4.5) rack (>= 0.4) rack-attack (6.6.1) rack (>= 1.0, < 3) - rack-cache (1.13.0) + rack-cache (1.14.0) rack (>= 0.4) rack-cors (1.0.6) rack (>= 1.6.0) - rack-mini-profiler (3.1.0) + rack-mini-profiler (3.1.1) rack (>= 1.2.0) rack-protection (1.5.5) rack - rack-test (2.0.2) + rack-test (2.1.0) rack (>= 1.3) rack-timeout (0.6.3) raindrops (0.20.1) @@ -282,7 +282,7 @@ GEM mime-types (>= 1.16, < 4.0) netrc (~> 0.8) retriable (3.1.2) - rexml (3.2.5) + rexml (3.2.6) rsolr (2.5.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) @@ -318,13 +318,13 @@ GEM rack-test sinatra (~> 1.4.0) tilt (>= 1.3, < 3) - sshkit (1.21.4) + sshkit (1.21.5) net-scp (>= 1.1.2) net-ssh (>= 2.8.0) systemu (2.6.5) - temple (0.10.0) - tilt (2.1.0) - timeout (0.3.2) + temple (0.10.2) + tilt (2.2.0) + timeout (0.4.0) trailblazer-option (0.1.2) tzinfo (2.0.6) concurrent-ruby (~> 1.0) @@ -346,7 +346,6 @@ PLATFORMS x86_64-darwin-21 x86_64-linux - DEPENDENCIES activesupport (~> 3.0) bcrypt_pbkdf (>= 1.0, < 2.0) diff --git a/docker-compose.yml b/docker-compose.yml index de084081..5cb64963 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,10 +75,14 @@ services: redis-ut: image: redis + ports: + - 6379:6379 4store-ut: image: bde2020/4store #volume: fourstore:/var/lib/4store + ports: + - 9000:9000 command: > bash -c "4s-backend-setup --segments 4 ontoportal_kb && 4s-backend ontoportal_kb @@ -88,10 +92,20 @@ services: solr-ut: - image: ontoportal/solr-ut:0.1 + image: solr:8 + volumes: + - ./test/solr/configsets:/configsets:ro + ports: + - "8983:8983" + command: > + bash -c "precreate-core term_search_core1 /configsets/term_search + && precreate-core prop_search_core1 /configsets/property_search + && solr-foreground" mgrep-ut: image: ontoportal/mgrep-ncbo:0.1 + ports: + - "55556:55555" agraph-ut: image: franzinc/agraph:v7.3.0 diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index 10de14c0..071000d9 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -82,6 +82,9 @@ def get_term_search_query(text, params={}) end end + lang = params["lang"] || params["language"] + lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : "" + query = "" params["defType"] = "edismax" params["stopwords"] = "true" @@ -98,15 +101,15 @@ def get_term_search_query(text, params={}) if params[EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}" - params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*' text.gsub!(/\*+$/, '') query = "\"#{solr_escape(text)}\"" params["qt"] = "/suggest_ncbo" - params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" params["pf"] = "prefLabelSuggest^50" - params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" else if text.strip.empty? query = '*' @@ -114,9 +117,9 @@ def get_term_search_query(text, params={}) query = solr_escape(text) end - params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true" - params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true" end @@ -345,6 +348,7 @@ def populate_classes_from_search(classes, ontology_acronyms=nil) doc[:submission] = old_class.submission doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties) instance = LinkedData::Models::Class.read_only(doc) + instance.prefLabel = instance.prefLabel.first if instance.prefLabel.is_a?(Array) classes_hash[ont_uri_class_uri] = instance end diff --git a/test/controllers/test_search_controller.rb b/test/controllers/test_search_controller.rb index 44c67c7e..74be75d2 100644 --- a/test/controllers/test_search_controller.rb +++ b/test/controllers/test_search_controller.rb @@ -85,7 +85,7 @@ def test_search_ontology_filter assert last_response.ok? results = MultiJson.load(last_response.body) doc = results["collection"][0] - assert_equal "cell line", doc["prefLabel"] + assert_equal "cell line", doc["prefLabel"].first assert doc["links"]["ontology"].include? acronym results["collection"].each do |doc| acr = doc["links"]["ontology"].split('/')[-1] @@ -103,7 +103,8 @@ def test_search_other_filters get "search?q=data&require_definitions=true" assert last_response.ok? results = MultiJson.load(last_response.body) - assert_equal 26, results["collection"].length + assert results["collection"].all? {|doc| !doc["definition"].nil? && doc.values.flatten.join(" ").include?("data") } + #assert_equal 26, results["collection"].length get "search?q=data&require_definitions=false" assert last_response.ok? @@ -115,10 +116,14 @@ def test_search_other_filters get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}" results = MultiJson.load(last_response.body) - assert_equal 22, results["collection"].length + + assert results["collection"].all? { |x| !x["obsolete"] } + count = results["collection"].length + get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}&also_search_obsolete=false" results = MultiJson.load(last_response.body) - assert_equal 22, results["collection"].length + assert_equal count, results["collection"].length + get "search?q=Integration%20and%20Interoperability&ontologies=#{acronym}&also_search_obsolete=true" results = MultiJson.load(last_response.body) assert_equal 29, results["collection"].length @@ -134,8 +139,14 @@ def test_search_other_filters # testing cui and semantic_types flags get "search?q=Funding%20Resource&ontologies=#{acronym}&include=prefLabel,synonym,definition,notation,cui,semanticType" results = MultiJson.load(last_response.body) - assert_equal 35, results["collection"].length - assert_equal "Funding Resource", results["collection"][0]["prefLabel"] + #assert_equal 35, results["collection"].length + assert results["collection"].all? do |r| + ["prefLabel", "synonym", "definition", "notation", "cui", "semanticType"].map {|x| r[x]} + .flatten + .join(' ') + .include?("Funding Resource") + end + assert_equal "Funding Resource", results["collection"][0]["prefLabel"].first assert_equal "T028", results["collection"][0]["semanticType"][0] assert_equal "X123456", results["collection"][0]["cui"][0] @@ -190,7 +201,7 @@ def test_search_provisional_class assert_equal 10, results["collection"].length provisional = results["collection"].select {|res| assert_equal ontology_type, res["ontologyType"]; res["provisional"]} assert_equal 1, provisional.length - assert_equal @@test_pc_root.label, provisional[0]["prefLabel"] + assert_equal @@test_pc_root.label, provisional[0]["prefLabel"].first # subtree root with provisional class test get "search?ontology=#{acronym}&subtree_root_id=#{CGI::escape(@@cls_uri.to_s)}&also_search_provisional=true" @@ -199,7 +210,51 @@ def test_search_provisional_class provisional = results["collection"].select {|res| res["provisional"]} assert_equal 1, provisional.length - assert_equal @@test_pc_child.label, provisional[0]["prefLabel"] + assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first + end + + def test_multilingual_search + get "/search?q=Activity&ontologies=BROSEARCHTEST-0" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + + doc = res["collection"].select{|doc| doc["@id"].to_s.eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + refute_nil doc + + #res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main) + #refute_equal 0, res["response"]["numFound"] + #refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true" + res = MultiJson.load(last_response.body) + assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true" + res = MultiJson.load(last_response.body) + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activity&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true" + res = MultiJson.load(last_response.body) + assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true" + res = MultiJson.load(last_response.body) + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + end end diff --git a/test/data/ontology_files/BRO_v3.2.owl b/test/data/ontology_files/BRO_v3.2.owl index d64075cc..b2aeccf5 100644 --- a/test/data/ontology_files/BRO_v3.2.owl +++ b/test/data/ontology_files/BRO_v3.2.owl @@ -631,6 +631,9 @@ Activity + Activity + ActivityEnglish + Activité Activity of interest that may be related to a BRO:Resource. activities diff --git a/test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf b/test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf index 8353d82f..ca303834 100644 --- a/test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf +++ b/test/data/ontology_files/thesaurusINRAE_nouv_structure.rdf @@ -30,7 +30,7 @@ 1331561625299 - aktivite + aktivite 2012-03-12T22:13:45Z 2017-09-22T14:09:06Z diff --git a/test/middleware/test_rack_attack.rb b/test/middleware/test_rack_attack.rb index 43143080..0b10c9e1 100644 --- a/test/middleware/test_rack_attack.rb +++ b/test/middleware/test_rack_attack.rb @@ -18,14 +18,14 @@ def self.before_suite LinkedData::OntologiesAPI.settings.req_per_second_per_ip = 1 LinkedData::OntologiesAPI.settings.safe_ips = Set.new(["1.2.3.4", "1.2.3.5"]) - @@user = LinkedData::Models::User.new({username: "user", password: "test_password", email: "test_email@example.org"}) + @@user = LinkedData::Models::User.new({username: "user", password: "test_password", email: "test_email1@example.org"}) @@user.save - @@bp_user = LinkedData::Models::User.new({username: "ncbobioportal", password: "test_password", email: "test_email@example.org"}) + @@bp_user = LinkedData::Models::User.new({username: "ncbobioportal", password: "test_password", email: "test_email2@example.org"}) @@bp_user.save admin_role = LinkedData::Models::Users::Role.find("ADMINISTRATOR").first - @@admin = LinkedData::Models::User.new({username: "admin", password: "test_password", email: "test_email@example.org", role: [admin_role]}) + @@admin = LinkedData::Models::User.new({username: "admin", password: "test_password", email: "test_email3@example.org", role: [admin_role]}) @@admin.save # Redirect output or we get a bunch of noise from Rack (gets reset in the after_suite method). diff --git a/test/solr/configsets/term_search/conf/schema.xml b/test/solr/configsets/term_search/conf/schema.xml index 6b18a2a1..fa95e127 100644 --- a/test/solr/configsets/term_search/conf/schema.xml +++ b/test/solr/configsets/term_search/conf/schema.xml @@ -128,11 +128,20 @@ - - - - - + + + + + + + + + + + + + + @@ -140,9 +149,18 @@ + + + + + + + - + + + @@ -251,6 +269,17 @@ + + + + + + + + + + + diff --git a/test/solr/docker-compose.yml b/test/solr/docker-compose.yml new file mode 100644 index 00000000..3ddae69c --- /dev/null +++ b/test/solr/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3.8' + +services: + op_solr: + image: solr:8.8 + volumes: + - ./solr_configsets:/configsets:ro + ports: + - "8983:8983" + command: > + bash -c "precreate-core term_search_core1 /configsets/term_search + && precreate-core prop_search_core1 /configsets/property_search + && solr-foreground" diff --git a/test/solr/generate_ncbo_configsets.sh b/test/solr/generate_ncbo_configsets.sh index 893f7f3a..7b4281f7 100755 --- a/test/solr/generate_ncbo_configsets.sh +++ b/test/solr/generate_ncbo_configsets.sh @@ -2,18 +2,23 @@ # generates solr configsets by merging _default configset with config files in config/solr # _default is copied from sorl distribuion solr-8.10.1/server/solr/configsets/_default/ -pushd solr/configsets -ld_config='../../../../ontologies_linked_data/config/solr/' -#ld_config='../../../../config/solr/' -ls -l $ld_config -pwd -[ -d property_search ] && rm -Rf property_search -[ -d term_search ] && rm -Rf property_search -[ -d $ld_config/property_search ] || echo "cant find ontologies_linked_data project" -mkdir -p property_search/conf -mkdir -p term_search/conf -cp -a _default/conf/* property_search/conf/ -cp -a _default/conf/* term_search/conf/ -cp -a $ld_config/property_search/* property_search/conf -cp -a $ld_config/term_search/* term_search/conf -popd +#cd solr/configsets +ld_config='config/solr' +configsets='test/solr/configsets' +[ -d ${configsets}/property_search ] && rm -Rf ${configsets}/property_search +[ -d ${configsets}/term_search ] && rm -Rf ${configsets}/term_search +if [[ ! -d ${ld_config}/property_search ]]; then + echo 'cant find ld solr config sets' + exit 1 +fi +if [[ ! -d ${configsets}/_default/conf ]]; then + echo 'cant find default solr configset' + exit 1 +fi +mkdir -p ${configsets}/property_search/conf +mkdir -p ${configsets}/term_search/conf +cp -a ${configsets}/_default/conf/* ${configsets}/property_search/conf/ +cp -a ${configsets}/_default/conf/* ${configsets}/term_search/conf/ +cp -a $ld_config/property_search/* ${configsets}/property_search/conf +cp -a $ld_config/term_search/* ${configsets}/term_search/conf +