Skip to content

Commit

Permalink
Merge pull request #1448 from sul-dlss/geo-aardvark
Browse files Browse the repository at this point in the history
Add Aardvark indexing config for Earthworks
  • Loading branch information
thatbudakguy authored Jun 28, 2024
2 parents d2e7f91 + 402d83c commit c18c49c
Show file tree
Hide file tree
Showing 31 changed files with 8,762 additions and 57 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ end
gem 'activesupport', '~> 7.0'
gem 'slop'

gem 'cocina-models'
gem 'dor-event-client'
gem 'factory_bot', '~> 6.2'
gem 'stanford-geo', '0.2.0'
60 changes: 60 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ GEM
sshkit (>= 1.6.1, != 1.7.0)
amq-protocol (2.3.2)
ast (2.4.2)
attr_extras (7.1.0)
base64 (0.2.0)
bcrypt_pbkdf (1.1.1)
bcrypt_pbkdf (1.1.1-arm64-darwin)
Expand Down Expand Up @@ -63,6 +64,24 @@ GEM
sshkit (~> 1.2)
capistrano-shared_configs (0.2.2)
chronic (0.10.2)
cocina-models (0.97.0)
activesupport
deprecation
dry-struct (~> 1.0)
dry-types (~> 1.1)
edtf
equivalent-xml
i18n
jsonpath
nokogiri
openapi3_parser
openapi_parser (~> 1.0)
super_diff
thor
zeitwerk (~> 2.1)
commonmarker (1.1.4-arm64-darwin)
commonmarker (1.1.4-x86_64-darwin)
commonmarker (1.1.4-x86_64-linux)
concurrent-ruby (1.3.3)
config (5.5.1)
deep_merge (~> 1.2, >= 1.2.1)
Expand All @@ -77,6 +96,8 @@ GEM
irb (~> 1.10)
reline (>= 0.3.8)
deep_merge (1.2.2)
deprecation (1.1.0)
activesupport
diff-lcs (1.5.1)
digest-crc (0.6.5)
rake (>= 12.0.0, < 14.0.0)
Expand All @@ -98,9 +119,31 @@ GEM
dot-properties (0.1.4)
bundler (>= 2.2.33)
drb (2.2.1)
dry-core (1.0.1)
concurrent-ruby (~> 1.0)
zeitwerk (~> 2.6)
dry-inflector (1.0.0)
dry-logic (1.5.0)
concurrent-ruby (~> 1.0)
dry-core (~> 1.0, < 2)
zeitwerk (~> 2.6)
dry-struct (1.6.0)
dry-core (~> 1.0, < 2)
dry-types (>= 1.7, < 2)
ice_nine (~> 0.11)
zeitwerk (~> 2.6)
dry-types (1.7.2)
bigdecimal (~> 3.0)
concurrent-ruby (~> 1.0)
dry-core (~> 1.0)
dry-inflector (~> 1.0)
dry-logic (~> 1.4)
zeitwerk (~> 2.6)
ed25519 (1.3.0)
edtf (3.1.1)
activesupport (>= 3.0, < 8.0)
equivalent-xml (0.6.0)
nokogiri (>= 1.4.3)
erubi (1.13.0)
factory_bot (6.4.6)
activesupport (>= 5.0.0)
Expand All @@ -125,12 +168,15 @@ GEM
httpclient (2.8.3)
i18n (1.14.5)
concurrent-ruby (~> 1.0)
ice_nine (0.11.2)
io-console (0.7.2)
irb (1.13.2)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
iso-639 (0.3.6)
json (2.7.2)
jsonpath (1.1.5)
multi_json
language_server-protocol (3.17.0.3)
llhttp-ffi (0.5.0)
ffi-compiler (~> 1.0)
Expand All @@ -156,6 +202,7 @@ GEM
rails_autolink
stanford-mods (~> 3.3, >= 3.3.9)
view_component
multi_json (1.15.0)
mutex_m (0.2.0)
net-scp (4.0.0)
net-ssh (>= 2.6.5, < 8.0.0)
Expand All @@ -171,10 +218,16 @@ GEM
nom-xml (1.2.0)
i18n
nokogiri
openapi3_parser (0.10.0)
commonmarker (>= 1.0)
openapi_parser (1.0.0)
optimist (3.1.0)
parallel (1.25.1)
parser (3.3.3.0)
ast (~> 2.4.1)
racc
patience_diff (1.2.0)
optimist (~> 3.0)
pg (1.5.6)
psych (5.1.2)
stringio
Expand Down Expand Up @@ -264,12 +317,17 @@ GEM
net-scp (>= 1.1.2)
net-sftp (>= 2.1.2)
net-ssh (>= 2.8.0)
stanford-geo (0.2.0)
stanford-mods (3.3.9)
activesupport
mods (~> 3.0, >= 3.0.4)
statsd-ruby (1.5.0)
stringio (3.1.1)
strscan (3.1.0)
super_diff (0.12.1)
attr_extras (>= 6.2.4)
diff-lcs
patience_diff
thor (1.3.1)
traject (3.8.2)
concurrent-ruby (>= 0.8.0)
Expand Down Expand Up @@ -315,6 +373,7 @@ DEPENDENCIES
capistrano-bundler
capistrano-rvm
capistrano-shared_configs
cocina-models
config
csv
debouncer
Expand All @@ -337,6 +396,7 @@ DEPENDENCIES
ruby-kafka
simplecov
slop
stanford-geo (= 0.2.0)
stanford-mods (~> 3.0)
statsd-ruby
traject (~> 3.0)
Expand Down
4 changes: 2 additions & 2 deletions config/deploy/stage.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
{
key: 'earthworks_stage_indexer',
count: 1,
config: './lib/traject/config/geo_config.rb',
config: './lib/traject/config/geo_aardvark_config.rb',
settings: {
'log.file' => 'log/traject_earthworks-stage-indexer.log',
'kafka.topic' => 'purl_fetcher_stage',
'kafka.consumer_group_id' => 'earthworks-stage-indexer',
'solr.url' => 'http://sul-solr.stanford.edu/solr/earthworks-stage',
'solr.url' => 'http://sul-solr.stanford.edu/solr/earthworks-aardvark-stage',
'purl.url' => 'https://sul-purl-stage.stanford.edu',
'stacks.url' => 'https://sul-stacks-stage.stanford.edu',
'geoserver.pub_url' => 'https://earthworks-geoserver-stage-b.stanford.edu/geoserver',
Expand Down
67 changes: 57 additions & 10 deletions lib/public_cocina_record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,74 @@
require 'http'

class PublicCocinaRecord
attr_reader :druid, :purl_url
attr_reader :public_cocina_doc, :druid, :purl_url

def self.fetch(url)
response = HTTP.get(url)
response.body if response.status.ok?
def self.fetch(druid, purl_url: 'https://purl.stanford.edu')
response = HTTP.get("#{purl_url}/#{druid}.json")
new(druid, response.body, purl_url:) if response.status.ok?
end

def initialize(druid, purl_url: 'https://purl.stanford.edu')
def initialize(druid, public_cocina, purl_url: 'https://purl.stanford.edu')
@druid = druid
@purl_url = purl_url
@public_cocina_doc = JSON.parse(public_cocina)
end

# The 'access' entry of the parsed public Cocina document, memoized in
# @cocina_access once a truthy value has been read.
#
# @return [Object, nil] the value stored under 'access'
def cocina_access
  return @cocina_access if @cocina_access

  @cocina_access = public_cocina_doc['access']
end

# The 'structural' entry of the parsed public Cocina document, memoized in
# @cocina_structural once a truthy value has been read.
#
# @return [Object, nil] the value stored under 'structural'
def cocina_structural
  return @cocina_structural if @cocina_structural

  @cocina_structural = public_cocina_doc['structural']
end

# The 'description' entry of the parsed public Cocina document, memoized in
# @cocina_description once a truthy value has been read.
#
# @return [Object, nil] the value stored under 'description'
def cocina_description
  return @cocina_description if @cocina_description

  @cocina_description = public_cocina_doc['description']
end

# Builds Cocina::Models::Title objects from the 'title' entries of the
# description and hands them to the matching TitleBuilder method.
#
# @param type [Symbol] :main, :full or :additional
# @return [Object] whatever the selected TitleBuilder method returns
# @raise [ArgumentError] when type is not one of the three supported values
def cocina_titles(type: :main)
  # The title models are built before the type is checked.
  title_models = cocina_description['title'].map { |attrs| Cocina::Models::Title.new(attrs) }

  builder_method = { main: :main_title, full: :full_title, additional: :additional_titles }[type]
  raise ArgumentError, "Invalid title type: #{type}" if builder_method.nil?

  Cocina::Models::Builders::TitleBuilder.public_send(builder_method, title_models)
end

# Creation timestamp of the object: the 'created' value of the public
# Cocina document, parsed with Time.parse.
#
# @return [Time]
def created
  timestamp = public_cocina_doc['created']
  Time.parse(timestamp)
end

# Last-modified timestamp of the object: the 'modified' value of the public
# Cocina document, parsed with Time.parse.
#
# @return [Time]
def modified
  timestamp = public_cocina_doc['modified']
  Time.parse(timestamp)
end

# Short content type name: the last '/'-separated segment of the document's
# 'type' value.
#
# @return [String, nil] nil when the 'type' value splits into no segments
def content_type
  type_uri = public_cocina_doc['type']
  segments = type_uri.split('/')
  segments.last
end

# Flat list of the file entries nested under each file set, i.e. every
# element of structural.contains[].structural.contains.
#
# Returns an empty array when there is no structural section or it has no
# 'contains' value. File sets that lack a nested structural.contains
# contribute nothing, so the result never contains nil placeholders.
#
# @return [Array] the nested file entries, in document order
def files
  filesets = cocina_structural&.fetch('contains', nil) || []
  # dig returns nil for a file set without nested files; treat that as "no files".
  filesets.flat_map { |fileset| fileset.dig('structural', 'contains') || [] }
end

def public_cocina?
!!public_cocina
public_cocina.present?
end

# Whether this object is a collection, judged by content_type being
# exactly 'collection'.
#
# @return [Boolean]
def collection?
  type_name = content_type
  type_name == 'collection'
end

def public_cocina
@public_cocina ||= self.class.fetch("#{purl_url}/#{druid}.json")
# Whether either the 'view' or the 'download' access value is 'world'.
#
# @return [Boolean]
def public?
  cocina_access.values_at('view', 'download').include?('world')
end

def public_cocina_doc
@public_cocina_doc ||= JSON.parse(public_cocina)
# Whether either the 'view' or the 'download' access value is 'stanford'.
# Note that this is also true when the other of the two values is 'world'.
#
# @return [Boolean]
def stanford_only?
  cocina_access.values_at('view', 'download').include?('stanford')
end
end
10 changes: 4 additions & 6 deletions lib/public_xml_record.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
# frozen_string_literal: true

require 'http'
require 'active_support' # some transitive dependencies don't require active_support first, as they must in Rails 7
require 'active_support/core_ext/module/delegation'
require 'mods_display'
require 'dor/rights_auth'

class PublicXmlRecord
attr_reader :public_xml_doc, :druid, :purl_url

def self.fetch(purl_url, druid)
def self.fetch(druid, purl_url: 'https://purl.stanford.edu')
response = HTTP.get("#{purl_url}/#{druid}.xml")
new(purl_url, druid, response.body) if response.status.ok?
new(druid, response.body, purl_url:) if response.status.ok?
end

def initialize(purl_url, druid, public_xml)
@purl_url = purl_url
def initialize(druid, public_xml, purl_url: 'https://purl.stanford.edu')
@druid = druid
@purl_url = purl_url
@public_xml_doc = Nokogiri::XML(public_xml)
end

Expand Down
25 changes: 18 additions & 7 deletions lib/purl_record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,32 @@ def searchworks_id
catkey.nil? ? druid : catkey
end

def public_xml?
!!public_xml
# Splits the druid into segments of 2, 3, 2 and 4 characters and joins them
# with '/'. The pattern is not anchored, so it applies to the first eleven
# characters it can match; a druid too short to match raises NoMethodError
# because match returns nil.
#
# @return [String] e.g. 'bc/123/df/4567' for 'bc123df4567'
def druid_tree
  segments = druid.match(/(..)(...)(..)(....)/).captures
  segments.join('/')
end

def public_xml
@public_xml ||= PublicXmlRecord.fetch(purl_url, druid)
@public_xml ||= PublicXmlRecord.fetch(druid, purl_url:)
end

delegate :mods, :rights, :public?, :stanford_only?, :rights_xml, :collection?,
# The PublicCocinaRecord.fetch result for this druid and purl_url, memoized
# in @public_cocina. A nil or false result is not cached, so the fetch is
# attempted again on the next call.
#
# @return [Object, nil] whatever PublicCocinaRecord.fetch returns
def public_cocina
  return @public_cocina if @public_cocina

  @public_cocina = PublicCocinaRecord.fetch(druid, purl_url: purl_url)
end

# Whether a public XML record is available, as reported by calling
# present? on the public_xml value.
#
# @return [Boolean]
def public_xml?
  record = public_xml
  record.present?
end

# Whether a public Cocina record is available, as reported by calling
# present? on the public_cocina value.
#
# @return [Boolean]
def public_cocina?
  record = public_cocina
  record.present?
end

delegate :mods, :rights, :rights_xml, :collection?, :public?, :stanford_only?,
:thumb, :dor_content_type, :dor_resource_content_type, :dor_file_mimetype,
:dor_resource_count, :dor_read_rights, :collections, :constituents,
:catkey, :label, :stanford_mods, :mods_display,
:public_xml_doc, to: :public_xml

def druid_tree
druid.match(/(..)(...)(..)(....)/).captures.join('/')
end
delegate :cocina_access, :cocina_structural, :cocina_description, :cocina_titles,
:created, :modified, :public_cocina_doc, :content_type, :files, to: :public_cocina
end
Loading

0 comments on commit c18c49c

Please sign in to comment.