Skip to content

Commit

Permalink
Strip out more characters
Browse files Browse the repository at this point in the history
  • Loading branch information
dshorthouse committed Jun 29, 2024
1 parent 6d912fb commit dfa8215
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 3 deletions.
2 changes: 1 addition & 1 deletion dwc_agent.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Gem::Specification.new do |s|
s.name = 'dwc_agent'
s.version = DwcAgent::Version.version
s.license = 'MIT'
s.date = '2024-06-17'
s.date = '2024-06-30'
s.summary = "Parse Darwin Core agent terms such as recordedBy and identifiedBy"
s.description = "Parses the typically messy content in Darwin Core terms that contain people names"
s.authors = ["David P. Shorthouse"]
Expand Down
4 changes: 3 additions & 1 deletion lib/dwc_agent/constants.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module DwcAgent
STRIP_OUT = %r{
(?i:acc\s?\#)|
["'-]{2,}|
\-\.\s|
[,;]?\s*(?i:1st|2nd|3rd|[4-9]th)|
\s*?\d+\.\d+|
\b\d+\(?(?i:[[:alpha:]])\)?\b|
Expand Down Expand Up @@ -103,7 +105,7 @@ module DwcAgent
(?i:no\s+coll\.?(ector)?)|
(?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
(?i:non?)\s+(?i:specificato)|
\b[,;]\s+\d+\z|
\b[,;]\s+\d+\.?\z|
[!@?]|
[,]?\d+|
\s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
Expand Down
2 changes: 1 addition & 1 deletion lib/dwc_agent/version.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class Version

MAJOR = 3
MINOR = 1
PATCH = 2
PATCH = 3
BUILD = 0

def self.version
Expand Down
20 changes: 20 additions & 0 deletions spec/dwc_agent/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2143,5 +2143,25 @@ module DwcAgent
expect(parsed[2].values_at(:given, :family)).to eq(["West", nil])
end

# Regression example: the input begins with "! " and ends with ", 1996."
# (a trailing number followed by a period). The parser is expected to return
# exactly one name, with given "L.P." and family "Kvist".
it "should ignore exclamation marks and period at end" do
input = '! L.P. Kvist, 1996.'
parsed = parser.parse(input)
expect(parsed.size).to eq(1)
expect(parsed[0].values_at(:given, :family)).to eq(["L.P.", "Kvist"])
end

# Regression example: the name is wrapped in runs of three double quotes on
# both sides. The parser is expected to return exactly one name, with given
# "Antonio" and family "Bausa".
it "should strip out multiple quotes" do
input = '"""Antonio Bausa"""'
parsed = parser.parse(input)
expect(parsed.size).to eq(1)
expect(parsed[0].values_at(:given, :family)).to eq(["Antonio", "Bausa"])
end

# Regression example: the input has a leading "-- " and a "-. " before the
# second name. Only the first parsed element is checked: given "Luke" with a
# nil family.
# NOTE(review): unlike the neighbouring examples, this one does not assert
# parsed.size or how "Robertson" is parsed -- confirm whether that is intended.
it "should strip out double dashes and extra punctuation" do
input = "-- Luke & -. Robertson"
parsed = parser.parse(input)
expect(parsed[0].values_at(:given, :family)).to eq(["Luke", nil])
end

end
end

0 comments on commit dfa8215

Please sign in to comment.