-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Better handling of missing DOIs when outputting Crossref metadata (#551, #565) #627
Merged
Merged
Changes from 3 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
640b39e
Omit chapters with no DOI from Crossref output
rhigman a378dac
Raise error if no DOIs are found for either parent work or chapters
rhigman 57bb64b
Update changelog
rhigman 7b2ed50
Review markups: remove unnecessary variable, streamline unreachable c…
rhigman File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,14 +79,25 @@ impl XmlSpecification for DoiDepositCrossref { | |
|
||
impl XmlElementBlock<DoiDepositCrossref> for Work { | ||
fn xml_element<W: Write>(&self, w: &mut EventWriter<W>) -> ThothResult<()> { | ||
let work_type = match &self.work_type { | ||
let work = self; | ||
if work.doi.is_none() | ||
&& !work | ||
.relations | ||
.iter() | ||
.any(|r| r.relation_type == RelationType::HAS_CHILD && r.related_work.doi.is_some()) | ||
{ | ||
return Err(ThothError::IncompleteMetadataRecord( | ||
DEPOSIT_ERROR.to_string(), | ||
"No work or chapter DOIs to deposit".to_string(), | ||
)); | ||
} | ||
let work_type = match &work.work_type { | ||
WorkType::MONOGRAPH => "monograph", | ||
WorkType::EDITED_BOOK => "edited_book", | ||
WorkType::TEXTBOOK => "reference", | ||
WorkType::JOURNAL_ISSUE | WorkType::BOOK_SET | WorkType::BOOK_CHAPTER => "other", | ||
WorkType::Other(_) => unreachable!(), | ||
}; | ||
let work = self; | ||
// As an alternative to `book_metadata` and `book_series_metadata` below, | ||
// `book_set_metadata` can be used for works which are part of a set. | ||
// Omitted at present but could be considered as a future enhancement. | ||
|
@@ -132,7 +143,11 @@ impl XmlElementBlock<DoiDepositCrossref> for Work { | |
.iter() | ||
.filter(|r| r.relation_type == RelationType::HAS_CHILD) | ||
{ | ||
XmlElementBlock::<DoiDepositCrossref>::xml_element(chapter, w)?; | ||
// If chapter has no DOI, nothing to output (`content_item` element | ||
// representing chapter must contain `doi_data` element with `doi`) | ||
if chapter.related_work.doi.is_some() { | ||
XmlElementBlock::<DoiDepositCrossref>::xml_element(chapter, w)?; | ||
} | ||
} | ||
Ok(()) | ||
}) | ||
|
@@ -692,12 +707,8 @@ fn write_chapter_doi_collection<W: Write>( | |
)); | ||
} | ||
} else { | ||
// `doi_data` element is mandatory for `content_item`, and must contain | ||
// both `doi` element and `resource` (landing page) element | ||
return Err(ThothError::IncompleteMetadataRecord( | ||
DEPOSIT_ERROR.to_string(), | ||
"Missing chapter DOI".to_string(), | ||
)); | ||
// Caller should only pass in chapters which have DOIs | ||
unreachable!() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we can now get rid off this condition and simply unwrap the DOI
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh yes, nice :) |
||
} | ||
Ok(()) | ||
} | ||
|
@@ -1436,17 +1447,6 @@ mod tests { | |
output, | ||
"Could not generate doideposit::crossref: Missing chapter Landing Page".to_string() | ||
); | ||
|
||
// Restore landing page but remove DOI. Result: error, as above | ||
test_relations.related_work.edition = None; | ||
test_relations.related_work.landing_page = | ||
Some("https://www.book.com/chapter_one".to_string()); | ||
test_relations.related_work.doi = None; | ||
let output = generate_test_output(false, &test_relations); | ||
assert_eq!( | ||
output, | ||
"Could not generate doideposit::crossref: Missing chapter DOI".to_string() | ||
); | ||
} | ||
|
||
#[test] | ||
|
@@ -1974,6 +1974,9 @@ mod tests { | |
test_work.publications[0].locations.clear(); | ||
// Remove last (hardback) publication | ||
test_work.publications.pop(); | ||
// Change sole relation to chapter with no DOI | ||
test_work.relations[0].relation_type = RelationType::HAS_CHILD; | ||
test_work.relations[0].related_work.doi = None; | ||
|
||
let output = generate_test_output(true, &test_work); | ||
// Work type changed | ||
|
@@ -2022,6 +2025,18 @@ mod tests { | |
assert!(!output.contains(r#" <item crawler="yahoo">"#)); | ||
assert!(!output.contains(r#" <item crawler="scirus">"#)); | ||
assert!(!output.contains(r#" <collection property="text-mining">"#)); | ||
// Only chapters with no DOI supplied: no `content_item` elements emitted | ||
assert!(!output.contains(r#" <content_item component_type="chapter">"#)); | ||
assert!(!output.contains(r#" <title>Part</title>"#)); | ||
assert!(!output.contains(r#" <subtitle>One</subtitle>"#)); | ||
assert!(!output.contains(r#" <component_number>1</component_number>"#)); | ||
assert!(!output.contains(r#" <month>02</month>"#)); | ||
assert!(!output.contains(r#" <day>28</day>"#)); | ||
assert!(!output.contains(r#" <year>2000</year>"#)); | ||
assert!(!output.contains(r#" <pages>"#)); | ||
assert!(!output.contains(r#" <first_page>10</first_page>"#)); | ||
assert!(!output.contains(r#" <last_page>20</last_page>"#)); | ||
assert!(!output.contains(r#" <resource>https://www.book.com/part_one</resource>"#)); | ||
|
||
// Change work type, remove landing page, remove XML ISBN, | ||
// remove all but the omitted contributor | ||
|
@@ -2065,18 +2080,31 @@ mod tests { | |
assert!(!output.contains(r#" <doi>10.00001/BOOK.0001</doi>"#)); | ||
assert!(!output.contains(r#" <resource>https://www.book.com</resource>"#)); | ||
|
||
// Change work type again, replace landing page but remove DOI | ||
// Remove DOI (so neither work nor chapter DOIs are present). Result: error | ||
test_work.doi = None; | ||
let output = generate_test_output(false, &test_work); | ||
assert_eq!( | ||
output, | ||
"Could not generate doideposit::crossref: No work or chapter DOIs to deposit" | ||
.to_string() | ||
); | ||
|
||
// Change work type again, replace landing page, replace chapter DOI | ||
test_work.work_type = WorkType::JOURNAL_ISSUE; | ||
test_work.landing_page = Some("https://www.book.com".to_string()); | ||
test_work.doi = None; | ||
test_work.relations[0].related_work.doi = | ||
Some(Doi::from_str("https://doi.org/10.00001/PART.0001").unwrap()); | ||
let output = generate_test_output(true, &test_work); | ||
// Work type changed | ||
assert!(!output.contains(r#" <book book_type="reference">"#)); | ||
assert!(output.contains(r#" <book book_type="other">"#)); | ||
// No DOI: entire `doi_data` element omitted (even though landing page restored) | ||
assert!(!output.contains(r#" <doi_data>"#)); | ||
// No work DOI: entire work-specific `doi_data` element omitted (even though landing page restored) | ||
assert!(!output.contains(r#" <doi>10.00001/BOOK.0001</doi>"#)); | ||
assert!(!output.contains(r#" <resource>https://www.book.com</resource>"#)); | ||
// But chapter-specific `doi_data` element will be present (at same nesting level) | ||
assert!(output.contains(r#" <doi_data>"#)); | ||
assert!(output.contains(r#" <doi>10.00001/PART.0001</doi>"#)); | ||
assert!(output.contains(r#" <resource>https://www.book.com/part_one</resource>"#)); | ||
|
||
// Remove publication date. Result: error | ||
test_work.publication_date = None; | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this was introduced earlier, but I don't think it's necessary to declare this variable