Skip to content

Commit

Permalink
textvalidation: added TextValidationMode and TextValidationResult
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed May 25, 2024
1 parent c5c0071 commit 28fca87
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 67 deletions.
3 changes: 3 additions & 0 deletions src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ pub use webanno::*;
#[cfg(feature = "transpose")]
pub use transpose::*;

#[cfg(feature = "textvalidation")]
pub use crate::textvalidation::*;

use crate::annotation::{Annotation, AnnotationHandle};
use crate::annotationdata::{AnnotationData, AnnotationDataHandle};
use crate::annotationdataset::{AnnotationDataSet, AnnotationDataSetHandle};
Expand Down
170 changes: 108 additions & 62 deletions src/textvalidation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,86 @@ use sha1::{Digest, Sha1};

const TEXTVALIDATION_SET: &str = "https://w3id.org/stam/extensions/stam-textvalidation/";

#[derive(Copy, Clone, PartialEq)]
pub enum TextValidationMode {
/// Use checksums
Checksum,
/// Use text
Text,
/// Use both
Both,
/// Automatically choose based on text length
Auto,
}

impl Default for TextValidationMode {
fn default() -> Self {
Self::Auto
}
}

pub struct TextValidationResult {
valid: usize,
invalid: usize,
missing: usize,
}

impl TextValidationResult {
/// Returns the number of annotations that do not have validation information
pub fn missing(&self) -> usize {
self.missing
}
/// Returns the number of annotations that are valid
pub fn valid(&self) -> usize {
self.valid
}
/// Returns the number of annotations that are invalid
pub fn invalid(&self) -> usize {
self.invalid
}
/// Returns a boolean representing the validation result
pub fn is_ok(&self) -> bool {
self.invalid == 0 && self.missing == 0
}
pub fn is_ok_maybe_incomplete(&self) -> bool {
self.invalid == 0
}
}

impl AnnotationStore {
/// Mark all annotations in this store as valid, enabling text validation checks in the future.
pub fn make_validation_checksums(&mut self) -> Result<(), StamError> {
let mut queue = Vec::new();
pub fn make_validation(&mut self, mode: TextValidationMode) -> Result<(), StamError> {
let mut queue_checksums = Vec::new();
let mut queue_texts = Vec::new();
for annotation in self.annotations() {
if annotation.validation_checksum().is_none() {
let (do_checksum, do_text) = match mode {
TextValidationMode::Checksum => (true, false),
TextValidationMode::Text => (false, true),
TextValidationMode::Both => (true, true),
TextValidationMode::Auto => {
let textlen = annotation
.textselections()
.fold(0 as usize, |a, x| a + (x.end() - x.begin()));
if textlen < 40 {
(false, true)
} else {
(true, false)
}
}
};

if do_checksum && annotation.validation_checksum().is_none() {
if let Some(checksum) =
annotation.text_checksum(annotation.text_validation_delimiter().unwrap_or(""))
{
queue.push((annotation.handle(), checksum));
queue_checksums.push((annotation.handle(), checksum));
}
}
if do_text && annotation.validation_text().is_none() {
let text =
annotation.text_join(annotation.text_validation_delimiter().unwrap_or(""));
if !text.is_empty() {
queue_texts.push((annotation.handle(), text));
}
}
}
Expand All @@ -50,7 +120,7 @@ impl AnnotationStore {

// add validation information to existing annotations
if let Some(set_handle) = set_handle {
for (handle, checksum) in queue {
for (handle, checksum) in queue_checksums {
let set: &mut AnnotationDataSet = self.get_mut(set_handle)?;
let data_handle = set.insert_data(BuildItem::None, "checksum", checksum, true)?;
let annotation: &mut Annotation = self.get_mut(handle)?;
Expand All @@ -59,37 +129,7 @@ impl AnnotationStore {
self.dataset_data_annotation_map
.insert(set_handle, data_handle, handle);
}
} else {
panic!("Set must exist");
}
Ok(())
}

/// Mark all annotations in this store as valid, enabling text validation checks in the future.
/// Note, this may have considerable space overhead, [`make_validation_checksums()`] is preferred!
pub fn make_validation_texts(&mut self) -> Result<(), StamError> {
let mut queue = Vec::new();
for annotation in self.annotations() {
if annotation.validation_text().is_none() {
let text =
annotation.text_join(annotation.text_validation_delimiter().unwrap_or(""));
if !text.is_empty() {
queue.push((annotation.handle(), text));
}
}
}

// get or add the dataset
let mut set_handle = self.dataset(TEXTVALIDATION_SET).map(|set| set.handle());
if set_handle.is_none() {
set_handle = self
.insert(AnnotationDataSet::new(self.config.clone()).with_id(TEXTVALIDATION_SET))
.ok();
}

// add validation information to existing annotations
if let Some(set_handle) = set_handle {
for (handle, text) in queue {
for (handle, text) in queue_texts {
let set: &mut AnnotationDataSet = self.get_mut(set_handle)?;
let data_handle = set.insert_data(BuildItem::None, "text", text, true)?;
let annotation: &mut Annotation = self.get_mut(handle)?;
Expand All @@ -112,32 +152,34 @@ impl AnnotationStore {
}

/// Validate the text of all annotations
pub fn validate_text(&self, warn_for_all: bool) -> Result<(), StamError> {
let mut failures = 0;
/// If quiet is not set, it will output validation failures to stderr
pub fn validate_text(&self, quiet: bool) -> TextValidationResult {
let mut total = TextValidationResult {
invalid: 0,
valid: 0,
missing: 0,
};
for annotation in self.annotations() {
if !annotation.validate_text() {
let msg = format!(
"Failed on {}",
annotation
.id()
.unwrap_or(annotation.as_ref().temp_id().as_deref().unwrap())
);
failures += 1;
if warn_for_all {
eprintln!("[STAM VALIDATION] {}", msg);
let result = annotation.validate_text();
if result == Some(true) {
total.valid += 1;
} else {
if result.is_none() {
total.missing += 1;
} else {
return Err(StamError::ValidationError(msg, ""));
total.invalid += 1;
}
if !quiet {
eprintln!(
"[STAM VALIDATION] Failed on {}",
annotation
.id()
.unwrap_or(annotation.as_ref().temp_id().as_deref().unwrap())
);
}
}
}
if failures == 0 {
Ok(())
} else {
return Err(StamError::ValidationError(
format!("{} annotations failed to validate their text!", failures),
"",
));
}
return total;
}
}

Expand Down Expand Up @@ -184,24 +226,28 @@ impl<'store> ResultItem<'store, Annotation> {
}

/// Tests if the annotation's target text is still valid
/// Returns false if there is no validation information in the annotation at all!
pub fn validate_text(&self) -> bool {
/// Returns None if there is no validation information in the annotation at all!
pub fn validate_text(&self) -> Option<bool> {
let delimiter = self.text_validation_delimiter();
let mut found = false;
if let Some(refchecksum) = self.validation_checksum() {
let checksum = self.text_checksum(delimiter.unwrap_or(""));
found = true;
if checksum.as_deref() != Some(refchecksum) {
return false;
return Some(false);
}
}
if let Some(reftext) = self.validation_text() {
let text = self.text_join(delimiter.unwrap_or(""));
found = true;
if reftext != text {
return false;
return Some(false);
}
}
found
if found {
Some(true)
} else {
None
}
}
}
10 changes: 5 additions & 5 deletions tests/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2587,21 +2587,21 @@ fn remove_key_strict() -> Result<(), StamError> {
fn textvalidation_none() -> Result<(), StamError> {
let store = setup_example_1()?;
let annotation = store.annotation("A1").or_fail()?;
assert_eq!(annotation.validate_text(), false);
assert_eq!(annotation.validate_text(), None);
Ok(())
}

#[test]
#[cfg(feature = "textvalidation")]
fn textvalidation_checksum() -> Result<(), StamError> {
let mut store = setup_example_1()?;
store.make_validation_checksums()?;
store.make_validation(TextValidationMode::Checksum)?;
let annotation = store.annotation("A1").or_fail()?;
assert_eq!(
annotation.validation_checksum(),
Some("7c211433f02071597741e6ff5a8ea34789abbf43")
);
assert_eq!(annotation.validate_text(), true);
assert_eq!(annotation.validate_text(), Some(true));
assert!(store.validate_text(true).is_ok());
Ok(())
}
Expand All @@ -2610,10 +2610,10 @@ fn textvalidation_checksum() -> Result<(), StamError> {
#[cfg(feature = "textvalidation")]
fn textvalidation_texts() -> Result<(), StamError> {
let mut store = setup_example_1()?;
store.make_validation_texts()?;
store.make_validation(TextValidationMode::Text)?;
let annotation = store.annotation("A1").or_fail()?;
assert_eq!(annotation.validation_text(), Some("world"));
assert_eq!(annotation.validate_text(), true);
assert_eq!(annotation.validate_text(), Some(true));
assert!(store.validate_text(true).is_ok());
Ok(())
}

0 comments on commit 28fca87

Please sign in to comment.