Commit
Update parity checking between OStt and reference
mohammad2928 committed Jul 21, 2021
1 parent 3670ba7 commit 3857d18
Showing 6 changed files with 68 additions and 42 deletions.
30 changes: 14 additions & 16 deletions SLTev/ASReval.py
@@ -5,6 +5,7 @@
from utilities import check_empty_line, osst_checking, pipeline_input, get_SLTev_home_path
from utilities import check_time_stamp_candiates_format, submission_argument, parity_checking_between_ostt_reference
from evaluator import normal_timestamp_evaluation, simple_timestamp_evaluation
from evaluator import normal_evaluation_without_parity
from ASRev import simple_asr_evaluation, normal_asr_evaluation


@@ -57,34 +58,31 @@ def main(input_files=[], file_formats=[], simple="False"):
continue
if osst_checking(gold_files["ostt"][0]) == 1: # OStt checking
continue


inputs_object = {
'ostt': gold_files["ostt"][0],
'references': gold_files["ost"],
'SLTev_home': SLTev_home,
'candidate': candidate_file[0]
}

_ = check_time_stamp_candiates_format(candidate_file[0]) # submission checking

parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ost"])
if parity_state == 0:
eprint(
"Evaluation for ",
candidate_file[0],
" failed, the number of Complete lines (C) in ",
gold_files["ostt"][0],
" and ",
" ".join(gold_files["ost"]),
" are not equal",
)
eprint(error)
normal_evaluation_without_parity(inputs_object)
continue

_ = check_time_stamp_candiates_format(candidate_file[0]) # submission checking

print(
"Evaluating the file ",
candidate_file[0],
" in terms of translation quality against ",
" ".join(gold_files["ost"]),
)

inputs_object = {
'ostt': gold_files["ostt"][0],
'references': gold_files["ost"],
'SLTev_home': SLTev_home,
'candidate': candidate_file[0]
}
if simple == "False":
normal_timestamp_evaluation(inputs_object)
else:
37 changes: 14 additions & 23 deletions SLTev/SLTeval.py
@@ -4,7 +4,7 @@
from utilities import check_empty_line, osst_checking, pipeline_input, get_SLTev_home_path
from utilities import check_time_stamp_candiates_format, submission_argument, parity_checking_between_ostt_reference
from evaluator import normal_timestamp_evaluation, simple_timestamp_evaluation

from evaluator import normal_evaluation_without_parity

def reference_checking(gold_files):
check_all_gold_files_flag = 0
@@ -36,23 +36,23 @@ def main(input_files=[], file_formats=[], simple="False"):
if osst_checking(gold_files["ostt"][0]) == 1: # OStt checking
continue

parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ref"])
if parity_state == 0:
eprint(
"evaluation for ",
candidate_file[0],
" failed, the number of Complete lines (C) in ",
gold_files["ostt"][0],
" and ",
" ".join(gold_files["ref"]),
" are not equal",
)
eprint(error)
continue
evaluation_object = {
'ostt':gold_files["ostt"][0],
'references': gold_files["ref"],
'candidate': candidate_file[0],
'align': gold_files["align"],
'SLTev_home': sltev_home,
}

state = check_time_stamp_candiates_format(candidate_file[0]) # candidate checking
if state:
continue

parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ref"])
if parity_state == 0:
eprint(error)
normal_evaluation_without_parity(evaluation_object)
continue

if (
gold_files["align"] != [] and
@@ -72,15 +72,6 @@
" ".join(gold_files["ref"]),
)


evaluation_object = {
'ostt':gold_files["ostt"][0],
'references': gold_files["ref"],
'candidate': candidate_file[0],
'align': gold_files["align"],
'SLTev_home': sltev_home,
}

if simple == "False":
normal_timestamp_evaluation(evaluation_object)
else:
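In both ASReval.py and SLTeval.py above, a failed parity check no longer skips the candidate: the error is printed and evaluation falls back to normal_evaluation_without_parity, which reports quality and flicker scores but no timestamp-based metrics. As the error message states, the check compares the number of Complete (C) segments in the OStt file with the number of sentences in each reference. A minimal sketch of that comparison (not the actual parity_checking_between_ostt_reference implementation; it assumes the usual OStt convention that each line starts with a P or C flag, and the file paths are hypothetical):

def count_complete_lines(ostt_path):
    # OStt lines flagged "C" are complete sentences; "P" lines are partial updates.
    with open(ostt_path, encoding="utf-8") as ostt:
        return sum(1 for line in ostt if line.split() and line.split()[0] == "C")

def count_reference_sentences(ref_path):
    # Each non-empty line in a reference file counts as one sentence.
    with open(ref_path, encoding="utf-8") as ref:
        return sum(1 for line in ref if line.strip())

ostt_file, reference_files = "talk1.OStt", ["talk1.ref"]  # hypothetical paths
complete_count = count_complete_lines(ostt_file)
parity_ok = all(count_reference_sentences(r) == complete_count for r in reference_files)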
2 changes: 1 addition & 1 deletion SLTev/__init__.py
@@ -5,4 +5,4 @@
sltev_home = pkg_resources.resource_filename("SLTev", "")
sys.path.insert(1, sltev_home)

__version__ = "1.2.1"
__version__ = "1.2.2"
38 changes: 38 additions & 0 deletions SLTev/evaluator.py
@@ -267,6 +267,44 @@ def time_span_bleu_score_evaluation(evaluation_object):
except:
pass


def normal_evaluation_without_parity(inputs_object):
print_headers()
current_path = os.getcwd()
MovedWords = 1 # count of moving words to each side when we are using Time-based segmentation and word-based segmentation
references = read_references(inputs_object.get('references', []))
references_statistical_info(references) # print statistical info
average_refernces_token_count = get_average_references_token_count(references)
candidate_sentences = read_candidate_file(inputs_object.get('candidate'))

evaluation_object = {
'candidate_sentences': candidate_sentences,
'language': 'en',
'SLTev_home': inputs_object.get('SLTev_home'),
'current_path': current_path,
'MovedWords': MovedWords,
'average_refernces_token_count': average_refernces_token_count,
'references': references,
'time_span': 3000
}

# bleu score evaluation
documantlevel_bleu_score_evaluation(references, candidate_sentences)
wordbased_segmenter_bleu_score_evaluation(evaluation_object)

#flicker evaluation
print("tot Flicker count_changed_Tokens ", int(calc_revise_count(candidate_sentences)))
print("tot Flicker count_changed_content ", int(calc_flicker_score(candidate_sentences)))
print(
"mean flicker across sentences ",
str("{0:.3f}".format(round(calc_average_flickers_per_sentence(candidate_sentences), 3))),
)
print(
"mean flicker across whole documents ",
str("{0:.3f}".format(round(calc_average_flickers_per_tokens(candidate_sentences), 3))),
)


def simple_timestamp_evaluation(inputs_object):
current_path = os.getcwd()
MovedWords = 1 # count of moving words to each side when we are using Time-based segmentation and word-based segmentation
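The new normal_evaluation_without_parity entry point takes the same dictionary its two callers build. A usage sketch with hypothetical file paths (the keys mirror what ASReval.py and SLTeval.py pass; judging from the body above, 'ostt' is accepted but not read, and the language and time span are fixed internally to 'en' and 3000 ms):

from evaluator import normal_evaluation_without_parity

inputs_object = {
    "ostt": "talk1.OStt",            # kept for interface symmetry with the other evaluators
    "references": ["talk1.ref"],     # one or more reference files
    "SLTev_home": "/path/to/SLTev",  # SLTev package/resource directory
    "candidate": "system.slt",       # candidate (submission) file
}
normal_evaluation_without_parity(inputs_object)

Compared with normal_timestamp_evaluation, this path prints only the reference statistics, the document-level and word-based-segmenter BLEU scores, and the flicker figures, which is why it can run when OStt/reference parity cannot be established.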
1 change: 0 additions & 1 deletion SLTev/files_modules.py
@@ -265,4 +265,3 @@ def quality_segmenter(evaluation_object, temp_folder):
os.chdir("..")
shutil.rmtree(temp_folder)
return candidate_segments, mWERQuality

2 changes: 1 addition & 1 deletion SLTev/quality_modules.py
@@ -50,7 +50,7 @@ def calc_bleu_score_segmenterlevel(evaluation_object, temp_folder):
segmenter_sentence, _ = quality_segmenter(evaluation_object, temp_folder)
sys = [" ".join(i) for i in segmenter_sentence]
refs = merge_references_sentences[:]
bleu_sacre = sacrebleu.corpus_bleu(sys, refs)
bleu_sacre = sacrebleu.corpus_bleu(sys, refs, force=True)
sacre_bleu_score = bleu_sacre.score
return sacre_bleu_score

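The only functional change in quality_modules.py is the force=True argument to sacrebleu.corpus_bleu, presumably because the segmenter output scored here is whitespace-joined tokens and sacrebleu would otherwise warn about already-tokenized input. A minimal sketch with made-up hypothesis and reference strings:

import sacrebleu

# Made-up, pre-tokenized hypothesis and reference streams.
hypotheses = ["this is a tokenized sentence ."]
references = [["this is a tokenized sentence ."]]  # list of reference streams

# force=True scores the text as given instead of warning that it looks
# tokenized and should be detokenized before scoring.
bleu = sacrebleu.corpus_bleu(hypotheses, references, force=True)
print(round(bleu.score, 2))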
