Skip to content

Commit

Permalink
Adding python script to generate markdown document with human readabl…
Browse files Browse the repository at this point in the history
…e list of core MultiRecord Measure tests from TG2_multirecord_measure_tests.csv. Adding generated markdown human readable list of Core MultiRecord Measure tests generated from csv list as of 2024-08-13. Updating generated markdown human readable list of CORE tests generated from CORE test spreadsheet csv as of 2024-08-13.
  • Loading branch information
chicoreus committed Aug 13, 2024
1 parent f6c479f commit d0e17f8
Show file tree
Hide file tree
Showing 3 changed files with 10,095 additions and 35 deletions.
97 changes: 97 additions & 0 deletions tg2/core/generation/build/testmeasurecsvtodoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Produce markdown document listing tests from
# intermediate csv list of MultiRecord measure tests
# TG2_multirecord_measure_tests.csv
#
# @author Paul J. Morris
#
# Assumes run from generation/build directory of tg2 repository.
#
import pandas
import re
import sys
# Default input and output locations. They are relative paths, so the script
# must be run from the generation/build directory of the tg2 repository.
CSV_PATH = "../../TG2_multirecord_measure_tests.csv"
MARKDOWN_PATH = "../docs/core_multirecord_measure_tests.md"

# Columns that never get a generic "## <column>" section: Label, Description
# and Specification are rendered at the top of each test, and the issue
# bookkeeping columns are deliberately left out of the document.
_SKIPPED_HEADERS = frozenset(
    {"Label", "Description", "Specification", "IssueState", "IssueLabels"}
)

# Raw-string patterns; the non-raw "\]" / "\[" spellings are invalid escape
# sequences that newer Python versions warn about.
_LEADING_BRACKET = re.compile(r"^\[")
_TRAILING_BRACKET = re.compile(r"\]$")


def _write_front_matter(out):
    """Write the YAML metadata block that opens the markdown document."""
    print("---", file=out)
    print("title: TDWG BDQ Core MultiRecord Measure Tests", file=out)
    print("geometry: margin=1cm", file=out)
    print("titlepage: true", file=out)
    print("---", file=out)


def _write_use_case_index(data_frame, out):
    """Write the "Tests by UseCase" section: one heading per use case,
    followed by links to every test that lists that use case."""
    tests_by_use_case = {}
    for _, row in data_frame.iterrows():
        label = row["Label"]
        # UseCases is a comma separated list. str() keeps a missing cell from
        # raising; such a row is indexed under the literal string "nan".
        for use_case in str(row["UseCases"]).split(","):
            tests_by_use_case.setdefault(use_case.strip(), []).append(label)

    print("# Tests by UseCase", file=out)
    print("", file=out)
    print("These MultiRecord Measures operate on the results of Validations performed on a data set (a MultiRecord) for purposes of quality control or quality assurance.", file=out)
    print("", file=out)
    for use_case, labels in tests_by_use_case.items():
        print("## ", use_case, file=out)
        for label in labels:
            print("[", label, "](#", label, ")", sep="", end=", ", file=out)
        # First print() terminates the line of links, second adds a blank line.
        print(file=out)
        print(file=out)


def _write_examples(raw_examples, out):
    """Write the Examples section for one test.

    Examples arrive in the form
    [key:value response:value],[key:value response:value]
    and are written one per paragraph without the enclosing square brackets.
    The heading is written even when the cell is empty.
    """
    print("## Examples", file=out)
    print(file=out)
    if pandas.isna(raw_examples):
        return
    for example in str(raw_examples).split("],["):
        stripped = example.strip()
        print(_TRAILING_BRACKET.sub("", _LEADING_BRACKET.sub("", stripped)), file=out)
        # Blank line so each example is its own markdown paragraph.
        print(file=out)
    print(file=out)


def _write_test(row, header_names, out):
    """Write the full section for a single test (one row of the csv)."""
    print("# ", row["Label"], file=out)
    print("https://rs.tdwg.org/bdq/{}".format(row["GUID"]), file=out)
    print(file=out)
    print("## Description", file=out)
    print(file=out)
    print(row["Description"], file=out)
    print(file=out)
    print("## Specification", file=out)
    print(file=out)
    print(row["Specification"], file=out)
    print(file=out)
    for header in header_names:
        if header in _SKIPPED_HEADERS:
            continue
        cell = row[header]
        if header == "Examples":
            _write_examples(cell, out)
        elif header == "#":
            # The "#" column holds the GitHub issue number for the test.
            print("## Rationale Management", file=out)
            print(file=out)
            print("[GitHub Issue: {}](https://github.com/tdwg/bdq/issues/{})".format(cell, cell), file=out)
            print(file=out)
        elif not pandas.isna(cell):
            print("## ", header, file=out)
            print(file=out)
            # Some values (citations) are html lists; turn them into markdown
            # bullets.  str() lets numeric columns through instead of failing
            # on a missing .replace() method.
            text = str(cell)
            text = text.replace("<ul>", "")
            text = text.replace("</ul>", "")
            text = text.replace("<li>", "- ")
            text = text.replace("</li>", "\n")
            print(text, file=out)
            print(file=out)
            print(file=out)
    print("********************", file=out)
    print(file=out)


def write_document(data_frame, out):
    """Write the complete markdown document for data_frame to the text
    stream out: front matter, use case index, then one section per test
    in the row order of data_frame."""
    header_names = list(data_frame.columns)
    _write_front_matter(out)
    _write_use_case_index(data_frame, out)
    for _, row in data_frame.iterrows():
        _write_test(row, header_names, out)


def main(csv_path=CSV_PATH, markdown_path=MARKDOWN_PATH):
    """Read the MultiRecord measure test csv and write the markdown listing.

    csv_path      -- csv list of tests to read.
    markdown_path -- markdown file to create or overwrite.

    Exits with an error message if pandas cannot parse the csv.  The csv is
    parsed before the output file is opened, so a parse failure leaves any
    previously generated document untouched.
    """
    try:
        with open(csv_path, newline='') as csvfile:
            raw_frame = pandas.read_csv(csvfile)
    except pandas.errors.ParserError as e:
        sys.exit("Error reading file: {}".format(e))
    data_frame = raw_frame.sort_values(
        by=['Type', 'IE Class', 'Label'], ascending=[False, True, True]
    )
    # Write through an explicit, managed file handle rather than rebinding
    # sys.stdout, so the file is always closed and stdout is left alone.
    with open(markdown_path, "w") as out:
        write_document(data_frame, out)


if __name__ == "__main__":
    main()
Loading

0 comments on commit d0e17f8

Please sign in to comment.