From 3fa8f010a796723a6345bb2455abf8393106e51b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:04:48 -0500 Subject: [PATCH 01/11] Move v1.2 datapackage files into a subfolder --- .../{ => v1.2}/SCoData_books_v1.2_2022-12_datapackage.json | 0 .../{ => v1.2}/SCoData_combined_v1.2_2022-01_datapackage.json | 0 .../{ => v1.2}/SCoData_events_v1.2_2022-01_datapackage.json | 0 .../{ => v1.2}/SCoData_members_v1.2_2022-01_datapackage.json | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename data-export-meta/{ => v1.2}/SCoData_books_v1.2_2022-12_datapackage.json (100%) rename data-export-meta/{ => v1.2}/SCoData_combined_v1.2_2022-01_datapackage.json (100%) rename data-export-meta/{ => v1.2}/SCoData_events_v1.2_2022-01_datapackage.json (100%) rename data-export-meta/{ => v1.2}/SCoData_members_v1.2_2022-01_datapackage.json (100%) diff --git a/data-export-meta/SCoData_books_v1.2_2022-12_datapackage.json b/data-export-meta/v1.2/SCoData_books_v1.2_2022-12_datapackage.json similarity index 100% rename from data-export-meta/SCoData_books_v1.2_2022-12_datapackage.json rename to data-export-meta/v1.2/SCoData_books_v1.2_2022-12_datapackage.json diff --git a/data-export-meta/SCoData_combined_v1.2_2022-01_datapackage.json b/data-export-meta/v1.2/SCoData_combined_v1.2_2022-01_datapackage.json similarity index 100% rename from data-export-meta/SCoData_combined_v1.2_2022-01_datapackage.json rename to data-export-meta/v1.2/SCoData_combined_v1.2_2022-01_datapackage.json diff --git a/data-export-meta/SCoData_events_v1.2_2022-01_datapackage.json b/data-export-meta/v1.2/SCoData_events_v1.2_2022-01_datapackage.json similarity index 100% rename from data-export-meta/SCoData_events_v1.2_2022-01_datapackage.json rename to data-export-meta/v1.2/SCoData_events_v1.2_2022-01_datapackage.json diff --git a/data-export-meta/SCoData_members_v1.2_2022-01_datapackage.json b/data-export-meta/v1.2/SCoData_members_v1.2_2022-01_datapackage.json similarity index 100% rename from 
data-export-meta/SCoData_members_v1.2_2022-01_datapackage.json rename to data-export-meta/v1.2/SCoData_members_v1.2_2022-01_datapackage.json From affec27074413805003c7a958c09a2f32d710054 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:29:51 -0500 Subject: [PATCH 02/11] Generate readme info for all resources in a datapackage file --- data-export-meta/readme_info.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data-export-meta/readme_info.py b/data-export-meta/readme_info.py index 8f5223ba..f3d7ba69 100755 --- a/data-export-meta/readme_info.py +++ b/data-export-meta/readme_info.py @@ -33,8 +33,8 @@ def readme_info(df, dp_resource): with open(sys.argv[1]) as packagejson: datapackage = json.load(packagejson) - csvfile = datapackage["resources"][0]["path"] - print("Inspecting %s...\n\n" % csvfile) - - df = pd.read_csv(csvfile) - readme_info(df, datapackage["resources"][0]) + for resource in datapackage["resources"]: + csvfile = resource["path"] + print("\n\nInspecting %s...\n\n" % csvfile) + df = pd.read_csv(csvfile) + readme_info(df, resource) From 2219e3462eca38946a4f7685fdb66960322f240a Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:37:39 -0500 Subject: [PATCH 03/11] Update to use argparse and make field list optional --- data-export-meta/readme_info.py | 42 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/data-export-meta/readme_info.py b/data-export-meta/readme_info.py index f3d7ba69..c72dbde9 100755 --- a/data-export-meta/readme_info.py +++ b/data-export-meta/readme_info.py @@ -8,11 +8,13 @@ import json import sys +import argparse +import pathlib import pandas as pd -def readme_info(df, dp_resource): +def readme_info(df, dp_resource, field_list=True): print("1. Number of fields: %d\n" % len(df.columns)) print("2. 
Number of rows: {:,}\n".format(len(df))) schema_fields = dp_resource["schema"]["fields"] @@ -20,21 +22,33 @@ def readme_info(df, dp_resource): assert len(schema_fields) == len(df.columns) field_info = {field["name"]: field for field in schema_fields} - print("3. Field List:") - for col in df.columns: - print("%s : %s" % (col, field_info[col]["description"])) + if field_list: + print("3. Field List:") + for col in df.columns: + print("%s : %s" % (col, field_info[col]["description"])) if __name__ == "__main__": - if len(sys.argv) < 2: - print("Please provide path to frictionless datapackage file") - exit(0) - - with open(sys.argv[1]) as packagejson: + parser = argparse.ArgumentParser( + "Generate dataset info readme from datapackage and data files" + ) + parser.add_argument("datapackage", type=pathlib.Path) + # flag to determine whether fields be listed + parser.add_argument( + "--field-list", + help="Generate field list in readme.txt format", + action=argparse.BooleanOptionalAction, + default=True, + ) + args = parser.parse_args() + + with args.datapackage.open() as packagejson: datapackage = json.load(packagejson) - for resource in datapackage["resources"]: - csvfile = resource["path"] - print("\n\nInspecting %s...\n\n" % csvfile) - df = pd.read_csv(csvfile) - readme_info(df, resource) + for resource in datapackage["resources"]: + # resource path should be relative to the datapackage file + datafile = args.datapackage.parent / resource["path"] + print("\n\nInspecting %s...\n\n" % datafile) + with datafile.open() as csvfile: + df = pd.read_csv(csvfile) + readme_info(df, resource, field_list=args.field_list) From eda1c26e5619a4465bc526682a7a5c4654cc305a Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:37:59 -0500 Subject: [PATCH 04/11] Add preliminary v2.0 datapackage file --- .../v2.0/SCoData_v2.0_2024_datapackage.json | 734 ++++++++++++++++++ 1 file changed, 734 insertions(+) create mode 100644 
data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json diff --git a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json new file mode 100644 index 00000000..62581637 --- /dev/null +++ b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json @@ -0,0 +1,734 @@ +{ + "profile": "tabular-data-package", + "resources": [ + { + "name": "book_creators", + "path": "SCoData_book_creators_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "id", + "type": "string", + "format": "default", + "title": "Creator identifier", + "description": "unique identifier for creator" + }, + { + "name": "name", + "type": "string", + "format": "default", + "title": "Name ", + "description": "full name; may include variant names; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" + }, + { + "name": "sort_name", + "type": "string", + "format": "default", + "title": "Sort name ", + "description": "authorized name" + }, + { + "name": "gender", + "type": "string", + "format": "default", + "title": "Gender", + "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender\n" + }, + { + "name": "is_organization", + "type": "boolean", + "format": "default", + "title": "Is an organization? 
", + "description": "creator is an organization instead of a person (boolean)" + }, + { + "name": "birth_year", + "type": "integer", + "title": "Birth year", + "description": "birth year, if known" + }, + { + "name": "death_year", + "type": "integer", + "title": "Death year", + "description": "death year, if known" + }, + { + "name": "viaf_url", + "type": "string", + "format": "default", + "title": "VIAF url ", + "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" + }, + { + "name": "wikipedia_url", + "type": "string", + "format": "uri", + "title": "Wikipedia url", + "description": "URL for Wikipedia page, if available" + }, + { + "name": "nationalities", + "type": "string", + "format": "default", + "title": "Nationalities", + "description": "countries for known nationality (if multiple, separated by semicolons)" + }, + { + "name": "member_uri", + "type": "string", + "format": "uri", + "title": "Member URI", + "description": "identifier; member detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "notes", + "type": "string", + "format": "default", + "title": "Notes", + "description": "more information (text with markdown formatting)" + }, + { + "name": "updated", + "type": "datetime", + "format": "default", + "title": "Date updated", + "description": "timestamp record was last modified in the Shakespeare and Company Project database before export" + } + ] + } + }, + { + "name": "books", + "path": "SCoData_books_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "id", + "type": "string", + "format": "default", + "title": "Book identifier", + "description": "unique identifier for book" + }, + { + "name": "uri", + "type": "string", + "format": "uri", + "title": "Book URI", + "description": "full URI for book; book detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "title", + "type": "string", + "format": "default", + 
"title": "Title ", + "description": "title of the book or other item" + }, + { + "name": "author", + "type": "string", + "format": "default", + "title": "Author", + "description": "author or authors, last name first (multiple, separated by semicolon)" + }, + { + "name": "editor", + "type": "any", + "format": "default", + "title": "Editor(s)", + "description": "editor(s) of work" + }, + { + "name": "translator", + "type": "any", + "format": "default", + "title": "Translator(s)", + "description": "translator(s) of work" + }, + { + "name": "introduction", + "type": "any", + "format": "default", + "title": "Author of introduction", + "description": "author of an introduction to work" + }, + { + "name": "illustrator", + "type": "any", + "format": "default", + "description": "illustrator(s) of work", + "title": "Illustrator(s)" + }, + { + "name": "photographer", + "type": "any", + "format": "default", + "title": "Photographer(s)", + "description": "photographer(s) featured in work" + }, + { + "name": "year", + "type": "year", + "format": "default", + "description": "year published", + "title": "Publication year" + }, + { + "name": "format", + "type": "string", + "format": "default", + "title": "Format", + "description": "type of item (article, book, periodical, phonograph record, photograph, print)" + }, + { + "name": "genre_category", + "type": "string", + "format": "default", + "title": "Genre", + "description": "genre of work (drama, fiction, nonfiction, periodical, poetry; if multiple, separated by semicolons)" + }, + { + "name": "uncertain", + "type": "boolean", + "format": "default", + "title": "Item is uncertain?", + "description": "boolean indicating if item is ambiguous or unidentifiable" + }, + { + "name": "ebook_url", + "type": "string", + "format": "uri", + "title": "Ebook URL", + "description": "link to a digital edition of this work" + }, + { + "name": "volumes_issues", + "type": "any", + "format": "default", + "title": "Volume/Issue numbers", + 
"description": "list of multivolume volumes or periodical issues known to have circulated (separated by semicolon)" + }, + { + "name": "notes", + "type": "any", + "format": "default", + "title": "Notes", + "description": "more information, e.g. about uncertain titles (text with markdown formatting)" + }, + { + "name": "event_count", + "type": "integer", + "format": "default", + "title": "Associated events count", + "description": "total number of events associated with this title (integer)" + }, + { + "name": "borrow_count", + "type": "integer", + "format": "default", + "title": "Borrow count", + "description": "total number of borrowing events associated with this title (integer)" + }, + { + "name": "purchase_count", + "type": "integer", + "format": "default", + "title": "Purchase count", + "description": "total number of purchase events associated with this title (integer)" + }, + { + "name": "circulation_years", + "type": "string", + "rdfType": "https://schema.org/ItemList", + "format": "default", + "constraints": { + "pattern": "(\\d{4})?(;\\d{4})*" + }, + "title": "Circulation years", + "description": "list of years of known activity for this title (if multiple, separated by semicolon)" + }, + { + "name": "updated", + "type": "datetime", + "format": "default", + "title": "Date updated", + "description": "timestamp record was last modified in the Shakespeare and Company Project database before export" + } + ] + } + }, + { + "name": "events", + "path": "SCoData_events_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "event_type", + "type": "string", + "format": "default", + "title": "Event type", + "description": "type of event" + }, + { + "name": "start_date", + "type": "string", + "rdfType": "https://schema.org/Date", + "constraints": { + "pattern": "(\\d{4}|-)?(?:-([01]\\d))?(?:-([0-3]\\d))?" 
+ }, + "title": "Start date", + "description": "start date, if known (ISO 8601 format; YYYY, YY-MM, YYYY-MM-DD, or --MM-DD)" + }, + { + "name": "end_date", + "type": "any", + "format": "default", + "title": "End date", + "description": "end date, if known (ISO 8601 format; YYYY, YY-MM, YYYY-MM-DD, or --MM-DD)" + }, + { + "name": "member_ids", + "type": "string", + "format": "default", + "title": "Member indentifier", + "description": "unique identifier for members associated with this event (if multiple, separated by semicolons)" + }, + { + "name": "member_uris", + "type": "string", + "format": "default", + "title": "Member URI", + "description": "list of URIs for members associated with this event (if multiple, separated by semicolons)" + }, + { + "name": "member_names", + "type": "string", + "format": "default", + "title": "Member name", + "description": "list of full member names with variants (if multiple, separated by semicolons; order matches member_uris)" + }, + { + "name": "member_sort_names", + "type": "string", + "format": "default", + "title": "Member sort name", + "description": "list of member authorized sort names (if multiple, separated by semicolons; order matches member_uris)" + }, + { + "name": "subscription_price_paid", + "type": "number", + "format": "default", + "title": "Subscription price paid", + "description": "amount paid for a subscription event (numeric)" + }, + { + "name": "subscription_deposit", + "type": "number", + "format": "default", + "title": "Subscription deposit", + "description": "amount deposited for a new subscription (numeric)" + }, + { + "name": "subscription_duration", + "type": "string", + "format": "default", + "title": "Subscription duration", + "description": "logical subscription duration (human readable, e.g. 
6 months, 1 year)" + }, + { + "name": "subscription_duration_days", + "type": "integer", + "format": "default", + "title": "Subscription duration in days", + "description": "actual subscription duration in days (integer)" + }, + { + "name": "subscription_volumes", + "type": "integer", + "format": "default", + "title": "Number of subscription volumes", + "description": "number of volumes paid for in the subscription" + }, + { + "name": "subscription_category", + "type": "string", + "format": "default", + "title": "Subscription categogry", + "description": "subscription plan, if any; see https://shakespeareandco.princeton.edu/about/faq/#lending-library-plans " + }, + { + "name": "subscription_purchase_date", + "type": "string", + "format": "default", + "title": "Subscription purchase date", + "description": "date the subscription was purchased (ISO 8601 format; YYYY, YYYY-MM, YYYY-MM-DD, or --MM-DD)" + }, + { + "name": "reimbursement_refund", + "type": "number", + "format": "default", + "title": "Reimbursement refund", + "description": "amount refunded for a reimbursement event (numeric)" + }, + { + "name": "borrow_status", + "type": "string", + "format": "default", + "title": "Borrow status", + "description": "status code indicating how a borrowing event ended (returned, bought, missing, unknown)" + }, + { + "name": "borrow_duration_days", + "type": "integer", + "format": "default", + "title": "Borrow duration in days", + "description": "borrow duration in days, if known (integer)" + }, + { + "name": "purchase_price", + "type": "number", + "format": "default", + "title": "Purchase price", + "description": "amount paid for a purchase" + }, + { + "name": "currency", + "type": "string", + "format": "default", + "title": "Type of currency ", + "description": "currency code indicating currency of subscription price paid, deposit, reimbursement refund, or purchase price (ISO 4217 currency code)" + }, + { + "name": "item_uri", + "type": "string", + "format": "default", + 
"title": "Item URI ", + "description": "identifier for book associated with this event, if there is one" + }, + { + "name": "item_title", + "type": "string", + "format": "default", + "title": "Item title", + "description": "title of the book associated with this event" + }, + { + "name": "item_volume", + "type": "string", + "format": "default", + "title": "Item volume", + "description": "volume / issue of this work for this event, if item is a multivolume work or periodical and volume/issue information is known" + }, + { + "name": "item_authors", + "type": "string", + "format": "default", + "title": "Item author(s)", + "description": "list of authors for this work; authorized names, last name first (if multiple, separated by semicolon)" + }, + { + "name": "item_year", + "type": "string", + "format": "default", + "title": "Item year", + "description": "publication year" + }, + { + "name": "item_notes", + "type": "string", + "format": "default", + "title": "Item notes", + "description": "notes about the item" + }, + { + "name": "source_type", + "type": "string", + "format": "default", + "title": "Source type", + "description": "type of source this data was drawn from (could be multiple; separated by semicolons)" + }, + { + "name": "source_citation", + "type": "string", + "format": "default", + "title": "Source citation ", + "description": "bibliographic citation for the source of this data" + }, + { + "name": "source_manifest", + "type": "string", + "format": "default", + "title": "Source manifest", + "description": "IIIF Presentation manifest URL for a digitized edition of the source of this data (if multiple, separated by semicolons)" + }, + { + "name": "source_image", + "type": "string", + "format": "default", + "title": "Source image", + "description": "IIIF Image URL for the digitized image in the IIIF manifest documenting this event, if known (if multiple, separated by semicolons)" + } + ] + } + }, + { + "name": "member_addresses", + "path": 
"SCoData_member_addresses_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "member_ids", + "type": "string", + "format": "default", + "title": "Member identifier", + "description": "unique identifier for member; member detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "member_names", + "type": "string", + "format": "default", + "title": "Member name ", + "description": "full name of member; may include variant names; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" + }, + { + "name": "member_sort_names", + "type": "string", + "format": "default", + "title": "Sort name", + "description": "authorized name" + }, + { + "name": "member_uris", + "type": "string", + "format": "uri", + "title": "Member URI", + "description": "full URI for member; member detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "care_of_person_id", + "type": "string", + "format": "default", + "title": "Care of person identifier", + "description": "identifier of person who relays correspondance to intended recipient " + }, + { + "name": "care_of_person_name", + "type": "string", + "format": "default", + "title": "Care of person name", + "description": "name of person who relays correspondance to intended recipient " + }, + { + "name": "location_name", + "type": "string", + "format": "default", + "description": "location name ", + "title": "Location name" + }, + { + "name": "street_address", + "type": "string", + "format": "default", + "title": "Street address", + "description": "address (house number and street name) " + }, + { + "name": "postal_code", + "type": "string", + "format": "default", + "title": "Postal code", + "description": "postal code associated with address" + }, + { + "name": "city", + "type": "string", + "format": "default", + "title": "City", + "description": "city associated with address" + }, + { + "name": "arrondissement", + "type": "integer", + "format": 
"default", + "title": "Arrondissement", + "description": "Paris arrondissements for addresses in Paris (integer)" + }, + { + "name": "country", + "type": "string", + "format": "default", + "title": "Country", + "description": "country associated with address" + }, + { + "name": "longitude", + "type": "number", + "format": "default", + "title": "Longitude ", + "description": "longitudinal coordinate of address (numeric) " + }, + { + "name": "latitude", + "type": "number", + "format": "default", + "title": "Latitude", + "description": "latitudinal coordinate of address (numeric) " + } + ] + } + }, + { + "name": "members", + "path": "SCoData_members_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "id", + "type": "string", + "format": "default", + "title": "Member identifier", + "description": "unique identifier for member" + }, + { + "name": "uri", + "type": "string", + "format": "uri", + "title": "Member URI", + "description": "full URI for member; member detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "name", + "type": "string", + "format": "default", + "title": "Name ", + "description": "full name; may include variant names, name as written on lending library card; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" + }, + { + "name": "sort_name", + "type": "string", + "format": "default", + "title": "Sort name ", + "description": "authorized name" + }, + { + "name": "title", + "type": "any", + "format": "default", + "title": "Title ", + "description": "honorific address if known, e.g. Mr., Mrs. etc." 
+ }, + { + "name": "gender", + "type": "string", + "format": "default", + "title": "Gender ", + "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender" + }, + { + "name": "is_organization", + "type": "boolean", + "format": "default", + "title": "Is an organization?", + "description": "member is an organization instead of a person (boolean)" + }, + { + "name": "has_card", + "type": "boolean", + "format": "default", + "title": "Has a member card? ", + "description": "member has an extant lending library card (boolean)" + }, + { + "name": "birth_year", + "type": "date", + "format": "%Y", + "title": "Birth year ", + "description": "birth year, if known" + }, + { + "name": "death_year", + "type": "date", + "format": "%Y", + "title": "Death year", + "description": "death year, if known" + }, + { + "name": "membership_years", + "type": "string", + "format": "default", + "title": "Years of membership", + "description": "list of known active membership years (multiple, separated by semicolons)" + }, + { + "name": "viaf_url", + "type": "string", + "format": "default", + "title": "VIAF URL", + "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" + }, + { + "name": "wikipedia_url", + "type": "string", + "format": "uri", + "title": "Wikipedia URL", + "description": "URL for Wikipedia page, if available" + }, + { + "name": "nationalities", + "type": "string", + "format": "default", + "title": "Nationalities", + "description": "countries for known nationality (if multiple, separated by semicolons)" + }, + { + "name": "addresses", + "type": "string", + "format": "default", + "title": "Address(es)", + "description": "list of known addresses (if multiple, separated by semicolons)" + }, + { + "name": "postal_codes", + "type": "string", + "format": "default", + "title": "Postal code(s)", + "description": "list of postal addresses from addresses (if multiple, 
separated by semicolons; order matches addresses)" + }, + { + "name": "arrondissements", + "type": "string", + "format": "default", + "title": "Arrondissement(s)", + "description": "list of Paris arrondissements (integer; if multiple, separated by semicolons; order matches addresses)" + }, + { + "name": "coordinates", + "type": "string", + "format": "default", + "title": "Coordinates", + "description": "list of geographical coordinates for known addresses (pairs of latitude, longitude; if multiple, separated by semicolons; order matches addresses)" + }, + { + "name": "notes", + "type": "string", + "format": "default", + "title": "Notes", + "description": "more information (text with markdown formatting)" + }, + { + "name": "updated", + "type": "datetime", + "format": "default", + "title": "Date updated", + "description": "timestamp record was last modified in the Shakespeare and Company Project database before export" + } + ] + } + } + ] +} \ No newline at end of file From c3332c6aa6ea92350be2f7f08612087c8a2fb15d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:55:50 -0500 Subject: [PATCH 05/11] Make types more specific when possible --- .../v2.0/SCoData_v2.0_2024_datapackage.json | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json index 62581637..b514b809 100644 --- a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json +++ b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json @@ -1,5 +1,9 @@ { "profile": "tabular-data-package", + "title": "Shakespeare and Company Project Dataset: Lending Library Members, Events, and Books", + "homepage": "https://shakespeareandco.princeton.edu/about/data/", + "version": "2.0", + "image": "https://shakespeareandco.princeton.edu/static/img/social.png", "resources": [ { "name": "book_creators", @@ -57,7 +61,7 @@ { "name": "viaf_url", "type": "string", - 
"format": "default", + "format": "uri", "title": "VIAF url ", "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" }, @@ -135,35 +139,35 @@ }, { "name": "editor", - "type": "any", + "type": "string", "format": "default", "title": "Editor(s)", "description": "editor(s) of work" }, { "name": "translator", - "type": "any", + "type": "string", "format": "default", "title": "Translator(s)", "description": "translator(s) of work" }, { "name": "introduction", - "type": "any", + "type": "string", "format": "default", "title": "Author of introduction", "description": "author of an introduction to work" }, { "name": "illustrator", - "type": "any", + "type": "string", "format": "default", "description": "illustrator(s) of work", "title": "Illustrator(s)" }, { "name": "photographer", - "type": "any", + "type": "string", "format": "default", "title": "Photographer(s)", "description": "photographer(s) featured in work" @@ -667,7 +671,7 @@ { "name": "viaf_url", "type": "string", - "format": "default", + "format": "uri", "title": "VIAF URL", "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" }, From 45f4ad0a334a79f86254c734dcb5a7499ff04a0b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:58:21 -0500 Subject: [PATCH 06/11] Re-order resources into logical order; add title and version --- .../v2.0/SCoData_v2.0_2024_datapackage.json | 476 +++++++++--------- 1 file changed, 238 insertions(+), 238 deletions(-) diff --git a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json index b514b809..40420d2a 100644 --- a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json +++ b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json @@ -5,9 +5,9 @@ "version": "2.0", "image": "https://shakespeareandco.princeton.edu/static/img/social.png", "resources": [ - { - "name": "book_creators", - "path": 
"SCoData_book_creators_v2.0_2024-10.csv", + { + "name": "members", + "path": "SCoData_members_v2.0_2024-10.csv", "profile": "tabular-data-resource", "schema": { "fields": [ @@ -15,15 +15,22 @@ "name": "id", "type": "string", "format": "default", - "title": "Creator identifier", - "description": "unique identifier for creator" + "title": "Member identifier", + "description": "unique identifier for member" + }, + { + "name": "uri", + "type": "string", + "format": "uri", + "title": "Member URI", + "description": "full URI for member; member detail page on https://shakespeareandco.princeton.edu" }, { "name": "name", "type": "string", "format": "default", "title": "Name ", - "description": "full name; may include variant names; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" + "description": "full name; may include variant names, name as written on lending library card; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" }, { "name": "sort_name", @@ -32,44 +39,67 @@ "title": "Sort name ", "description": "authorized name" }, + { + "name": "title", + "type": "any", + "format": "default", + "title": "Title ", + "description": "honorific address if known, e.g. Mr., Mrs. etc." + }, { "name": "gender", "type": "string", "format": "default", - "title": "Gender", - "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender\n" + "title": "Gender ", + "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender" }, { "name": "is_organization", "type": "boolean", "format": "default", - "title": "Is an organization? ", - "description": "creator is an organization instead of a person (boolean)" + "title": "Is an organization?", + "description": "member is an organization instead of a person (boolean)" + }, + { + "name": "has_card", + "type": "boolean", + "format": "default", + "title": "Has a member card? 
", + "description": "member has an extant lending library card (boolean)" }, { "name": "birth_year", - "type": "integer", - "title": "Birth year", + "type": "date", + "format": "%Y", + "title": "Birth year ", "description": "birth year, if known" }, { "name": "death_year", - "type": "integer", + "type": "date", + "format": "%Y", "title": "Death year", "description": "death year, if known" }, + { + "name": "membership_years", + "type": "string", + "format": "default", + "title": "Years of membership", + "description": "list of known active membership years (multiple, separated by semicolons)" + }, { "name": "viaf_url", "type": "string", "format": "uri", - "title": "VIAF url ", + "title": "VIAF URL", "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" }, { "name": "wikipedia_url", "type": "string", "format": "uri", - "title": "Wikipedia url", + "title": "Wikipedia URL", "description": "URL for Wikipedia page, if available" }, { @@ -80,178 +110,39 @@ "description": "countries for known nationality (if multiple, separated by semicolons)" }, { - "name": "member_uri", - "type": "string", - "format": "uri", - "title": "Member URI", - "description": "identifier; member detail page on https://shakespeareandco.princeton.edu" - }, - { - "name": "notes", - "type": "string", - "format": "default", - "title": "Notes", - "description": "more information (text with markdown formatting)" - }, - { - "name": "updated", - "type": "datetime", - "format": "default", - "title": "Date updated", - "description": "timestamp record was last modified in the Shakespeare and Company Project database before export" - } - ] - } - }, - { - "name": "books", - "path": "SCoData_books_v2.0_2024-10.csv", - "profile": "tabular-data-resource", - "schema": { - "fields": [ - { - "name": "id", - "type": "string", - "format": "default", - "title": "Book identifier", - "description": "unique identifier for book" - }, - { - "name": "uri", - "type": 
"string", - "format": "uri", - "title": "Book URI", - "description": "full URI for book; book detail page on https://shakespeareandco.princeton.edu" - }, - { - "name": "title", - "type": "string", - "format": "default", - "title": "Title ", - "description": "title of the book or other item" - }, - { - "name": "author", - "type": "string", - "format": "default", - "title": "Author", - "description": "author or authors, last name first (multiple, separated by semicolon)" - }, - { - "name": "editor", - "type": "string", - "format": "default", - "title": "Editor(s)", - "description": "editor(s) of work" - }, - { - "name": "translator", - "type": "string", - "format": "default", - "title": "Translator(s)", - "description": "translator(s) of work" - }, - { - "name": "introduction", - "type": "string", - "format": "default", - "title": "Author of introduction", - "description": "author of an introduction to work" - }, - { - "name": "illustrator", - "type": "string", - "format": "default", - "description": "illustrator(s) of work", - "title": "Illustrator(s)" - }, - { - "name": "photographer", + "name": "addresses", "type": "string", "format": "default", - "title": "Photographer(s)", - "description": "photographer(s) featured in work" - }, - { - "name": "year", - "type": "year", - "format": "default", - "description": "year published", - "title": "Publication year" + "title": "Address(es)", + "description": "list of known addresses (if multiple, separated by semicolons)" }, { - "name": "format", + "name": "postal_codes", "type": "string", "format": "default", - "title": "Format", - "description": "type of item (article, book, periodical, phonograph record, photograph, print)" + "title": "Postal code(s)", + "description": "list of postal addresses from addresses (if multiple, separated by semicolons; order matches addresses)" }, { - "name": "genre_category", + "name": "arrondissements", "type": "string", "format": "default", - "title": "Genre", - "description": "genre of 
work (drama, fiction, nonfiction, periodical, poetry; if multiple, separated by semicolons)" - }, - { - "name": "uncertain", - "type": "boolean", - "format": "default", - "title": "Item is uncertain?", - "description": "boolean indicating if item is ambiguous or unidentifiable" + "title": "Arrondissement(s)", + "description": "list of Paris arrondissements (integer; if multiple, separated by semicolons; order matches addresses)" }, { - "name": "ebook_url", + "name": "coordinates", "type": "string", - "format": "uri", - "title": "Ebook URL", - "description": "link to a digital edition of this work" - }, - { - "name": "volumes_issues", - "type": "any", "format": "default", - "title": "Volume/Issue numbers", - "description": "list of multivolume volumes or periodical issues known to have circulated (separated by semicolon)" + "title": "Coordinates", + "description": "list of geographical coordinates for known addresses (pairs of latitude, longitude; if multiple, separated by semicolons; order matches addresses)" }, { "name": "notes", - "type": "any", - "format": "default", - "title": "Notes", - "description": "more information, e.g. 
about uncertain titles (text with markdown formatting)" - }, - { - "name": "event_count", - "type": "integer", - "format": "default", - "title": "Associated events count", - "description": "total number of events associated with this title (integer)" - }, - { - "name": "borrow_count", - "type": "integer", - "format": "default", - "title": "Borrow count", - "description": "total number of borrowing events associated with this title (integer)" - }, - { - "name": "purchase_count", - "type": "integer", - "format": "default", - "title": "Purchase count", - "description": "total number of purchase events associated with this title (integer)" - }, - { - "name": "circulation_years", "type": "string", - "rdfType": "https://schema.org/ItemList", "format": "default", - "constraints": { - "pattern": "(\\d{4})?(;\\d{4})*" - }, - "title": "Circulation years", - "description": "list of years of known activity for this title (if multiple, separated by semicolon)" + "title": "Notes", + "description": "more information (text with markdown formatting)" }, { "name": "updated", @@ -263,7 +154,7 @@ ] } }, - { + { "name": "events", "path": "SCoData_events_v2.0_2024-10.csv", "profile": "tabular-data-resource", @@ -477,6 +368,166 @@ } ] } + }, + { + "name": "books", + "path": "SCoData_books_v2.0_2024-10.csv", + "profile": "tabular-data-resource", + "schema": { + "fields": [ + { + "name": "id", + "type": "string", + "format": "default", + "title": "Book identifier", + "description": "unique identifier for book" + }, + { + "name": "uri", + "type": "string", + "format": "uri", + "title": "Book URI", + "description": "full URI for book; book detail page on https://shakespeareandco.princeton.edu" + }, + { + "name": "title", + "type": "string", + "format": "default", + "title": "Title ", + "description": "title of the book or other item" + }, + { + "name": "author", + "type": "string", + "format": "default", + "title": "Author", + "description": "author or authors, last name first (multiple, 
separated by semicolon)" + }, + { + "name": "editor", + "type": "string", + "format": "default", + "title": "Editor(s)", + "description": "editor(s) of work" + }, + { + "name": "translator", + "type": "string", + "format": "default", + "title": "Translator(s)", + "description": "translator(s) of work" + }, + { + "name": "introduction", + "type": "string", + "format": "default", + "title": "Author of introduction", + "description": "author of an introduction to work" + }, + { + "name": "illustrator", + "type": "string", + "format": "default", + "description": "illustrator(s) of work", + "title": "Illustrator(s)" + }, + { + "name": "photographer", + "type": "string", + "format": "default", + "title": "Photographer(s)", + "description": "photographer(s) featured in work" + }, + { + "name": "year", + "type": "year", + "format": "default", + "description": "year published", + "title": "Publication year" + }, + { + "name": "format", + "type": "string", + "format": "default", + "title": "Format", + "description": "type of item (article, book, periodical, phonograph record, photograph, print)" + }, + { + "name": "genre_category", + "type": "string", + "format": "default", + "title": "Genre", + "description": "genre of work (drama, fiction, nonfiction, periodical, poetry; if multiple, separated by semicolons)" + }, + { + "name": "uncertain", + "type": "boolean", + "format": "default", + "title": "Item is uncertain?", + "description": "boolean indicating if item is ambiguous or unidentifiable" + }, + { + "name": "ebook_url", + "type": "string", + "format": "uri", + "title": "Ebook URL", + "description": "link to a digital edition of this work" + }, + { + "name": "volumes_issues", + "type": "any", + "format": "default", + "title": "Volume/Issue numbers", + "description": "list of multivolume volumes or periodical issues known to have circulated (separated by semicolon)" + }, + { + "name": "notes", + "type": "any", + "format": "default", + "title": "Notes", + "description": 
"more information, e.g. about uncertain titles (text with markdown formatting)" + }, + { + "name": "event_count", + "type": "integer", + "format": "default", + "title": "Associated events count", + "description": "total number of events associated with this title (integer)" + }, + { + "name": "borrow_count", + "type": "integer", + "format": "default", + "title": "Borrow count", + "description": "total number of borrowing events associated with this title (integer)" + }, + { + "name": "purchase_count", + "type": "integer", + "format": "default", + "title": "Purchase count", + "description": "total number of purchase events associated with this title (integer)" + }, + { + "name": "circulation_years", + "type": "string", + "rdfType": "https://schema.org/ItemList", + "format": "default", + "constraints": { + "pattern": "(\\d{4})?(;\\d{4})*" + }, + "title": "Circulation years", + "description": "list of years of known activity for this title (if multiple, separated by semicolon)" + }, + { + "name": "updated", + "type": "datetime", + "format": "default", + "title": "Date updated", + "description": "timestamp record was last modified in the Shakespeare and Company Project database before export" + } + ] + } }, { "name": "member_addresses", @@ -586,8 +637,8 @@ } }, { - "name": "members", - "path": "SCoData_members_v2.0_2024-10.csv", + "name": "book_creators", + "path": "SCoData_book_creators_v2.0_2024-10.csv", "profile": "tabular-data-resource", "schema": { "fields": [ @@ -595,22 +646,15 @@ "name": "id", "type": "string", "format": "default", - "title": "Member identifier", - "description": "unique identifier for member" - }, - { - "name": "uri", - "type": "string", - "format": "uri", - "title": "Member URI", - "description": "full URI for member; member detail page on https://shakespeareandco.princeton.edu" + "title": "Creator identifier", + "description": "unique identifier for creator" }, { "name": "name", "type": "string", "format": "default", "title": "Name ", - 
"description": "full name; may include variant names, name as written on lending library card; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" + "description": "full name; may include variant names; for more, see https://shakespeareandco.princeton.edu/about/faq/#names" }, { "name": "sort_name", @@ -619,67 +663,44 @@ "title": "Sort name ", "description": "authorized name" }, - { - "name": "title", - "type": "any", - "format": "default", - "title": "Title ", - "description": "honorific address if known, e.g. Mr., Mrs. etc." - }, { "name": "gender", "type": "string", "format": "default", - "title": "Gender ", - "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender" + "title": "Gender", + "description": "male, female, nonbinary, unknown; for more, see https://shakespeareandco.princeton.edu/about/faq/#gender\n" }, { "name": "is_organization", "type": "boolean", "format": "default", - "title": "Is an organization?", - "description": "member is an organization instead of a person (boolean)" - }, - { - "name": "has_card", - "type": "boolean", - "format": "default", - "title": "Has a member card? ", - "description": "member has an extant lending library card (boolean)" + "title": "Is an organization? 
", + "description": "creator is an organization instead of a person (boolean)" }, { "name": "birth_year", - "type": "date", - "format": "%Y", - "title": "Birth year ", + "type": "integer", + "title": "Birth year", "description": "birth year, if known" }, { "name": "death_year", - "type": "date", - "format": "%Y", + "type": "integer", "title": "Death year", "description": "death year, if known" }, - { - "name": "membership_years", - "type": "string", - "format": "default", - "title": "Years of membership", - "description": "list of known active membership years (multiple, separated by semicolons)" - }, { "name": "viaf_url", "type": "string", "format": "uri", - "title": "VIAF URL", + "title": "VIAF url ", "description": "URL for Virtual Internet Authority File (VIAF, https://viaf.org/) identifier, if available" }, { "name": "wikipedia_url", "type": "string", "format": "uri", - "title": "Wikipedia URL", + "title": "Wikipedia url", "description": "URL for Wikipedia page, if available" }, { @@ -690,32 +711,11 @@ "description": "countries for known nationality (if multiple, separated by semicolons)" }, { - "name": "addresses", - "type": "string", - "format": "default", - "title": "Address(es)", - "description": "list of known addresses (if multiple, separated by semicolons)" - }, - { - "name": "postal_codes", - "type": "string", - "format": "default", - "title": "Postal code(s)", - "description": "list of postal addresses from addresses (if multiple, separated by semicolons; order matches addresses)" - }, - { - "name": "arrondissements", - "type": "string", - "format": "default", - "title": "Arrondissement(s)", - "description": "list of Paris arrondissements (integer; if multiple, separated by semicolons; order matches addresses)" - }, - { - "name": "coordinates", + "name": "member_uri", "type": "string", - "format": "default", - "title": "Coordinates", - "description": "list of geographical coordinates for known addresses (pairs of latitude, longitude; if multiple, 
separated by semicolons; order matches addresses)" + "format": "uri", + "title": "Member URI", + "description": "identifier; member detail page on https://shakespeareandco.princeton.edu" }, { "name": "notes", From 347b44d18c6e4a9c7bc1741509a933ebb8ed5306 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 16:58:45 -0500 Subject: [PATCH 07/11] Add logic to generate CSV data dictionary from datapackage file --- data-export-meta/readme_info.py | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/data-export-meta/readme_info.py b/data-export-meta/readme_info.py index c72dbde9..a1e2f80a 100755 --- a/data-export-meta/readme_info.py +++ b/data-export-meta/readme_info.py @@ -10,6 +10,7 @@ import sys import argparse import pathlib +import csv import pandas as pd @@ -40,8 +41,21 @@ def readme_info(df, dp_resource, field_list=True): action=argparse.BooleanOptionalAction, default=True, ) + parser.add_argument( + "-dd", + "--data-dictionary", + help="Create a data dictionary in the specified file", + type=pathlib.Path, + ) + args = parser.parse_args() + if args.data_dictionary: + if args.data_dictionary.exists(): + print( + f"Requested data dictionary file {args.data_dictionary} already exists" + ) + raise SystemExit(1) with args.datapackage.open() as packagejson: datapackage = json.load(packagejson) @@ -52,3 +66,29 @@ def readme_info(df, dp_resource, field_list=True): with datafile.open() as csvfile: df = pd.read_csv(csvfile) readme_info(df, resource, field_list=args.field_list) + + if args.data_dictionary: + print(f"\n\nWriting data dictionary to {args.data_dictionary}") + with args.data_dictionary.open("w", encoding="utf-8") as csv_datadict: + fieldnames = [ + "Filename", + "Variable", + "Variable name", + "Description", + "Type", + "Format", + ] + csvwriter = csv.DictWriter(csv_datadict, fieldnames=fieldnames) + csvwriter.writeheader() + for resource in datapackage["resources"]: + for field in resource["schema"]["fields"]: + 
csvwriter.writerow( + { + "Filename": resource["path"], + "Variable": field["title"], + "Variable name": field["name"], + "Description": field["description"], + "Type": field["type"], + "Format": field.get("format"), + } + ) From 3aa767fc225731349d222613b2266bdc47496d5e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 17:15:25 -0500 Subject: [PATCH 08/11] Add missing values and foreign keys where they don't break validation --- .../v2.0/SCoData_v2.0_2024_datapackage.json | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json index 40420d2a..13d822a8 100644 --- a/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json +++ b/data-export-meta/v2.0/SCoData_v2.0_2024_datapackage.json @@ -366,6 +366,23 @@ "title": "Source image", "description": "IIIF Image URL for the digitized image in the IIIF manifest documenting this event, if known (if multiple, separated by semicolons)" } + ], + "missingValues": [ + "" + ], + "foreignKeys": [ + { + "fields": "member_uris", + "reference": { + "resource": "members", + "fields": "uri" + }, + "fields": "item_uri", + "reference": { + "resource": "books", + "fields": "uri" + } + } ] } }, @@ -633,8 +650,11 @@ "title": "Latitude", "description": "latitudinal coordinate of address (numeric) " } + ], + "missingValues": [ + "" ] - } + } }, { "name": "book_creators", From b8462f38025e9eb91850ab7b1106167af3135a58 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 17:15:44 -0500 Subject: [PATCH 09/11] Include constraints in data dictionary file --- data-export-meta/readme_info.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/data-export-meta/readme_info.py b/data-export-meta/readme_info.py index a1e2f80a..10631db3 100755 --- a/data-export-meta/readme_info.py +++ b/data-export-meta/readme_info.py @@ -77,6 +77,7 @@ def readme_info(df, dp_resource, field_list=True): 
"Description", "Type", "Format", + "Constraints", ] csvwriter = csv.DictWriter(csv_datadict, fieldnames=fieldnames) csvwriter.writeheader() @@ -90,5 +91,8 @@ def readme_info(df, dp_resource, field_list=True): "Description": field["description"], "Type": field["type"], "Format": field.get("format"), + "Constraints": field["constraints"]["pattern"] + if "constraints" in field + else "", } ) From ee6ba27db51078fac1ab6c604d89906968c3680d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Nov 2024 17:16:09 -0500 Subject: [PATCH 10/11] Add more documentation about what is in this folder and how to use it --- data-export-meta/README.rst | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/data-export-meta/README.rst b/data-export-meta/README.rst index c50b54b0..7ef82681 100644 --- a/data-export-meta/README.rst +++ b/data-export-meta/README.rst @@ -2,7 +2,27 @@ Data exports ============ This directory contains [Frictionless data](https://frictionlessdata.io/) [data package](https://specs.frictionlessdata.io/data-package/) -files to describe and validate Project data exports. +files to describe and validate Project data exports, along with utility scripts for auto-generating portions of dataset readmes, data dictionaries, and list of members and books changes from previous published versions of the datasets. -They are currently generated and maintained manually; they should be updated -for deposit with revised data exports as needed. \ No newline at end of file +Datapackage files are currently generated and maintained manually; they should be updated +for deposit with revised data exports as needed. + +Validation +^^^^^^^^^^ + +To validate datapackage files and associated data files, use frictionless: + +1. `pip install frictionless` +2. 
`frictionless validate vX.X/datapackage.json` + +This will report any errors in the datapackage file as well as any validation errors where the types or pattern constraints specified in the data package file do not match the data in the associated CSV files. + + +Scripts +^^^^^^^ + +All scripts require pandas (`pip install pandas`). + +- `readme_info.py` - use to generate dataset summary information for inclusion in plain-text readme (number of fields, number of rows, optional list of fields with descriptions); can also be used to generate a CSV data dictionary. Takes a path to the datapackage file; resource paths referenced in the datapackage must resolve. +- `member_changes.py` - for members in an old version not in the new version, creates a csv of changes with new ids for member ids that changed; requires pandas. Must be updated for new versions and should be added to changes from previous versions. +- `book_changes.py` - same as above, but for book ids \ No newline at end of file From a674c975c8e466789021f84f0b307024ae5b4e96 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 09:30:39 -0500 Subject: [PATCH 11/11] Drop constraints from data dictionary --- data-export-meta/readme_info.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data-export-meta/readme_info.py b/data-export-meta/readme_info.py index 10631db3..a1e2f80a 100755 --- a/data-export-meta/readme_info.py +++ b/data-export-meta/readme_info.py @@ -77,7 +77,6 @@ def readme_info(df, dp_resource, field_list=True): "Description", "Type", "Format", - "Constraints", ] csvwriter = csv.DictWriter(csv_datadict, fieldnames=fieldnames) csvwriter.writeheader() @@ -91,8 +90,5 @@ def readme_info(df, dp_resource, field_list=True): "Description": field["description"], "Type": field["type"], "Format": field.get("format"), - "Constraints": field["constraints"]["pattern"] - if "constraints" in field - else "", } )