-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
code to automatically create resource documentation on Hugging Face
- Loading branch information
1 parent
8bb43e0
commit 7773a75
Showing
4 changed files
with
174 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import requests | ||
|
||
def get_bibtex_from_doi(doi: str):
    """Fetch the BibTeX record for a DOI from the DataCite API.

    Args:
        doi: The DOI to look up; it is appended to the DataCite BibTeX
            endpoint path.

    Returns:
        The raw response body (``bytes``) returned by the endpoint.

    Raises:
        SystemExit: If the endpoint answers with an HTTP error status.
    """
    doi_backend = "https://api.datacite.org/dois/application/x-bibtex"
    try:
        response = requests.get(f"{doi_backend}/{doi}")
        # requests.get() does not raise on 4xx/5xx responses by itself.
        # Without this call the except branch below can never trigger and
        # an error body would be returned as if it were BibTeX.
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err) from err
    return response.content
|
||
|
||
def get_value(resource_info: dict, key: str):
    """Return the localized text stored under ``key``, preferring English.

    Args:
        resource_info: Mapping whose ``key`` entry is a dict that may hold
            an ``'eng'`` and/or a ``'swe'`` entry.
        key: Name of the entry to read from ``resource_info``.

    Returns:
        The ``'eng'`` value when it is present and non-empty, otherwise the
        ``'swe'`` value when it is present and non-empty, otherwise ``None``.
        ``None`` is also returned when ``key`` is missing or maps to an
        empty/``None`` value.
    """
    # A missing key or a None entry is treated like an empty translation
    # table, so callers get None instead of a KeyError/AttributeError.
    translations = resource_info.get(key) or {}
    # Empty strings count as "not available", hence truthiness, not `in`.
    return translations.get('eng') or translations.get('swe') or None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# {{title}} | ||
|
||
## Dataset Description | ||
|
||
- **Homepage:** [The official homepage of Språkbanken](https://spraakbanken.gu.se/resurser/) | ||
- **Repository:** {{url}} | ||
- **Point of Contact:** [[email protected]]([email protected]) | ||
|
||
### Dataset Summary | ||
|
||
{{description}} | ||
|
||
### Citation Information | ||
|
||
``` | ||
{{bibtex}} | ||
``` | ||
|
||
# Disclaimer | ||
|
||
This repository has been automatically created using the [sb2hf](https://github.com/felixhultin/sb2hf/tree/main) tool. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
""" | ||
A module to automatically generate a Hugging Face README for an SBX resource | ||
""" | ||
|
||
import codecs | ||
import logging | ||
import requests | ||
import yaml | ||
|
||
from jinja2 import Template | ||
|
||
from helpers import get_value | ||
from urlparser import URLReader | ||
|
||
|
||
def write_readme(url_reader: URLReader, metadata: dict, fp: str):
    """Render the Hugging Face README for a resource and write it to ``fp``.

    The BibTeX entry is requested from the metadata web service using the
    resource name and the metadata ``'type'``. The ``hf_gen/README.md``
    template is then rendered with the description, title, BibTeX and URL,
    and the result is written after the YAML header produced by
    ``create_hf_metadata_yaml``.

    Args:
        url_reader: Provides ``resource_name`` and ``url`` for the resource.
        metadata: Resource metadata; the ``'type'``, ``'description'`` and
            ``'name'`` entries are read here, and the whole dict is passed
            on to ``create_hf_metadata_yaml``.
        fp: Path of the README file to write (UTF-8).
    """
    # TODO: remove this when endpoint is in production
    # No trailing slash here: the query below adds its own separator, and a
    # trailing slash would yield ".../metadata-dev//bibtex".
    TEMP_LINK = "https://ws.spraakbanken.gu.se/ws/metadata-dev"
    bibtex_query = f"{TEMP_LINK}/bibtex?resource={url_reader.resource_name}&type={metadata['type']}"
    logging.info("Fetching bibtex from %s", bibtex_query)
    bibtex = requests.get(bibtex_query).json()['bibtex']
    template_variables = {
        'description': get_value(metadata, 'description'),
        'title': get_value(metadata, 'name'),
        'bibtex': bibtex,
        'url': url_reader.url,
    }
    # The template contains non-ASCII text, so decode it explicitly as UTF-8
    # rather than relying on the platform's default encoding.
    with open('hf_gen/README.md', 'r', encoding='utf-8') as file:
        template = Template(file.read(), trim_blocks=True)
    rendered_file = template.render(**template_variables)
    hf_metadata = create_hf_metadata_yaml(metadata)
    readme = f"{hf_metadata}\n{rendered_file}"
    # newline="" keeps line endings untranslated, as codecs.open did before;
    # the with-block guarantees the handle is closed even if the write fails.
    with open(fp, "w", encoding="utf-8", newline="") as output_file:
        output_file.write(readme)
|
||
def create_hf_metadata_yaml(metadata: dict):
    """Build the YAML header block placed at the top of the README.

    Args:
        metadata: Resource metadata; ``metadata['languages']`` is iterated
            and each item's ``'code'`` entry is collected, and the
            ``'short_description'`` entry is resolved through ``get_value``.

    Returns:
        A string consisting of the dumped YAML (keys ``language`` and
        ``pretty_name``) enclosed between ``---`` marker lines.
    """
    language_codes = []
    for language in metadata['languages']:
        language_codes.append(language['code'])
    header_fields = {
        'language': language_codes,
        'pretty_name': get_value(metadata, 'short_description'),
    }
    # allow_unicode keeps non-ASCII characters readable instead of escaped.
    dumped = yaml.dump(header_fields, allow_unicode=True)
    return "---\n" + dumped + "---"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters