From f94b823db1139bcbb005b221364473d2d38a5216 Mon Sep 17 00:00:00 2001
From: ErnestaP <ernesta.petraityte@yahoo.com>
Date: Tue, 26 Sep 2023 14:13:41 +0200
Subject: [PATCH] Hindawi final fields

---
 .../Hindawi/hindawi_fields_mapping.md         | 122 ++++++++++++++++++
 documentation/IOP/iop_fields_mapping.md       |   4 +-
 2 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/documentation/Hindawi/hindawi_fields_mapping.md b/documentation/Hindawi/hindawi_fields_mapping.md
index aecfd240..d36a5ce7 100644
--- a/documentation/Hindawi/hindawi_fields_mapping.md
+++ b/documentation/Hindawi/hindawi_fields_mapping.md
@@ -1,3 +1,117 @@
+# [Final fields](#final_fields)
+
+| Field                | Processed                                                                                                                          | Subfield       | Subsubfield |
+| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -------------- | ----------- |
+| dois                 | <a href="#generic_parsing">generic_parsing</a> : <a href="#33">[33]</a>                                                            | value          |             |
+| arxiv_eprints        | <a href="#enricher">enricher</a> : <a href="#67">[67]</a>                                                                          | value          |             |
+|                      |                                                                                                                                    | categories     |             |
+| page_nr              | <a href="#parsing">parsing</a> : <a href="#6">[6]</a>                                                                             |                |             |
+| authors              | <a href="#parsing">parsing</a> : <a href="#6">[6]</a> <br/><a href="#generic_parsing">generic_parsing</a> : <a href="#22">[22]</a> | surname        |             |
+|                      |                                                                                                                                    | given_names    |             |
+|                      |                                                                                                                                    | full_name      |             |
+|                      |                                                                                                                                    | affiliations   | country     |
+|                      |                                                                                                                                    |                | institution |
+| collections          | <a href="#parsing">parsing</a> : <a href="#12">[12]</a>                                                                            |                |             |
+| license              | <a href="#parsing">parsing</a> : <a href="#11">[11]</a>                                                                            | url            |             |
+|                      |                                                                                                                                    | license        |             |
+| publication_info     | <a href="#generic_parsing">generic_parsing</a> : <a href="#40">[40]</a>                                                            | journal_title  |             |
+|                      |                                                                                                                                    | journal_volume |             |
+|                      |                                                                                                                                    | year           |             |
+| abstracts            | <a href="#enhancer">enhancer</a> : <a href="#46">[46]</a>                                                                          | value          |             |
+| acquisition_source   | <a href="#enhancer">enhancer</a> : <a href="#47">[47]</a>                                                                          | source         |             |
+|                      |                                                                                                                                    | method         |             |
+|                      |                                                                                                                                    | date           |             |
+| copyright            | <a href="#enhancer">enhancer</a> : <a href="#48">[48]</a>                                                                          | year           |             |
+|                      |                                                                                                                                    | statement      |             |
+| imprints             | <a href="#enhancer">enhancer</a> : <a href="#49">[49]</a>                                                                          | date           |             |
+|                      |                                                                                                                                    | publisher      |             |
+| record_creation_date | <a href="#enhancer">enhancer</a> : <a href="#50">[50]</a>                                                                          |                |             |
+| titles               | <a href="#enhancer">enhancer</a> : <a href="#51">[51]</a>                                                                          | title          |             |
+|                      |                                                                                                                                    | source         |             |
+| $schema              | <a href="#enricher">enricher</a> : <a href="#66">[66]</a>                                                                          |                |             |
+
+
+# [Enricher](#enricher)
+|                                |               |                                                       |
+| ------------------------------ | ------------- | ----------------------------------------------------- |
+| Reference                      | Field         | Enricher                                              |
+| <a id="66" href="#66">[66]</a> | schema        | <a href="#_get_schema">\_get_schema</a>               |
+| <a id="67" href="#67">[67]</a> | arxiv_eprints | <a href="#_get_arxiv_eprints">\_get_arxiv_eprints</a> |
+
+### [\_get_schema](#_get_schema)
+
+| Reference | Subfield | Value                                                                            | Default value |
+| --------- | -------- | -------------------------------------------------------------------------------- | ------------- |
+|           |          | <code>os.getenv("REPO_URL", "http://repo.qa.scoap3.org/schemas/hep.json")</code> |               |
+
+### [\_get_arxiv_eprints](#_get_arxiv_eprints)
+
+| Reference                      | Subfield   | Processing                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| ------------------------------ | ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| <a id="68" href="#68">[68]</a> | categories | 1. Need to take arxiv id value from arxiv_eprints.value <br/> 2. Make a request to arxiv API: <code>f'https://export.arxiv.org/api/query?search_query=id:{arxiv_eprints.value}'</code> <br/> 3. From XML response, take the categories by path: arxiv:primary_category and rest of the categories by path: /w3:category. <br/>xml_namespaces = { "arxiv": "http://arxiv.org/schemas/atom", "w3": "http://www.w3.org/2005/Atom", } |
+| <a id="69" href="#69">[69]</a> | value      | Cleans blank space                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+
+# [Enhancer](#enhancer)
+
+| Reference                      | Field                | Enhancer                                                                           |
+| ------------------------------ | -------------------- | ---------------------------------------------------------------------------------- |
+| <a id="46" href="#46">[46]</a> | abstracts            | <a href="#__construct_abstracts">\_\_construct_abstracts</a>                       |
+| <a id="47" href="#47">[47]</a> | acquisition_source   | <a href="#__construct_acquisition_source">\_\_construct_acquisition_source</a>     |
+| <a id="48" href="#48">[48]</a> | copyright            | <a href="#__construct_copyright">\_\_construct_copyright</a>                       |
+| <a id="49" href="#49">[49]</a> | imprints             | <a href="#__construct_imprints">\_\_construct_imprints</a>                         |
+| <a id="50" href="#50">[50]</a> | record_creation_date | <a href="#__construct_record_creation_date">\_\_construct_record_creation_date</a> |
+| <a id="51" href="#51">[51]</a> | titles               | <a href="#__construct_titles">\_\_construct_titles</a>                             |
+| <a id="52" href="#52">[52]</a> |                      | <a href="#__remove_country">\_\_remove_country</a>                                 |
+
+### [\_\_construct_abstracts](#__construct_abstracts)
+
+| Reference                      | Subfield | Value                                                                          |
+| ------------------------------ | -------- | ------------------------------------------------------------------------------ |
+| <a id="53" href="#53">[53]</a> | value    | Take value from <a href="#generic_parsing">generic parsing</a> abstract <a href="#23">[23]</a> |
+| <a id="54" href="#54">[54]</a> | source   | Constant: Hindawi                                                                  |
+
+### [\_\_construct_acquisition_source](#__construct_acquisition_source)
+
+| Reference                      | Subfield | Value                                            |
+| ------------------------------ | -------- | ------------------------------------------------ |
+| <a id="55" href="#55">[55]</a> | source   | Constant: Hindawi                               |
+| <a id="56" href="#56">[56]</a> | method   | Constant: Hindawi                               |
+| <a id="57" href="#57">[57]</a> | date     | <code>datetime.datetime.now().isoformat()</code> |
+
+### [\_\_construct_copyright](#__construct_copyright)
+
+| Reference                      | Subfield  | Value                                                                                     |
+| ------------------------------ | --------- | ----------------------------------------------------------------------------------------- |
+| <a id="58" href="#58">[58]</a> | year      | Take value from <a href="#parsing">parsing</a> copyright_year <a href="#10">[10]</a>        |
+| <a id="59" href="#59">[59]</a> | statement | Take value from <a href="#parsing">parsing</a> copyright_statement <a href="#9">[9]</a> |
+
+### [\_\_construct_imprints](#__construct_imprints)
+
+| Reference                      | Subfield  | Value                                                                                                |
+| ------------------------------ | --------- | ---------------------------------------------------------------------------------------------------- |
+| <a id="60" href="#60">[60]</a> | date      | Take value from <a href="#generic_parsing">generic_parsing</a> date_published <a href="#29">[29]</a> |
+| <a id="61" href="#61">[61]</a> | publisher | Constant: Hindawi                                                                                    |
+
+### [\_\_construct_record_creation_date](#__construct_record_creation_date)
+
+| Reference                      | Subfield | Value                                            |
+| ------------------------------ | -------- | ------------------------------------------------ |
+| <a id="62" href="#62">[62]</a> |          | <code>datetime.datetime.now().isoformat()</code> |
+
+### [\_\_construct_titles](#__construct_titles)
+
+| Reference                      | Subfield | Value                                                                                                |
+| ------------------------------ | -------- | ---------------------------------------------------------------------------------------------------- |
+| <a id="63" href="#63">[63]</a> | title    | Removes fn tags. `FN_REGEX = re.compile(r"<fn.*<\/fn>")`<br/> `FN_REGEX.sub("", item.pop("title", "")).strip()` |
+| <a id="64" href="#64">[64]</a> | source   | Constant: Hindawi                                                                                    |
+
+### [\_\_remove_country](#__remove_country)
+
+|                                |                                                                                          |       |                                              |
+| ------------------------------ | ---------------------------------------------------------------------------------------- | ----- | -------------------------------------------- |
+| Reference                      | Field                                                                                    | Value | Processing                                   |
+| <a id="65" href="#65">[65]</a> | from <a href="#parsing">parsed</a> affiliation <a href="#55">country [55]</a> |       | removes country if the value has:  |
+
 # [Generic parsing](#generic_parsing)
 
 | Reference | Field                  | Subfield             | Processing                                                                                                                           | Default value |
@@ -80,3 +194,11 @@
 |-----------|---------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | <a href="#20" id="20">[20]</a> | url     | License urls: "ns0:metadata/ns1:record/ns0:datafield/[@tag='540']/ns0:subfield/[@code='u']" |                                                                                                                                                                                                                            |
 | <a href="#21" id="21">[21]</a> | license | license text = ns0:metadata/ns1:record/ns0:datafield/[@tag='540']/ns0:subfield/[@code='a']  | <code>url_parts = license_url.text.split("/")<br/> clean_url_parts = list(filter(bool, url_parts))<br/> version = clean_url_parts.pop()<br/>license_type = clean_url_parts.pop()<br/>f"CC-{license_type}-{version}"</code> |
+
+### [_get_affiliations](#_get_affiliations)
+
+| Reference | Field        | Source                                                                                                                                                                     | Parsing                                                             |
+|-----------|--------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------|
+| <a href="#53" id="53">[53]</a> | value        | ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u'] <br/> ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u']                |                                                                     |
+| <a href="#54" id="54">[54]</a> | organization | same as value: ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u'] <br/> ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | takes the string before the last comma                              |
+| <a href="#55" id="55">[55]</a> | country      | same as value: ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u'] <br/> ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | takes the last string after comma, which starts with a capital letter |
diff --git a/documentation/IOP/iop_fields_mapping.md b/documentation/IOP/iop_fields_mapping.md
index 9b3a6476..f713d35b 100644
--- a/documentation/IOP/iop_fields_mapping.md
+++ b/documentation/IOP/iop_fields_mapping.md
@@ -80,8 +80,8 @@
 
 | Reference                      | Subfield | Value                                            |
 | ------------------------------ | -------- | ------------------------------------------------ |
-| <a id="57" href="#57">[57]</a> | source   | Constant: Springer                               |
-| <a id="58" href="#58">[58]</a> | method   | Constant: Springer                               |
+| <a id="57" href="#57">[57]</a> | source   | Constant: IOP                               |
+| <a id="58" href="#58">[58]</a> | method   | Constant: IOP                               |
 | <a id="59" href="#59">[59]</a> | date     | <code>datetime.datetime.now().isoformat()</code> |
 
 ### [\_\_construct_copyright](#__construct_copyright)