-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfix_tech_report_eprints.py
72 lines (65 loc) · 2.48 KB
/
fix_tech_report_eprints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import sys, os, csv, json
import requests
from irdm import eprint2rdm
from caltechdata_api import caltechdata_edit, get_metadata
# Token handed as `token=` to the caltechdata_api calls further down
# (get_metadata and caltechdata_edit). It is read from the CTATOK environment
# variable; if that variable is unset this line raises KeyError.
token = os.environ["CTATOK"]
def load_completed(path="completed.csv"):
    """Return the set of record ids listed in the progress file at *path*.

    The file holds one id per line in its first CSV column. Blank lines are
    skipped instead of raising IndexError. A missing file is treated as "no
    progress recorded yet" and yields an empty set; the update loop at the end
    of this script opens the same file in append mode, which creates it.

    A set is returned because the ids are only used for membership tests.
    """
    try:
        # newline="" is what the csv module expects of file objects it reads.
        with open(path, newline="") as infile:
            return {row[0] for row in csv.reader(infile) if row}
    except FileNotFoundError:
        return set()


# Record ids already handled by a previous run; the update loop skips these.
completed = load_completed()
# Eprint ids taken from the "eprintid" column of tech_report.csv. They are
# stored in a set because the only later use is an `in` test performed once per
# row of migrated_records.csv; a list would make that scan quadratic.
# newline="" is what the csv module expects of file objects it reads.
with open("tech_report.csv", newline="") as infile:
    to_update = {row["eprintid"] for row in csv.DictReader(infile)}
# Build a mapping from each eprint id slated for update to the list of rdmid
# values found for it in migrated_records.csv. Rows whose record_status is
# "restricted-duplicate" are left out.
eprint_ids = {}
with open("migrated_records.csv") as infile:
    for entry in csv.DictReader(infile):
        epid = entry["eprintid"]
        if entry["record_status"] == "restricted-duplicate":
            continue
        if epid not in to_update:
            continue
        # Create the list on first sight of this eprint id, then extend it.
        eprint_ids.setdefault(epid, []).append(entry["rdmid"])
# Ids present in to_update but absent from eprint_ids are deleted records.
# Eprint date types whose date is used as the replacement publication date.
_FALLBACK_DATE_TYPES = {"completed", "submitted"}
# Eprint date types whose presence means the record's publication_date is left
# untouched.
_PUBLISHED_DATE_TYPES = {"published", "pub_date"}


def _scan_dates(dates):
    """Return ``(pub_date, incorrect)`` for a list of eprint date entries.

    ``pub_date`` is the date of the last "completed" or "submitted" entry, or
    None when there is none. ``incorrect`` stays True unless a "published" or
    "pub_date" entry is present.
    """
    pub_date = None
    incorrect = True
    for entry in dates:
        type_id = entry["type"]["id"]
        if type_id in _FALLBACK_DATE_TYPES:
            pub_date = entry["date"]
        elif type_id in _PUBLISHED_DATE_TYPES:
            incorrect = False
    return pub_date, incorrect


# Walk every record attached to the selected eprints, fix its publication date
# and resource type from the eprint metadata, push the edit, and log the record
# id to completed.csv so an interrupted run can resume where it stopped.
for eprintid, rdmid_list in eprint_ids.items():
    for rdmid in rdmid_list:
        if rdmid in completed:
            continue  # already handled by an earlier run

        record = get_metadata(rdmid, token=token, authors=True)
        eprint_data = eprint2rdm(eprintid)[0]["metadata"]

        pub_date, incorrect = _scan_dates(eprint_data["dates"])
        if incorrect:
            if not pub_date:
                # Nothing to substitute: stop and report failure. sys.exit is
                # used instead of the site-provided exit(), with a non-zero
                # status so callers can tell the run was aborted.
                print("No pub date for " + eprintid)
                sys.exit(1)
            record["metadata"]["publication_date"] = pub_date

        if eprint_data["resource_type"]["id"] == "publication-report":
            record["metadata"]["resource_type"] = {
                "id": "publication-technicalnote"
            }
        else:
            record["metadata"]["resource_type"] = eprint_data["resource_type"]

        caltechdata_edit(
            rdmid,
            metadata=record,
            token=token,
            production=True,
            publish=True,
            authors=True,
        )

        # Open and close the progress file per record so each id is flushed to
        # disk as soon as its edit has gone through.
        with open("completed.csv", "a") as outfile:
            outfile.write(rdmid + "\n")