forked from ipeirotis/scholar_update
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
84 lines (70 loc) · 2.95 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import functions_framework
import json
import logging
from datetime import datetime
from google.cloud import storage
from scholarly import scholarly
@functions_framework.http
def update_scholar_profile(request):
    """Refresh the stored Google Scholar snapshot for one author.

    The author name and the target filename are read from the JSON body when
    one is present, falling back to the query-string arguments otherwise.

    Args:
        request (flask.Request): The request object. Must provide
            ``author_name`` and ``filename`` either as JSON keys or as query
            parameters.

    Returns:
        A ``(message, status_code)`` tuple: 200 on success, 400 when a
        required parameter is missing, and 500 when fetching the data or
        writing it to the bucket fails.
    """
    # get_json(silent=True) returns None instead of raising when the body is
    # missing or is not valid JSON (e.g. a plain GET with query parameters).
    # A non-object JSON body (list, string, ...) has no keys to read either,
    # so both cases are treated as an empty payload rather than crashing.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict):
        request_json = {}
    request_args = request.args

    author_name = request_json.get("author_name", request_args.get("author_name"))
    filename = request_json.get("filename", request_args.get("filename"))
    if not author_name or not filename:
        return "Missing author name or filename", 400

    author, publications = get_scholar_data(author_name)
    if author is None or publications is None:
        return "Error getting data from Google Scholar", 500

    result = store_data_on_bucket(filename, author, publications)
    if result is None:
        return "Error storing data on Google Bucket", 500

    return f"Updated entry for author {author_name} with filename {filename}", 200
def get_scholar_data(author_name):
    """Fetch an author's profile and split off the list of publications.

    The first search hit for ``author_name`` is filled in via ``scholarly``.
    Every publication gets its ``num_citations`` entry renamed to ``citedby``,
    and both the author and each publication are tagged with the time of this
    run (``last_updated_ts`` as integer epoch seconds, ``last_updated`` as a
    ``YYYY-MM-DD HH:MM:SS`` string).

    Returns:
        ``(author, publications)`` where ``author`` no longer carries its
        ``publications`` key, or ``(None, None)`` when the lookup fails (the
        failure is logged).
    """
    try:
        # Take the first match for the name and populate its details
        first_match = next(scholarly.search_author(author_name))
        author = scholarly.fill(first_match)
    except Exception:
        logging.exception("Error getting data from Google Scholar")
        return None, None

    # A single reading of the clock, shared by every record touched below
    fetched_at = datetime.now()
    epoch_seconds = int(fetched_at.timestamp())
    readable = fetched_at.strftime("%Y-%m-%d %H:%M:%S")

    def _mark_updated(record):
        # Record when this entry was last refreshed
        record["last_updated_ts"] = epoch_seconds
        record["last_updated"] = readable

    # Rename the citation counter and stamp each publication
    publications = []
    for entry in author["publications"]:
        entry["citedby"] = entry.pop("num_citations")
        _mark_updated(entry)
        publications.append(entry)

    _mark_updated(author)

    # Publications are returned separately, so drop them from the profile
    del author["publications"]
    return author, publications
def store_data_on_bucket(filename, author, publications, bucket_name="publications_scholar"):
    """Write the author profile and the publication list to a bucket as JSON.

    Two objects are uploaded with content type ``application/json``:
    ``<filename>.json`` holding ``author`` and ``<filename>_pubs.json``
    holding ``publications``.

    Args:
        filename: Base object name, without extension.
        author: JSON-serializable author profile.
        publications: JSON-serializable list of publications.
        bucket_name: Name of the destination bucket. Defaults to the bucket
            this function has always written to.

    Returns:
        ``True`` when both uploads succeed, ``None`` when anything fails (the
        failure is logged).
    """
    try:
        # Serialize everything before touching the bucket, so a value that
        # cannot be encoded never leaves only one of the two objects updated.
        uploads = [
            (f"{filename}.json", json.dumps(author)),
            (f"{filename}_pubs.json", json.dumps(publications)),
        ]

        bucket = storage.Client().bucket(bucket_name)
        for object_name, payload in uploads:
            blob = bucket.blob(object_name)
            blob.upload_from_string(payload, content_type="application/json")
    except Exception:
        logging.exception("Error storing data on Google Bucket")
        return None
    return True