Skip to content

Commit

Permalink
use the words before colon only if applicable and use 2 full words an…
Browse files Browse the repository at this point in the history
…d another 2 initials, on: scripts/prepare_upload_bibtex.py
  • Loading branch information
manathan1984 committed Nov 27, 2024
1 parent 9210a5c commit 47cf10e
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions scripts/prepare_upload_bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,17 @@ def upload_document_json(access_token, group_id, document_data):
duplicate_keys_map = {}

if (len(sys.argv) == 0):
print ("Do not run me as a standalone script! Use ./prepare_bibtex.sh")
print ("Do not run me as a standalone script! Use ./prepare_upload_new_bibtex.sh")
exit(-1)

if (len(sys.argv) == 3):
if (sys.argv[2]=="-f"):
print ("Running prepare_bibtex from ./prepare_bibtex.sh.\n")
print ("Running prepare_upload_new_bibtex from ./prepare_upload_new_bibtex.sh.\n")
else:
print ("Do not run me as a standalone script! Use ./prepare_bibtex.sh")
print ("Do not run me as a standalone script! Use ./prepare_upload_new_bibtex.sh")
exit(-1)
else:
print ("Do not run me as a standalone script! Use ./prepare_bibtex.sh")
print ("Do not run me as a standalone script! Use ./prepare_upload_new_bibtex.sh")
exit(-1)

with open(recipes_file) as df:
Expand Down Expand Up @@ -123,8 +123,8 @@ def upload_document_json(access_token, group_id, document_data):
e['title']=e['title'].replace("{", "")
e['title']=e['title'].replace("}", "")
e['title']=e['title'].replace("\n", " ")
print(e['author'])
print(format_authors(format_authors(e['author'])))
# print(e['author'])
# print(format_authors(format_authors(e['author'])))
scores = list(map(lambda x: fuzz.ratio(x.get('title','') + x.get('author', ''), e['title'] + format_authors(e.get('author', ''))), existing_entries))
max_score = max(scores)
max_score_title = existing_entries[scores.index(max_score)]['title'] if max_score > 0 else "None"
Expand Down Expand Up @@ -191,11 +191,19 @@ def upload_document_json(access_token, group_id, document_data):
temparray=first_author.split(" ")
lastname=temparray[len(temparray)-1].strip()
common_words = {'a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'else', 'when', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'case'}
title_words = [word for word in e['title'].split() if word.lower() not in common_words][:3]
title = e['title']
if ':' in e['title']:
title_before_colon = e['title'].split(':')[0]
title = title_before_colon
title = title.replace("-", " ")
# Capitalize a word only if it is entirely lowercase, so that words the title already writes with capitals (e.g., acronyms) keep their original casing
title_words = [word.capitalize() if word.islower() else word for word in title.split() if word.lower() not in common_words][:4]
lastname = ''.join(filter(str.isalpha, lastname))
title_words[0] = ''.join(filter(str.isalpha, title_words[0]))
candidate_key = lastname + e['year'] + title_words[0].capitalize()
for word in title_words[1:]:
print(title_words)
print(''.join(title_words[0:2]))
candidate_key = lastname + e['year'] + ''.join(title_words[0:2])
for word in title_words[2:]:
candidate_key += word[0].upper()
print (" ==> Proposed key for \""+ e['title'] +"\": \"" + candidate_key + "\"")
key_exists = any(ex_e['ID'] == candidate_key for ex_e in existing_entries)
Expand Down

0 comments on commit 47cf10e

Please sign in to comment.