Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scripts used for cryptobib/db #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions add.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
folders "lib" and "db"
"""

import sys
import argparse
import logging
import os
import shutil
import sys
import time

scriptdir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptdir, "..", "lib"))
Expand All @@ -15,13 +19,8 @@
import mybibtex.generator
import confs_years

import logging
import shutil
import argparse
import time

import config
from config import *
from config import confs_missing_years

mybibtex.generator.config = config
logging.basicConfig(level=logging.DEBUG)
Expand All @@ -31,10 +30,8 @@ def add(filenames: list[str]):
parser = mybibtex.parser.Parser()
parser.parse_file("db/abbrev0.bib")
parser.parse_file("db/crypto_db.bib")
db = parser.parse_file("db/crypto_conf_list.bib")

for filename in filenames:
db = parser.parse_file(filename)
parser.parse_file("db/crypto_conf_list.bib")
db = parser.parse_files(filenames)

conf_years = confs_years.get_confs_years_inter(db, confs_missing_years)

Expand Down
17 changes: 8 additions & 9 deletions add_doi_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@
folders "lib" and "db"
"""

import sys
import argparse
import json
import logging
import os
import shutil
import sys
import time
import urllib.parse
import urllib.request

scriptdir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptdir, "..", "lib"))
Expand All @@ -16,14 +23,6 @@
import mybibtex.generator
import confs_years

import logging
import shutil
import argparse
import time
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
import json

import config
from config import *

Expand Down
20 changes: 6 additions & 14 deletions check_many_authors_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,21 @@
folders "lib" and "db"
"""

import collections
import sys
import argparse
import logging
import os
import re
import sys

from pybtex.bibtex.utils import split_name_list

scriptdir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptdir, "..", "lib"))
sys.path.append(os.path.join(scriptdir, "..", "db"))

import mybibtex.parser
import mybibtex.database
import mybibtex.generator
import confs_years

import argparse
import logging
import re
import shutil
import time

from pybtex.bibtex.utils import split_name_list

import config
from config import *

mybibtex.generator.config = config
logging.basicConfig(level=logging.DEBUG)
Expand Down
126 changes: 126 additions & 0 deletions fix_lineage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
This script needs to be run in the root folder containing the
folders "lib" and "db"
"""

import argparse
import os
import re
import sys
# import shutil
# import time

# Directory that contains this script (symlinks resolved).
scriptdir = os.path.dirname(os.path.realpath(__file__))
# Make the sibling "lib" and "db" folders (one level above this script)
# importable; the project imports that follow this block rely on it.
sys.path.append(os.path.join(scriptdir, "..", "lib"))
sys.path.append(os.path.join(scriptdir, "..", "db"))

import mybibtex.parser
import mybibtex.database
import mybibtex.generator
from confs_years import get_confs_years_inter

import header
import config

# Expose the imported config module as an attribute of mybibtex.generator.
# NOTE(review): presumably the generator reads its settings from there — confirm.
mybibtex.generator.config = config


def read_database(on_db=False):
    """Read the bibliography and return ``(preamble, database)``.

    The preamble is the leading run of blank lines and ``%`` comment lines
    of the target file, returned verbatim. Parsing of the target file
    starts at the first line that is not part of that preamble.

    on_db -- when true read "db/crypto_db.bib", otherwise "db/test.bib".
    """
    if on_db:
        filename = "db/crypto_db.bib"
    else:
        filename = "db/test.bib"

    # The abbreviation and conference-list files go through the same parser
    # before the target file does.
    bib_parser = mybibtex.parser.Parser(encoding="utf8", person_fields=['author'])
    bib_parser.parse_files(["db/abbrev0.bib", "db/crypto_conf_list.bib"])

    with open(filename, encoding="utf8") as bib_file:
        header_lines = []
        while True:
            # Remember where the line about to be read starts, so the file
            # can be rewound to it once the preamble is over.
            line_start = bib_file.tell()
            line = bib_file.readline()
            if line != '\n' and not line.startswith('%'):
                break
            header_lines.append(line)
        bib_file.seek(line_start)
        return "".join(header_lines), bib_parser.parse_file(bib_file)


def write_database(preamble, db, confs_years, on_db=False):
    """Write *preamble* followed by the regenerated BibTeX of *db*.

    The result goes to "db/crypto_db.bib.out" when *on_db* is true and to
    "db/test.bib.out" otherwise; the input file itself is not touched.

    preamble    -- text written verbatim at the top of the output file.
    db          -- database handed to mybibtex.generator.bibtex_gen.
    confs_years -- not used by this function; kept so callers need not change.
    on_db       -- selects which output file name is used.
    """
    filename = ("db/crypto_db.bib" if on_db else "db/test.bib") + ".out"
    # The input is read as UTF-8, so write UTF-8 explicitly instead of
    # relying on the locale's default encoding.
    with open(filename, "w", encoding="utf8") as out:
        out.write(preamble)
        mybibtex.generator.bibtex_gen(out, db, expand_crossrefs=False,
                                      include_crossrefs=False,
                                      remove_empty_fields=True)


################################################################################


def namestrip(s):
    """Return *s* without braces and backslash escapes, trimmed.

    Every '{' and '}' is dropped first; then each backslash together with
    the single character that follows it is removed; finally leading and
    trailing whitespace is stripped.
    """
    unbraced = s.translate({ord('{'): None, ord('}'): None})
    unescaped = re.sub(r'\\.', '', unbraced)
    return unescaped.strip()


def fix_lineage(person):
    """Move lineage suffixes stuck in a braced last name into the lineage part.

    While the text of the last name ends in " Jr.", " Sr.", " II", " III" or
    " IV" (optionally followed by a closing brace), the suffix is cut off
    the final last-name token and appended to ``person._lineage``. If at
    least one suffix was moved and the whole last name is then a single
    ``{letters}`` group, the now unnecessary braces are removed and the
    change is reported on stdout.

    person -- object providing ``get_part_as_text(part)`` and the mutable
              lists ``_last`` and ``_lineage``; it is modified in place.
    """
    lastname = person.get_part_as_text('last')
    lineage = person.get_part_as_text('lineage')
    orig_name = lastname + ' ' + lineage if lineage else lastname

    moves = 0
    while re.search(r' (Jr\.|Sr\.|II|III|IV)}?$', lastname):
        last_token = person._last[-1]
        moves += 1
        idx = last_token.rfind(' ')
        # Only the "{Name Suffix}" shape (one braced token containing a
        # space) is handled; anything else is reported by the assertion.
        assert idx >= 0 and last_token[-1] == '}'

        person._lineage.append(last_token[idx + 1:-1])
        person._last[-1] = last_token[:idx] + '}'
        lastname = person.get_part_as_text('last')

    if re.match('^(II|III|IV)$', lastname):
        print("WARNING: Lineage is seen as lastname for ", person)

    if moves > 0:
        # Check if we can remove the {lastname}.
        # fullmatch, not match: the braces may only be dropped when the
        # *entire* last name is one braced group of letters. A prefix match
        # would also accept e.g. "{Van} {Horn}" or "{Foo}-Bar".
        if re.fullmatch('{[A-Za-z]*}', person.get_part_as_text('last')):
            assert len(person._last) == 1
            person._last[0] = person._last[0][1:-1]

        lastname = person.get_part_as_text('last')
        lineage = person.get_part_as_text('lineage')
        print("Changed \"", orig_name, "\" to \"", lastname + ', ' + lineage, "\"", sep="")


def run(db):
    """Apply fix_lineage to every author of every entry in *db*."""
    for _key, entry in db.entries.items():
        if 'author' not in entry.persons:
            continue
        for person in entry.persons['author']:
            fix_lineage(person)


def main():
    """Command-line entry point: read the database, fix lineages, write it out.

    With ``--db`` the real "db/crypto_db.bib" is processed, otherwise
    "db/test.bib". Progress messages go to stderr.
    """
    # The text must be passed as description=; the first positional
    # parameter of ArgumentParser is prog (the program name in usage lines).
    parser = argparse.ArgumentParser(
        description="Move lineage suffixes (Jr., Sr., II, III, IV) out of "
                    "author last names")
    parser.add_argument("--db", action="store_true",
                        help="Run on actual crypto_db.bib")
    args = parser.parse_args()

    # No backup is taken here: write_database writes to a separate ".out"
    # file and leaves the input untouched.

    print("Reading bibtex source file...", file=sys.stderr, flush=True)
    preamble, db = read_database(args.db)
    print("Bibtex source file is read!", file=sys.stderr, flush=True)

    run(db)

    confs_years = get_confs_years_inter(db, config.confs_missing_years)
    write_database(preamble, db, confs_years, args.db)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
134 changes: 134 additions & 0 deletions fix_von_prefix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
This script needs to be run in the root folder containing the
folders "lib" and "db"
"""

import argparse
import os
import re
import sys
# import shutil
# import time

# Directory that contains this script (symlinks resolved).
scriptdir = os.path.dirname(os.path.realpath(__file__))
# Make the sibling "lib" and "db" folders (one level above this script)
# importable; the project imports that follow this block rely on it.
sys.path.append(os.path.join(scriptdir, "..", "lib"))
sys.path.append(os.path.join(scriptdir, "..", "db"))

import mybibtex.parser
import mybibtex.database
import mybibtex.generator
from confs_years import get_confs_years_inter

import header
import config
from config import confs_missing_years

# Expose the imported config module as an attribute of mybibtex.generator.
# NOTE(review): presumably the generator reads its settings from there — confirm.
mybibtex.generator.config = config


def read_database(on_db=False):
    """Parse the bibliography; return its leading comment block and the database.

    Blank lines and lines starting with ``%`` at the top of the target file
    form the preamble, which is returned unchanged as the first element of
    the result. The parser is handed the file positioned right after it.

    on_db -- when true read "db/crypto_db.bib", otherwise "db/test.bib".
    """
    filename = "db/test.bib"
    if on_db:
        filename = "db/crypto_db.bib"

    # Abbreviations and the conference list are parsed before the target
    # file, using the same parser instance.
    bib_parser = mybibtex.parser.Parser(encoding="utf8", person_fields=['author'])
    bib_parser.parse_files(["db/abbrev0.bib", "db/crypto_conf_list.bib"])

    with open(filename, encoding="utf8") as source:
        collected = []
        position = source.tell()
        current = source.readline()
        while current == '\n' or current.startswith('%'):
            collected.append(current)
            # Offset of the next line, so the first non-preamble line can
            # be handed back to the parser.
            position = source.tell()
            current = source.readline()
        source.seek(position)
        preamble = "".join(collected)
        return preamble, bib_parser.parse_file(source)


def write_database(preamble, db, confs_years, on_db=False):
    """Write *preamble* followed by the regenerated BibTeX of *db*.

    The result goes to "db/crypto_db.bib.out" when *on_db* is true and to
    "db/test.bib.out" otherwise; the input file itself is not touched.

    preamble    -- text written verbatim at the top of the output file.
    db          -- database handed to mybibtex.generator.bibtex_gen.
    confs_years -- not used by this function; kept so callers need not change.
    on_db       -- selects which output file name is used.
    """
    filename = ("db/crypto_db.bib" if on_db else "db/test.bib") + ".out"
    # The input is read as UTF-8, so write UTF-8 explicitly instead of
    # relying on the locale's default encoding.
    with open(filename, "w", encoding="utf8") as out:
        out.write(preamble)
        mybibtex.generator.bibtex_gen(out, db, expand_crossrefs=False,
                                      include_crossrefs=False,
                                      remove_empty_fields=True)


################################################################################


def namestrip(s):
s = s.replace('{', '')
s = s.replace('}', '')
return re.sub('\\\\.', '', s).strip()


def fix_von_prefix_person(person):
    """Move lowercase "von" words out of a braced last name into the prelast part.

    While the text of the last name starts with "{" (optionally followed by
    an apostrophe) and a lowercase letter, the first word inside the first
    last-name token is appended to ``person._prelast`` and removed from
    that token. If at least one word was moved and the whole last name is
    then a single ``{letters}`` group, the braces are removed and the
    change is reported on stdout.

    A braced lowercase last name without a space cannot be split; it is
    reported as "Exception: " and the function returns at that point.

    person -- object providing ``get_part_as_text(part)`` and the mutable
              lists ``_prelast`` and ``_last``; it is modified in place.
    """
    prelast = person.get_part_as_text('prelast')
    lastname = person.get_part_as_text('last')
    orig_name = prelast + ' ' + lastname if prelast else lastname

    # Current exceptions are:
    #   Exception: {abhi} {shelat}
    #   Exception: Bruno {d'Ausbourg}
    #   Exception: Sabah {al-Binali}

    moves = 0
    while re.match("^{'?[a-z]", lastname):
        moves += 1
        idx = person._last[0].find(' ')
        if idx == -1:
            # Should be one of the names above.
            print("Exception: ", person)
            return

        # Skip the opening brace when copying the word; put the brace back
        # in front of what remains of the token.
        person._prelast.append(person._last[0][1:idx])
        person._last[0] = '{' + person._last[0][idx + 1:]
        lastname = person.get_part_as_text('last')

    if moves > 0:
        # Check if we can remove the {lastname}.
        # fullmatch, not match: the braces may only be dropped when the
        # *entire* last name is one braced group of letters. A prefix match
        # would turn e.g. ['{Foo}', 'Bar'] into ['Foo}', 'Ba'].
        if re.fullmatch('{[A-Za-z]*}', person.get_part_as_text('last')):
            person._last[0] = person._last[0][1:]
            person._last[-1] = person._last[-1][:-1]

        prelast = person.get_part_as_text('prelast')
        lastname = person.get_part_as_text('last')
        print("Changed \"", orig_name, "\" to \"", prelast + ' ' + lastname, "\"", sep="")


def run(db):
    """Apply fix_von_prefix_person to every author of every entry in *db*."""
    for _key, entry in db.entries.items():
        if 'author' not in entry.persons:
            continue
        for person in entry.persons['author']:
            fix_von_prefix_person(person)


def main():
    """Command-line entry point: read the database, fix von prefixes, write it out.

    With ``--db`` the real "db/crypto_db.bib" is processed, otherwise
    "db/test.bib". Progress messages go to stderr.
    """
    # The text must be passed as description=; the first positional
    # parameter of ArgumentParser is prog (the program name in usage lines).
    parser = argparse.ArgumentParser(description="Fix von prefixes")
    parser.add_argument("--db", action="store_true",
                        help="Run on actual crypto_db.bib")
    args = parser.parse_args()

    # No backup is taken here: write_database writes to a separate ".out"
    # file and leaves the input untouched.

    print("Reading bibtex source file...", file=sys.stderr, flush=True)
    preamble, db = read_database(args.db)
    print("Bibtex source file is read!", file=sys.stderr, flush=True)

    run(db)

    confs_years = get_confs_years_inter(db, confs_missing_years)
    write_database(preamble, db, confs_years, args.db)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Loading