diff --git a/requirements.txt b/requirements.txt index b9ea72e4e78..b6805856860 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,8 @@ isbnlib==3.10.14 luqum==0.11.0 lxml==4.9.4 multipart==0.2.4 +mwparserfromhell==0.6.6 +nameparser==1.1.3 Pillow==10.4.0 psycopg2==2.9.6 pydantic==2.4.0 @@ -30,3 +32,4 @@ sentry-sdk==2.8.0 simplejson==3.19.1 statsd==4.0.1 validate_email==1.3 +wikitextparser==0.56.1 diff --git a/scripts/providers/import_wikisource.py b/scripts/providers/import_wikisource.py new file mode 100644 index 00000000000..6c888defc79 --- /dev/null +++ b/scripts/providers/import_wikisource.py @@ -0,0 +1,827 @@ +""" +To Run: + +PYTHONPATH=. python ./scripts/providers/import_wikisource.py /olsystem/etc/openlibrary.yml +""" + +import itertools +import json +import logging +import re +import time +from dataclasses import dataclass, field +from typing import Any +from urllib.parse import parse_qsl, quote, unquote, urlencode, urlparse, urlunparse + +# Using both mwparserfromhell and wikitextparser because the former doesn't have a markup stripper +# and the latter doesn't have a method to get a template prop by key. +import mwparserfromhell as mw +import requests +import wikitextparser as wtp +from nameparser import HumanName + +from openlibrary.config import load_config +from openlibrary.utils import uniq +from scripts.solr_builder.solr_builder.fn_to_cli import FnToCLI + +logger = logging.getLogger("openlibrary.importer.wikisource") + + +def update_url_with_params(url: str, new_params: dict[str, str]): + url_parts = list(urlparse(url)) + query = dict(parse_qsl(url_parts[4])) + query.update(new_params) + url_parts[4] = urlencode(query, quote_via=quote) + return urlunparse(url_parts) + + +def extract_year(date_string: str) -> str | None: + match = re.match(r'(\d{4})', date_string) + return match.group(1) if match else None + + +# Exclude Wikidata results which are direct instances of these things. +EXCLUDED_WIKIDATA_INSTANCES = [ + "Q386724", # raw works + "Q5185279", # poem + "Q10870555", # reports + "Q49848", # documents + "Q47461344", # written work - this is meant to be a parent class of documents, manuscripts, etc, things that aren't books + "Q697279", # petitions + "Q660651", # memoranda + "Q327611", # flyers + "Q2085515", # minutes + "Q190399", # pamphlets + "Q15916459", # plea +] + +# Exclude Wikidata results which belong to subclasses of these things. +EXCLUDED_WIKIDATA_SUBCLASSES = [ + "Q191067", # articles + "Q4502142", # visual artwork + "Q1784733", # correspondences + "Q35760", # essays + "Q6087062", # legal proceedings + "Q52943", # interviews + "Q814441", # certifications + "Q861911", # orations + "Q2135540", # legal actions + "Q133492", # letters + "Q3150005", # legal instruments + "Q18120378", # lab measurements + "Q1572600", # proclamations + "Q820655", # statutes + "Q2571972", # decrees + "Q253623", # patents + "Q108163", # propositions + "Q628523", # messages + "Q5962346", # classification scheme +] + +# Exclude Wikidata results which belong to these genres. 
+EXCLUDED_WIKIDATA_GENRES = [ + "Q603773", # lectures + "Q861911", # orations + "Q35760", # essays + "Q60797", # sermons + "Q133492", # letters +] + +WIKIDATA_API_URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql" + + +def get_wd_item_id(string: str): + return string.split('/')[-1] + + +@dataclass +class LangConfig: + langcode: str + ol_langcode: str + category_prefix: str + included_category_names: list[str] + excluded_category_names: list[str] + + def _catformat(self, category: str) -> str: + return f"{self.category_prefix}:{category}" + + def _sparql_query(self, category: str) -> str: + # This gets the wikisource page names and wikidata item IDs from a Wikisource generator. + # The generator is a pretty heavy lift, so fetching more metadata will be done in a separate query and in batches. + return ( + '''SELECT DISTINCT + ?page + ?item +WHERE { + SERVICE wikibase:mwapi { + bd:serviceParam wikibase:endpoint "''' + + self.langcode + + '''.wikisource.org"; + wikibase:api "Generator"; + mwapi:generator "categorymembers"; + mwapi:gcmtitle "''' + + self._catformat(category) + + '''". + ?page wikibase:apiOutput mwapi:title. + ?item wikibase:apiOutputItem mwapi:item . + } + + ?item wdt:P31/wdt:P279* ?instanceOf. + ''' + + ''.join( + [ + f"FILTER NOT EXISTS {{ ?item wdt:P31/wdt:P279* wd:{type}. }}\n " + for type in EXCLUDED_WIKIDATA_SUBCLASSES + ] + ) + + ''.join( + [ + f"FILTER NOT EXISTS {{ ?item wdt:P31 wd:{type}. }}\n " + for type in EXCLUDED_WIKIDATA_INSTANCES + ] + ) + + ''.join( + [ + f"FILTER NOT EXISTS {{ ?item wdt:P136 wd:{type}. }}\n " + for type in EXCLUDED_WIKIDATA_GENRES + ] + ) + + ''' + FILTER (!CONTAINS(STR(?page), "/")) + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],mul,''' + + self.langcode + + '''". } +}''' + ) + + def _sparql_url(self, category: str) -> str: + params = {"format": "json", "query": self._sparql_query(category)} + return update_url_with_params(WIKIDATA_API_URL, params) + + @property + def wikisource_api_url(self) -> str: + return f"https://{self.langcode}.wikisource.org/w/api.php" + + @property + def all_wikidata_category_urls(self) -> list[str]: + return [self._sparql_url(c) for c in self.included_category_names] + + @property + def excluded_categories(self) -> list[str]: + return [self._catformat(c) for c in self.excluded_category_names] + + +# Each version of wikisource has different category names and prefixes, +# so the pool of categories to search within and the categories to filter out +# will have different names per wikisource version. +# We need to explicitly exclude irrelevant categories because Wikisource does not have a unique category for books. +# You can add more Wikisource languages here as desired. 
+ws_languages = [ + LangConfig( + langcode="en", + ol_langcode="eng", + category_prefix="Category", + included_category_names=["Validated_texts"], + excluded_category_names=[ + "Subpages", + "Posters", + "Memoranda", + "Legislation-CAGov", + "Constitutional documents", + "National constitutions", + "Manuscripts", + "Political tracts", + "Proclamations", + "Declarations of independence", + "Pamphlets", + "Forms of government", + "PD-USGov", + "PD-CAGov", + "PD-UKGov", + "Early modern speeches", + "Sermons", + "PD-EdictGov", + "Film", + ], + ) +] + + +def format_contributor(raw_name: str) -> str: + name = HumanName(raw_name) + fn = f"{name.first} " if name.first != "" else "" + mid = f"{name.middle} " if name.middle != "" else "" + ln = name.last + suf = f" {name.suffix}" if name.suffix != "" else "" + return f"{fn}{mid}{ln}{suf}" + + +@dataclass +class Author: + friendly_name: str | None = None + birth_date: str | None = None + death_date: str | None = None + + def __hash__(self): + return hash((self.friendly_name, self.birth_date, self.death_date)) + + +@dataclass +class BookRecord: + langconfig: LangConfig + wikisource_page_title: str + title: str | None = None + publish_date: str | None = None + edition: str | None = None + authors: list[Author] = field(default_factory=list) + illustrators: list[str] = field(default_factory=list) + description: str | None = None + subjects: list[str] = field(default_factory=list) + cover: str | None = None + publishers: list[str] = field(default_factory=list) + imagename: str | None = None + categories: list[str] = field(default_factory=list) + ia_id: str | None = None + publish_places: list[str] = field(default_factory=list) + page_count: int | None = None + oclcs: list[str] = field(default_factory=list) + lccn: str | None = None + isbn10: str | None = None + isbn13: str | None = None + + def add_publishers(self, publishers: list[str]) -> None: + self.publishers = uniq(self.publishers + publishers) + + def add_publish_place(self, places: list[str]) -> None: + self.publish_places = uniq(self.publish_places + places) + + def add_authors(self, authors: list[Author]) -> None: + self.authors = uniq( + self.authors + authors, key=lambda author: author.friendly_name + ) + + def add_illustrators(self, illustrators: list[str]) -> None: + self.illustrators = uniq(self.illustrators + illustrators) + + def add_subjects(self, subjects: list[str]) -> None: + self.subjects = uniq(self.subjects + subjects) + + def add_categories(self, categories: list[str]) -> None: + self.categories = uniq(self.categories + categories) + + def add_oclcs(self, oclcs: list[str]) -> None: + self.oclcs = uniq(self.oclcs + oclcs) + + @property + def wikisource_id(self) -> str: + return f"{self.langconfig.langcode}:{self.wikisource_page_title}" + + @property + def source_records(self) -> list[str]: + records = [f"wikisource:{self.wikisource_id}"] + if self.ia_id is not None: + records.insert(0, f"ia:{self.ia_id}") + return records + + def to_dict(self): + publishers = ["Wikisource"] + publishers.extend(self.publishers) + output = { + "title": self.title, + "source_records": self.source_records, + "identifiers": {"wikisource": [self.wikisource_id]}, + "languages": [self.langconfig.ol_langcode], + "ia_id": self.source_records[0], + } + if self.publish_date is not None: + output["publish_date"] = self.publish_date + if self.edition is not None: + output["edition_name"] = self.edition + if self.authors: + output["authors"] = [ + { + "name": author.friendly_name, + "birth_date": 
author.birth_date, + "death_date": author.death_date, + } + for author in self.authors + ] + if self.description is not None: + output["description"] = self.description + if self.subjects: + output["subjects"] = self.subjects + if self.cover is not None: + output["cover"] = self.cover + if publishers: + output["publishers"] = publishers + if self.publish_places: + output["publish_places"] = self.publish_places + if self.page_count: + output["pagination"] = self.page_count + if self.oclcs: + output["oclc_numbers"] = self.oclcs + if self.lccn: + output["lccn"] = self.lccn + if self.illustrators: + output["contributions"] = self.illustrators + if self.isbn10: + output["isbn_10"] = self.isbn10 + if self.isbn13: + output["isbn_13"] = self.isbn13 + + return output + + +def fetch_until_successful(url: str) -> dict: + for _ in range(5): + try: + response = requests.get(url, stream=True) + return response.json() + except requests.exceptions.RequestException as error: + # If too many requests error, or API overloaded, wait 10 seconds and try again + # In testing this feature, this could return a 429 error, 503 error, or an empty response body + if error.response is None or error.response.status_code in (429, 503): + time.sleep(10) + else: + raise SystemExit(error) + raise SystemExit( + f"could not fetch {url} after 5 tries. You may be rate limited, try again later" + ) + + +def update_record_with_wikisource_metadata( + book: BookRecord, book_id: str, new_data: dict, author_map: dict[str, list[str]] +): + if "categories" in new_data: + book.add_categories([cat["title"] for cat in new_data["categories"]]) + + # Parse other params from the infobox + revision_data = new_data.get("revisions", []) + infobox = next( + ( + d["slots"]["main"]["*"] + for d in revision_data + if "slots" in d and "main" in d["slots"] and "*" in d["slots"]["main"] + ), + None, + ) + # Exit if infobox doesn't exist + if infobox is None: + return + wikicode = mw.parse(infobox) + templates = wikicode.filter_templates() + if not templates: + return + template = next( + (template for template in templates if template.name.strip() == "header"), None + ) + if template is None: + return + + # If infobox DOES exist, extract book data from it. These are in try-catches. + # I didn't see a method for the MW parser that checks if a key exists or not + # instead of throwing an error if it doesn't. + # Infobox params do not change from language to language as far as I can tell. + # i.e. "year" will always be "year". + + if book.publish_date is None: + try: + yr = template.get("year").value.strip() + book.publish_date = extract_year(yr) + except ValueError: + pass + + if not [b for b in author_map if book_id in author_map[b]] and not book.authors: + try: + author = template.get("author").value.strip() + if author != "": + authors = re.split(r"(?:\sand\s|,\s?)", author) + if authors: + book.add_authors( + [Author(friendly_name=format_contributor(a)) for a in authors] + ) + except ValueError: + pass + + if not book.illustrators: + try: + illustrator = template.get("illustrator").value.strip() + if illustrator != "": + illustrators = re.split(r"(?:\sand\s|,\s?)", illustrator) + if illustrators: + book.add_illustrators([format_contributor(a) for a in illustrators]) + except ValueError: + pass + + if book.description is None: + try: + # Replace HTML and markup newlines with \n, because they get stripped otherwise + raw = template.get("notes").value.strip() + raw_spaced = re.sub(r"(:?
<br\s*\/?>
|\{\{rule\}\})", "\n", raw) + notes = wtp.remove_markup(raw_spaced) + if notes != "": + book.description = notes + except ValueError: + pass + + try: + subject: str = template.get("portal").value.strip() + if subject != "": + book.add_subjects(subject.split("/")) + except ValueError: + pass + + +def print_records(records: list[BookRecord]): + for rec in records: + r = rec.to_dict() + print(json.dumps(r)) + + +def scrape_wikisource_api( + url: str, + imports: dict[str, BookRecord], + author_map: dict[str, list[str]], +): + cont_url = url + + # Continue until you've reached the end of paginated results + while True: + data = fetch_until_successful(cont_url) + + if "query" not in data or "pages" not in data["query"]: + break + + results = data["query"]["pages"] + + for page in results.values(): + page_identifier = quote(page["title"].replace(' ', '_')) + + key = next( + ( + key + for key in imports + if imports[key].wikisource_page_title == page_identifier + ), + None, + ) + if not key: + print(f"{page_identifier} not found in result set") + continue + book = imports[key] + # MediaWiki's API paginates through pages, page categories, and page images separately. + # This means that when you hit this API requesting both revision (infobox) and image data, + # sequential paginated API responses might contain the same Wikisource book entries, but with different subsets of its properties. + # i.e. Page 1 might give you 50 books where only the first 10 have image data, + # and page 2 might give you the same 50 books but only the last 10 have image data. + update_record_with_wikisource_metadata(book, key, page, author_map) + + # Proceed to next page of API results + if "continue" not in data: + break + cont_url = update_url_with_params(url, data["continue"]) + + +def update_import_with_wikidata_api_response( + impt: BookRecord, book_id: str, obj: Any, author_map: dict[str, list[str]] +): + + # Author ID: Fetch more data about authors at a later time. 
WD query times out if we include author data + if "author" in obj and "value" in obj["author"]: + author_id = get_wd_item_id(obj["author"]["value"]) + if author_id not in author_map: + author_map[author_id] = [] + author_map[author_id].append(book_id) + # If author isn't a WD object, add them as plaintext + elif "authorLabel" in obj and "value" in obj["authorLabel"]: + impt.add_authors( + [Author(friendly_name=format_contributor(obj["authorLabel"]["value"]))] + ) + + # Illustrators + if "illustratorLabel" in obj and "value" in obj["illustratorLabel"]: + impt.add_illustrators([format_contributor(obj["illustratorLabel"]["value"])]) + + # Publisher + if ("publisher" in obj and "value" in obj["publisher"]) or ( + "publisherName" in obj and "value" in obj["publisherName"] + ): + impt.add_publishers( + [ + ( + obj["publisherName"]["value"] + if "publisherLabel" not in obj + else obj["publisherLabel"]["value"] + ) + ] + ) + + # Edition + if "editionLabel" in obj and "value" in obj["editionLabel"]: + impt.edition = obj["editionLabel"]["value"] + + # Subject + if "subjectLabel" in obj and "value" in obj["subjectLabel"]: + impt.add_subjects([obj["subjectLabel"]["value"]]) + + # Date + if "date" in obj and "value" in obj["date"]: + impt.publish_date = extract_year(obj["date"]["value"]) + + # IA ID + if "iaId" in obj and "value" in obj["iaId"]: + impt.ia_id = obj["iaId"]["value"] + + # Publish place + if "publicationPlaceLabel" in obj and "value" in obj["publicationPlaceLabel"]: + impt.add_publish_place([obj["publicationPlaceLabel"]["value"]]) + + # OCLC + if "oclcLabel" in obj and "value" in obj["oclcLabel"]: + impt.add_oclcs([obj["oclcLabel"]["value"]]) + + # LCCN + if "lccn" in obj and "value" in obj["lccn"]: + impt.lccn = obj["lccn"]["value"] + + # ISBN10 + if "isbn10" in obj and "value" in obj["isbn10"]: + impt.isbn10 = obj["isbn10"]["value"] + + # ISBN13 + if "isbn13" in obj and "value" in obj["isbn13"]: + impt.isbn13 = obj["isbn13"]["value"] + + +def scrape_wikidata_api( + url: str, + cfg: LangConfig, + imports: dict[str, BookRecord], + author_map: dict[str, list[str]], +): + # Unsure if this is supposed to be paginated. Validated Texts only returns one page of JSON results. + # The "while true" here is simply to retry requests that fail due to API limits. + data = fetch_until_successful(url) + + if "results" not in data or "bindings" not in data["results"]: + print("Invalid Wikidata response. Exiting.") + return + + item_ids = [] + + for binding in data["results"]["bindings"]: + if "value" not in binding.get('item', {}): + print("no value in binding:", binding) + continue + + item_id = get_wd_item_id(binding["item"]["value"]) + item_ids.append(item_id) + imports[item_id] = BookRecord( + wikisource_page_title=quote(binding["page"]["value"].replace(' ', '_')), + langconfig=cfg, + ) + + if not item_ids: + print("Exiting.") + return + + # Get book metadata from the wikidata API using 50 wikidata book IDs at a time + for batch in itertools.batched(item_ids, 50): + + # "Title" and "page" (retrieved from the previous query) are often similar, but sometimes not exactly the same. + # "Page" (the wikisource page ID) will sometimes contain extra info like the year of publishing, etc, + # and is used to hyperlink back to Wikisource. + # "Title" on the other hand is the actual title of the work that we would call it on OL. + # Publisher data is weird. 
This book https://www.wikidata.org/wiki/Q51423720 for example + # returns a weird URL for publisherLabel if retrieved with wdt:P123 instead of p:P123 + # but it has a qualifier property, pq:P1932, which contains the raw text (non wikidata item) publisher name if it exists. + query = ( + '''SELECT DISTINCT + ?item + ?itemLabel + ?title + ?author + ?authorLabel + ?illustrator + ?illustratorLabel + ?publisher + ?publisherName + ?publicationPlaceLabel + ?editionLabel + ?pageCount + ?date + ?subjectLabel + ?imageUrl + ?iaId + ?oclcLabel + ?lccn + ?isbn10 + ?isbn13 +WHERE { + VALUES ?item {''' + + ''.join([f"wd:{id}\n " for id in batch]) + + '''} + OPTIONAL { ?item wdt:P1476 ?title. } + OPTIONAL { ?item wdt:P50 ?author. } + OPTIONAL { ?item wdt:P110 ?illustrator. } + OPTIONAL { + ?item p:P123 ?publisherStatement. + ?publisherStatement ps:P123 ?publisher. + ?publisherStatement pq:P1932 ?publisherName. + } + OPTIONAL { ?item wdt:P291 ?publicationPlace. } + OPTIONAL { ?item wdt:P393 ?edition. } + OPTIONAL { ?item wdt:P577 ?date. } + OPTIONAL { ?item wdt:P921 ?subject. } + OPTIONAL { ?item wdt:P18 ?image. } + OPTIONAL { ?item wdt:P724 ?iaId. } + OPTIONAL { ?item wdt:P1104 ?pageCount. } + OPTIONAL { ?item wdt:P243 ?oclc. } + OPTIONAL { ?item wdt:P244 ?lccn. } + OPTIONAL { ?item wdt:P957 ?isbn10. } + OPTIONAL { ?item wdt:P212 ?isbn13. } + BIND(CONCAT("https://commons.wikimedia.org/wiki/Special:FilePath/", REPLACE(STR(?image), "^.*[/#]", "")) AS ?imageUrl) + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],mul,''' + + cfg.langcode + + '''". } +}''' + ) + # Get most metadata from wikidata + metadata_url = update_url_with_params( + WIKIDATA_API_URL, {"format": "json", "query": query} + ) + + data = fetch_until_successful(metadata_url) + + if "results" not in data or "bindings" not in data["results"]: + continue + + ids_for_wikisource_api = [] + + results = [ + obj + for obj in data["results"]["bindings"] + if "item" in obj + and "value" in obj["item"] + and not ( + # skip if no title or item label exists in the result + ( + ("title" not in obj or "value" not in obj["title"]) + and ("itemLabel" not in obj or "value" not in obj["itemLabel"]) + ) + # skip if duplicate result of an existing record with a different language title + or ( + "title" in obj + and "xml:lang" in obj["title"] + and obj["title"]["xml:lang"] != cfg.langcode + ) + ) + ] + + for obj in results: + title: str = ( + obj["title"]["value"] + if "title" in obj and "value" in obj["title"] + else obj["itemLabel"]["value"] + ) + book_id = get_wd_item_id(obj["item"]["value"]) + impt = imports[book_id] + + impt.title = title + ids_for_wikisource_api.append(impt.wikisource_page_title) + + update_import_with_wikidata_api_response(impt, book_id, obj, author_map) + + # For some reason, querying 50 titles can sometimes bring back more than 50 results, + # so we'll still explicitly do wikisource scraping in chunks of exactly 50. + for ws_batch in itertools.batched(ids_for_wikisource_api, 50): + + # Get more info from Wikisource infoboxes that Wikidata statements don't have, like subjects and descriptions + ws_api_url = update_url_with_params( + cfg.wikisource_api_url, + { + "action": "query", + # these are already urlencoded, decode them before they get urlencoded again + "titles": "|".join([unquote(id) for id in ws_batch]), + # Relevant page data. The inclusion of |revisions, and rvprop/rvslots, are used to get book info from the page's infobox. 
+ "prop": "categories|revisions", + "rvprop": "content", + "rvslots": "main", + # Include as many categories per response as possible + "cllimit": "max", + "format": "json", + }, + ) + + scrape_wikisource_api(ws_api_url, imports, author_map) + + # Use wikidata image URL if it exists + for obj in results: + book_id = get_wd_item_id(obj["item"]["value"]) + impt = imports[book_id] + + if ( + impt.imagename is None + and "imageUrl" in obj + and "value" in obj["imageUrl"] + ): + impt.imagename = obj["imageUrl"]["value"] + impt.cover = obj["imageUrl"]["value"] + + +def fix_contributor_data( + imports: dict[str, BookRecord], map: dict[str, list[str]], cfg: LangConfig +): + contributor_ids = list(map.keys()) + for batch in itertools.batched(contributor_ids, 50): + query = ( + '''SELECT DISTINCT + ?contributor + ?contributorLabel + ?birthDate + ?deathDate +WHERE { + VALUES ?contributor {''' + + ''.join([f"wd:{id}\n " for id in batch]) + + '''} + OPTIONAL { ?contributor wdt:P569 ?birthDate. } + OPTIONAL { ?contributor wdt:P570 ?deathDate. } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],mul,''' + + cfg.langcode + + '''". } +}''' + ) + metadata_url = update_url_with_params( + WIKIDATA_API_URL, {"format": "json", "query": query} + ) + + data = fetch_until_successful(metadata_url) + + if "results" not in data or "bindings" not in data["results"]: + continue + + results = [ + obj + for obj in data["results"]["bindings"] + if "contributor" in obj and "value" in obj["contributor"] + ] + + for obj in results: + contributor_id = get_wd_item_id(obj["contributor"]["value"]) + + # Don't include author if their name is incomplete, for whatever reason + if "contributorLabel" in obj and "value" in obj["contributorLabel"]: + contributor = Author( + friendly_name=format_contributor(obj["contributorLabel"]["value"]) + ) + + if "birthDate" in obj and "value" in obj["birthDate"]: + contributor.birth_date = obj["birthDate"]["value"] + + if "deathDate" in obj and "value" in obj["deathDate"]: + contributor.death_date = obj["deathDate"]["value"] + + if contributor_id in map: + book_ids = map[contributor_id] + for book_id in book_ids: + imports[book_id].add_authors([contributor]) + + +# If we want to process all Wikisource pages in more than one category, we have to do one API call per category per language. +def process_all_books(cfg: LangConfig): + imports: dict[str, BookRecord] = {} + author_map: dict[str, list[str]] = {} + + for url in cfg.all_wikidata_category_urls: + scrape_wikidata_api(url, cfg, imports, author_map) + + fix_contributor_data(imports, author_map, cfg) + + batch: list[BookRecord] = [] + + for book in list(imports.values()): + # Skip if the book belongs to an ignored Wikisource page category, such as subpages (chapters), posters, etc + excluded_categories = [ + c for c in book.categories if c in cfg.excluded_categories + ] + if excluded_categories: + continue + + batch.append(book) + + if batch: + print_records(batch) + + +def main(ol_config: str): + """ + :param str ol_config: Path to openlibrary.yml file + """ + load_config(ol_config) + + for ws_language in ws_languages: + process_all_books(ws_language) + + +if __name__ == "__main__": + FnToCLI(main).run()
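The pure helpers at the top of the script are easy to sanity-check in isolation. A minimal smoke-test sketch follows; it is not part of the patch, and the module import path is an assumption:

    # Hypothetical smoke tests; the module path below is an assumption.
    from scripts.providers.import_wikisource import (
        extract_year,
        format_contributor,
        update_url_with_params,
    )

    # extract_year() only matches a 4-digit year at the start of the string.
    assert extract_year("1869-07-01") == "1869"
    assert extract_year("circa 1869") is None

    # update_url_with_params() merges new params into an existing query string.
    assert update_url_with_params(
        "https://en.wikisource.org/w/api.php?format=json", {"action": "query"}
    ) == "https://en.wikisource.org/w/api.php?format=json&action=query"

    # format_contributor() normalizes "Last, First" order via nameparser.
    assert format_contributor("Dickens, Charles") == "Charles Dickens"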
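For reference, print_records() emits one JSON object per line. A sparsely populated record (hypothetical page title) serializes roughly as shown below; note that per to_dict(), "publishers" always contains at least "Wikisource" and "ia_id" mirrors the first source record:

    # Hypothetical record; only the required fields are populated.
    rec = BookRecord(
        langconfig=ws_languages[0],  # the "en" config
        wikisource_page_title="A_Christmas_Carol",
        title="A Christmas Carol",
    )
    print_records([rec])
    # Emits (as a single line):
    # {"title": "A Christmas Carol",
    #  "source_records": ["wikisource:en:A_Christmas_Carol"],
    #  "identifiers": {"wikisource": ["en:A_Christmas_Carol"]},
    #  "languages": ["eng"],
    #  "ia_id": "wikisource:en:A_Christmas_Carol",
    #  "publishers": ["Wikisource"]}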
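One portability note: itertools.batched, used here to chunk the Wikidata and Wikisource requests into groups of 50, only exists on Python 3.12+. On older interpreters the recipe from the itertools documentation is a drop-in substitute:

    # Equivalent of itertools.batched for Python < 3.12
    # (simplified from the recipe in the itertools docs).
    from itertools import islice

    def batched(iterable, n):
        it = iter(iterable)
        while batch := tuple(islice(it, n)):
            yield batch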