diff --git a/README.md b/README.md index c90eaac..4db707f 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,11 @@ Shows Kumakyuu in dark mode and Kumayuru in light mode. +[**Installation**](#installation) | +[**Features**](#features) | +[**Common Issues**](#common-issues) | + + Kuma Browser enables a few functionalities from the [jpdb.io](jpdb.io) search engine directly within Anki. > [!CAUTION] @@ -59,7 +64,7 @@ To start the Kuma Browser, select `Tools` → `Kuma Browser` - The `Reposition` tab allows to reposition cards based on the frequency field for a given deck. - Only the `New` cards will be repositioned. -- + ### The JPDB API Vocabulary List Tab - The `JPDB VocabList` tab allows to create Anki notes from JPDB vocabulary lists using the JPDB API. @@ -85,3 +90,19 @@ I provide my own template, but it can be freely modified from the `config/` fold *You can change between くまきゅう and くまゆる by using light or dark theme.* + +## Common Issues + +### JPDB Vocabulary List Tab fails + +This method of creating notes is known to fail often. This is presumably because jpdb.io rate-limits users who send too many requests too quickly 😶 ([#18](https://github.com/Raffaelbdl/kuma-browser/issues/18)). If this happens, you may have been temporarily banned. Wait a bit before trying either the JPDB Vocabulary List scraper or the JPDB Vocabulary List API again. + +One way to solve this is to use the API: it should not be limited like the web scraping method. + +However, if you want the example sentences, you will need to set a delay between requests. This can be done by setting the `sleep_time` parameter (in seconds) in `config/vl.json`. 
For example: + +```json +{ "sleep_time": 0.1 } +``` + +Do not change the key `sleep_time` as there is no self-repair mechanism 😆 \ No newline at end of file diff --git a/kuma/config/vl.json b/kuma/config/vl.json new file mode 100644 index 0000000..6941d6f --- /dev/null +++ b/kuma/config/vl.json @@ -0,0 +1,3 @@ +{ + "sleep_time": 0.0 +} \ No newline at end of file diff --git a/kuma/jpdb.py b/kuma/jpdb.py index 17c1858..3d520aa 100644 --- a/kuma/jpdb.py +++ b/kuma/jpdb.py @@ -116,6 +116,8 @@ class JPDB_Note: @classmethod def from_jpdb(cls, url: Url): jpdb_soup = load_url(url) + if jpdb_soup is None: + return None expression = jpdb_soup.find("title").text.split(" ")[0] part_of_speech = extract_part_of_speech(jpdb_soup) diff --git a/kuma/widget.py b/kuma/widget.py index 07110b1..81873b7 100644 --- a/kuma/widget.py +++ b/kuma/widget.py @@ -3,6 +3,7 @@ import json import os from pathlib import Path +import time from typing import Optional, List import aqt @@ -232,15 +233,19 @@ class VLSearchThread(aqt.QThread): finished = aqt.pyqtSignal(list) next_page = aqt.pyqtSignal(int) - def __init__(self, url: str): + def __init__(self, url: str, sleep_time: float): super().__init__() self.url = url + self.sleep_time = sleep_time + def run(self): entries = self.get_all_entries_from_vocab_list(self.url) self.finished.emit(entries) def get_all_entries_from_vocab_list(self, vl_url): + time.sleep(self.sleep_time) + try: self.next_page.emit(int(vl_url.split("=")[-1])) except ValueError: @@ -271,11 +276,13 @@ class VLGenerationThread(aqt.QThread): finished = aqt.pyqtSignal() generated = aqt.pyqtSignal(int) - def __init__(self, current_deck: str, urls: List[str]): + def __init__(self, current_deck: str, urls: List[str], sleep_time: float): super().__init__() self.current_deck = current_deck self.urls = urls + self.sleep_time = sleep_time + def run(self): # cannot be multithreaded due to JPDB constraints for i, url in enumerate(self.urls): @@ -285,8 +292,14 @@ def run(self): if 
is_in_deck(self.current_deck, note_id): continue + time.sleep(self.sleep_time) jpdb_note = JPDB_Note.from_jpdb(url) + + if jpdb_note is None: + print(f"url {url} was not loaded and skipped") + continue # skip KumaAnki.add_note(jpdb_note, self.current_deck) + self.finished.emit() @@ -325,6 +338,12 @@ def __init__(self, parent: aqt.QWidget, *, previous_query: Optional[str] = None) self.last_query = "" + # Delay in seconds slept before each JPDB request to help avoid throttling; read from config/vl.json, which is created with a default when missing. + self.path_to_config = Path(__file__).resolve().parent / "config" / "vl.json" + if not self.path_to_config.exists(): + self.path_to_config.write_text(json.dumps({"sleep_time": 0.0}), encoding="utf-8") + self.sleep_time = json.loads(self.path_to_config.read_text(encoding="utf-8"))["sleep_time"] + def layout_init(self): self._layout.addRow("Query: ", self.query_lineEdit) self._layout.addRow("Get all notes: ", self.search_button) @@ -377,7 +396,7 @@ def search(self) -> None: self.wait_label.show() - self.search_worker = VLSearchThread(query) + self.search_worker = VLSearchThread(query, self.sleep_time) self.search_worker.next_page.connect(self._on_searching) self.search_worker.finished.connect(self._on_search_finished) self.search_worker.finished.connect(self.search_worker.quit) @@ -408,7 +427,7 @@ def generate_or_update(self) -> None: self.prog_bar.setValue(0) self.generation_worker = VLGenerationThread( - self.current_deck, self.query_results + self.current_deck, self.query_results, self.sleep_time ) self.generation_worker.generated.connect(self._on_generating) self.generation_worker.finished.connect(self._on_generation_finished)