Skip to content

Commit

Permalink
Merge pull request #19 from Raffaelbdl/limit-rate
Browse files Browse the repository at this point in the history
Limit rate to avoid JPDB limitations
  • Loading branch information
Raffaelbdl authored Oct 21, 2024
2 parents 737cbfb + 05593b5 commit c7a913e
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 5 deletions.
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
<img alt="Shows Kumakyuu in dark mode and Kumayuru in light mode." src="./resources/black.png" align="right" width="40%">
</picture>

[**Installation**](#installation) |
[**Features**](#features) |
[**Common Issues**](#common-issues) |


Kuma Browser brings a few features of the [jpdb.io](https://jpdb.io) search engine directly into Anki.

> [!CAUTION]
Expand Down Expand Up @@ -59,7 +64,7 @@ To start the Kuma Browser, select `Tools` → `Kuma Browser`
- The `Reposition` tab allows you to reposition cards based on the frequency field for a given deck.

- Only the `New` cards will be repositioned.
-

### The JPDB API Vocabulary List Tab

- The `JPDB VocabList` tab allows you to create Anki notes from JPDB vocabulary lists using the JPDB API.
Expand All @@ -85,3 +90,19 @@ I provide my own template, but it can be freely modified from the `config/` fold


*You can switch between くまきゅう and くまゆる by using the light or dark theme.*

## Common Issues

### JPDB Vocabulary List Tab fails

This method of creating notes is known to fail often. This is presumably because jpdb.io rate-limits users so that they cannot send too many requests too quickly 😶 ([#18](https://github.com/Raffaelbdl/kuma-browser/issues/18)). If it happens, you may have been temporarily banned. Wait a bit before retrying either the JPDB Vocabulary List scraper or the JPDB Vocabulary List API.

One way to solve this is to use the API: it should not be limited like the web scraping method.

If you want the example sentences, however, you will need to set a delay between requests. This can be done by setting the `sleep_time` parameter (in seconds) in `config/vl.json`. For example:

```json
{ "sleep_time": 0.1 }
```

Do not rename or remove the `sleep_time` key, as there is no self-repair mechanism 😆
3 changes: 3 additions & 0 deletions kuma/config/vl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"sleep_time": 0.0
}
2 changes: 2 additions & 0 deletions kuma/jpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ class JPDB_Note:
@classmethod
def from_jpdb(cls, url: Url):
jpdb_soup = load_url(url)
if jpdb_soup is None:
return None

expression = jpdb_soup.find("title").text.split(" ")[0]
part_of_speech = extract_part_of_speech(jpdb_soup)
Expand Down
27 changes: 23 additions & 4 deletions kuma/widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import os
from pathlib import Path
import time
from typing import Optional, List

import aqt
Expand Down Expand Up @@ -232,15 +233,19 @@ class VLSearchThread(aqt.QThread):
finished = aqt.pyqtSignal(list)
next_page = aqt.pyqtSignal(int)

def __init__(self, url: str):
def __init__(self, url: str, sleep_time: float):
super().__init__()
self.url = url

self.sleep_time = sleep_time

def run(self):
entries = self.get_all_entries_from_vocab_list(self.url)
self.finished.emit(entries)

def get_all_entries_from_vocab_list(self, vl_url):
time.sleep(self.sleep_time)

try:
self.next_page.emit(int(vl_url.split("=")[-1]))
except ValueError:
Expand Down Expand Up @@ -271,11 +276,13 @@ class VLGenerationThread(aqt.QThread):
finished = aqt.pyqtSignal()
generated = aqt.pyqtSignal(int)

def __init__(self, current_deck: str, urls: List[str]):
def __init__(self, current_deck: str, urls: List[str], sleep_time: float):
super().__init__()
self.current_deck = current_deck
self.urls = urls

self.sleep_time = sleep_time

def run(self):
# cannot be multithreaded due to JPDB constraints
for i, url in enumerate(self.urls):
Expand All @@ -285,8 +292,14 @@ def run(self):
if is_in_deck(self.current_deck, note_id):
continue

time.sleep(self.sleep_time)
jpdb_note = JPDB_Note.from_jpdb(url)

if jpdb_note is None:
print(f"url {url} was not loaded and skipped")
continue # skip
KumaAnki.add_note(jpdb_note, self.current_deck)

self.finished.emit()


Expand Down Expand Up @@ -325,6 +338,12 @@ def __init__(self, parent: aqt.QWidget, *, previous_query: Optional[str] = None)

self.last_query = ""

# help avoid throttle ?
# Load the delay (in seconds, passed to time.sleep) applied before each
# JPDB request from config/vl.json, creating the file with a default of
# 0.0 when it does not exist yet.
self.path_to_config = Path(__file__).resolve().parent / "config" / "vl.json"
if not self.path_to_config.exists():
    # The file must be opened for WRITING here: opening a missing file
    # in "r" mode raises FileNotFoundError before anything is dumped.
    self.path_to_config.parent.mkdir(parents=True, exist_ok=True)
    with self.path_to_config.open("w", encoding="utf-8") as config_file:
        json.dump({"sleep_time": 0.0}, config_file)
# Context managers guarantee the handle is closed after reading.
with self.path_to_config.open("r", encoding="utf-8") as config_file:
    self.sleep_time = json.load(config_file)["sleep_time"]

def layout_init(self):
self._layout.addRow("Query: ", self.query_lineEdit)
self._layout.addRow("Get all notes: ", self.search_button)
Expand Down Expand Up @@ -377,7 +396,7 @@ def search(self) -> None:

self.wait_label.show()

self.search_worker = VLSearchThread(query)
self.search_worker = VLSearchThread(query, self.sleep_time)
self.search_worker.next_page.connect(self._on_searching)
self.search_worker.finished.connect(self._on_search_finished)
self.search_worker.finished.connect(self.search_worker.quit)
Expand Down Expand Up @@ -408,7 +427,7 @@ def generate_or_update(self) -> None:
self.prog_bar.setValue(0)

self.generation_worker = VLGenerationThread(
self.current_deck, self.query_results
self.current_deck, self.query_results, self.sleep_time
)
self.generation_worker.generated.connect(self._on_generating)
self.generation_worker.finished.connect(self._on_generation_finished)
Expand Down

0 comments on commit c7a913e

Please sign in to comment.