Skip to content

Commit

Permalink
feat(#27): add select command to select between articles
Browse files Browse the repository at this point in the history
Fixes #27
  • Loading branch information
MartinBernstorff committed Feb 5, 2024
1 parent 8d4a213 commit 885bad1
Show file tree
Hide file tree
Showing 17 changed files with 177 additions and 135 deletions.
20 changes: 16 additions & 4 deletions memorymarker/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import pytz
import typer

from memorymarker.highlight_providers.omnivore import Omnivore
from memorymarker.cli.document_selector import select_documents
from memorymarker.document_providers.omnivore import Omnivore
from memorymarker.persist_questions.markdown import write_qa_prompt_to_md
from memorymarker.question_generator.question_generator import (
highlights_to_questions,
Expand All @@ -33,7 +34,7 @@ def update_timestamp(self) -> None:

def get_timestamp(self) -> dt.datetime | None:
try:
dt.datetime.fromisoformat(self.filepath.read_text())
return dt.datetime.fromisoformat(self.filepath.read_text())
except FileNotFoundError:
return None

Expand All @@ -53,17 +54,27 @@ def typer_cli(
only_new: bool = typer.Option(
True, help="Only generate questions from highlights since last run"
),
select: bool = typer.Option(
False, help="Prompt to select which documents to generate questions from"
),
) -> None:
output_dir.mkdir(exist_ok=True, parents=True)
last_run_timestamper = TimestampHandler(output_dir / ".memorymarker")
last_run_timestamp = last_run_timestamper.get_timestamp()

typer.echo("Fetching new highlights...")
highlights = Omnivore().get_highlights()
typer.echo("Fetching documents")
documents = Omnivore().get_documents().filter(lambda _: len(_.highlights) > 0)

if select:
documents = select_documents(documents)

typer.echo("Processing to highlights")
highlights = documents.map(lambda _: _.get_highlights()).flatten()

if only_new:
if not last_run_timestamp:
typer.echo("No last run timestamp found, exiting")
last_run_timestamper.update_timestamp()
return

typer.echo(
Expand All @@ -88,6 +99,7 @@ def typer_cli(

typer.echo("Writing questions to markdown...")
for question in questions:
typer.echo(f"Writing question to {question.title}")
write_qa_prompt_to_md(save_dir=output_dir, prompt=question)


Expand Down
12 changes: 12 additions & 0 deletions memorymarker/cli/document_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import questionary
from iterpy._iter import Iter

from ..document_providers.Document import Document


def select_documents(docs: Iter[Document]) -> Iter[Document]:
    """Ask the user which documents to keep, identified by title.

    Args:
        docs: Documents offered as choices in a checkbox prompt.

    Returns:
        The documents whose title was ticked. When there is nothing to
        choose from, or the prompt yields no answer, no document is selected.
    """
    doc_titles = docs.map(lambda d: d.title).to_list()

    if doc_titles:
        answer = questionary.checkbox(
            message="Select documents", choices=doc_titles
        ).ask()
    else:
        # Nothing to choose from: skip the prompt entirely.
        answer = []

    # `ask()` returns None when the prompt is cancelled; treat that as an
    # empty selection instead of failing on `title in None`.
    selected_titles = frozenset(answer or ())
    return docs.filter(lambda d: d.title in selected_titles)
25 changes: 25 additions & 0 deletions memorymarker/document_providers/ContextualizedHighlight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pydantic import BaseModel


import datetime as dt


class ContextualizedHighlight(BaseModel):
    """A highlighted passage plus the text around it and where it came from."""

    source_doc_title: str
    source_doc_uri: str

    # Text before / after the highlighted passage; either may be None.
    prefix: str | None
    highlighted_text: str
    suffix: str | None

    source_highlight_uri: str | None = None
    updated_at: dt.datetime

    @property
    def context(self) -> str:
        """Return prefix, highlighted text and suffix joined into one string.

        A missing (or empty) prefix or suffix contributes nothing.
        """
        pieces = (self.prefix or "", self.highlighted_text, self.suffix or "")
        return "".join(pieces)
27 changes: 27 additions & 0 deletions memorymarker/document_providers/Document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Any, Mapping, Sequence

from iterpy._iter import Iter
from pydantic import BaseModel

from .ContextualizedHighlight import ContextualizedHighlight


class Document(BaseModel):
    """A source document together with its raw highlight payloads."""

    title: str
    uri: str
    slug: str
    # Raw highlight mappings; `_parse_highlight` reads the keys
    # "quote", "prefix", "suffix", "updatedAt" and "id" from each one.
    highlights: Sequence[Mapping[str, Any]]

    def _parse_highlight(self, highlight: Mapping[str, Any]) -> ContextualizedHighlight:
        """Build a ContextualizedHighlight from one raw highlight mapping.

        Args:
            highlight: Raw highlight payload belonging to this document.

        Returns:
            The highlight, tagged with this document's title and URI and a
            link built from this document's slug and the highlight id.

        Raises:
            KeyError: If one of the expected keys is absent.
        """
        return ContextualizedHighlight(
            source_doc_title=self.title,
            source_doc_uri=self.uri,
            highlighted_text=highlight["quote"],
            prefix=highlight["prefix"],
            suffix=highlight["suffix"],
            # Passed through as-is; the model declares this field as a datetime.
            updated_at=highlight["updatedAt"],
            # Single quotes inside the f-string keep this valid on Python < 3.12.
            source_highlight_uri=f"https://omnivore.app/me/{self.slug}#{highlight['id']}",
        )

    def get_highlights(self) -> Iter[ContextualizedHighlight]:
        """Return this document's highlights parsed into ContextualizedHighlight objects."""
        return Iter(self.highlights).map(self._parse_highlight)
File renamed without changes.
34 changes: 34 additions & 0 deletions memorymarker/document_providers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import datetime as dt
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol, Sequence

from iterpy._iter import Iter

from memorymarker.document_providers.ContextualizedHighlight import (
ContextualizedHighlight,
)

from .Document import Document


@dataclass(frozen=True)
class OrphanHighlight:
    """Immutable record of a highlight string with the URI and title of its source."""

    highlight: str
    uri: str
    title: str


class DocumentProvider(Protocol):
    """Structural interface for anything that can supply documents."""

    def get_documents(self) -> Iter[Document]:
        """Return the documents available from this provider."""


class HighlightManager(Protocol):
    """Structural interface pairing a document provider with a timestamp file."""

    # NOTE(review): presumably records when highlights were last fetched — confirm with implementers.
    timestamp_file: Path
    source: DocumentProvider

    def get_highlights_since_update(
        self, date: dt.datetime
    ) -> Sequence[ContextualizedHighlight]:
        """Return highlights relative to ``date``.

        NOTE(review): the name suggests "updated after ``date``" — confirm
        the exact cut-off semantics against an implementation.
        """
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import requests
from bs4 import BeautifulSoup, NavigableString, Tag
from joblib import Memory
from memorymarker.document_providers.ContextualizedHighlight import (
ContextualizedHighlight,
)

from memorymarker.highlight_providers.base import (
HydratedHighlight,
from memorymarker.document_providers.base import (
OrphanHighlight,
)

Expand Down Expand Up @@ -80,8 +82,8 @@ def __init__(self, soup_downloader: Callable[[str], BeautifulSoup]) -> None:
def hydrate_highlights(
self,
highlights: Sequence[OrphanHighlight],
) -> Sequence[HydratedHighlight | None]:
hydrated_highlights: list[HydratedHighlight | None] = []
) -> Sequence[ContextualizedHighlight | None]:
hydrated_highlights: list[ContextualizedHighlight | None] = []
for highlight in highlights:
try:
page = urlopen(highlight.uri)
Expand All @@ -96,7 +98,7 @@ def hydrate_highlights(
highlight=highlight.highlight,
)
hydrated_highlights.append(
HydratedHighlight(
ContextualizedHighlight(
highlighted_text=highlight.highlight,
source_doc_uri=highlight.uri,
source_doc_title=highlight.title,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup

from memorymarker.highlight_providers.hydrator.main import ContextParser
from memorymarker.document_providers.hydrator.main import ContextParser


def test_context_parser():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dotenv import load_dotenv
from pydantic import BaseModel

from memorymarker.highlight_providers.base import (
from memorymarker.document_providers.base import (
OrphanHighlight,
)

Expand Down
36 changes: 36 additions & 0 deletions memorymarker/document_providers/omnivore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
from dataclasses import dataclass
from typing import Mapping

from iterpy._iter import Iter
from omnivoreql import OmnivoreQL

from memorymarker.document_providers.Document import Document

from .base import DocumentProvider


@dataclass
class Omnivore(DocumentProvider):
    """Document provider backed by an OmnivoreQL client."""

    def __post_init__(self):
        """Create the client from the ``OMNIVORE_API_KEY`` environment variable.

        Raises:
            ValueError: If the variable is unset or empty.
        """
        api_key = os.environ.get("OMNIVORE_API_KEY")
        if not api_key:
            raise ValueError("OMNIVORE_API_KEY environment variable not set")
        self.client = OmnivoreQL(api_key)

    def _parse_doc(self, document: Mapping[str, str]) -> Document:
        """Convert one raw article node into a Document."""
        fields = {
            "title": document["title"],
            "uri": document["url"],
            "highlights": document["highlights"],
            "slug": document["slug"],
        }
        return Document(**fields)  # type: ignore

    def get_documents(self) -> Iter[Document]:
        """Fetch up to 1000 articles from the client and return them as Documents."""
        response = self.client.get_articles(limit=1000)
        nodes = Iter(response["search"]["edges"]).map(lambda edge: edge["node"])
        return nodes.map(self._parse_doc).flatten()
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@


def test_omnivore():
    """Check that the Omnivore provider returns at least one document."""
    # The provider exposes `get_documents()`; the stale `get_highlights()` call is gone.
    documents = Omnivore().get_documents()
    assert documents.count() > 0
49 changes: 0 additions & 49 deletions memorymarker/highlight_providers/base.py

This file was deleted.

61 changes: 0 additions & 61 deletions memorymarker/highlight_providers/omnivore.py

This file was deleted.

4 changes: 2 additions & 2 deletions memorymarker/persist_questions/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import memorymarker.persist_questions.markdown as markdown
from memorymarker.question_generator.question_generator import QAPrompt

from ..highlight_providers.base import HydratedHighlight
from ..document_providers.ContextualizedHighlight import ContextualizedHighlight


class FakeHydratedHighlight(HydratedHighlight):
class FakeHydratedHighlight(ContextualizedHighlight):
source_doc_title: str = "The Hitchhiker's Guide to the Galaxy"
source_doc_uri: str = (
"https://en.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy"
Expand Down
Loading

0 comments on commit 885bad1

Please sign in to comment.