Skip to content

Commit

Permalink
feat: add collapsible highlight with context to output (#24)
Browse files Browse the repository at this point in the history
- [ ] I have considered whether this PR needs review, and requested a review if necessary.

Fixes issue #

# Notes for reviewers
Reviewers can skip X, but should pay attention to Y.
  • Loading branch information
MartinBernstorff authored Feb 3, 2024
1 parent 814bb48 commit c82a334
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 18 deletions.
31 changes: 23 additions & 8 deletions memorymarker/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,26 @@ class TimestampHandler:
filepath: Path

def update_timestamp(self) -> None:
    """Overwrite ``self.filepath`` with the current UTC time as an ISO string.

    The file is created if it does not exist yet.
    """
    # write_text opens the file for writing, which creates it when missing,
    # so a separate exists()/touch() step is unnecessary.
    self.filepath.write_text(dt.datetime.now(pytz.UTC).isoformat())

def get_timestamp(self) -> dt.datetime:
return dt.datetime.fromisoformat(self.filepath.read_text())
def get_timestamp(self) -> dt.datetime | None:
try:
dt.datetime.fromisoformat(self.filepath.read_text())
except FileNotFoundError:
return None


@app.command()
def typer_cli(
output_dir: Path = typer.Argument(
Path("questions"), help="Directory to save the generated questions to"
Path("questions"),
help="Directory to save the generated questions to",
file_okay=False,
dir_okay=True,
writable=True,
),
max_n: int = typer.Argument(
1, help="Maximum number of questions to generate from highlights"
Expand All @@ -44,17 +54,22 @@ def typer_cli(
True, help="Only generate questions from highlights since last run"
),
) -> None:
output_dir.mkdir(exist_ok=True, parents=True)
last_run_timestamper = TimestampHandler(output_dir / ".memorymarker")
typer.echo(
f"Last run at UTC {last_run_timestamper.get_timestamp().strftime('%Y-%m-%d %H:%M:%S')}"
)
last_run_timestamp = last_run_timestamper.get_timestamp()

typer.echo("Fetching new highlights...")
highlights = Omnivore().get_highlights()

if only_new:
highlights = highlights.filter(
lambda _: _.updated_at > last_run_timestamper.get_timestamp()
if not last_run_timestamp:
typer.echo("No last run timestamp found, exiting")
return

typer.echo(
f"Last run at UTC {last_run_timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
)
highlights = highlights.filter(lambda _: _.updated_at > last_run_timestamp)
last_run_timestamper.update_timestamp()

if highlights.count() == 0:
Expand Down
14 changes: 13 additions & 1 deletion memorymarker/highlight_providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,23 @@ class OrphanHighlight:
class HydratedHighlight(BaseModel):
source_doc_title: str
source_doc_uri: str

prefix: str | None
highlighted_text: str
context: str
suffix: str | None

source_highlight_uri: str | None = None
updated_at: dt.datetime

@property
def context(self) -> str:
    """Return the highlighted text with its prefix and suffix attached.

    A missing (``None``) or empty prefix/suffix contributes nothing.
    """
    pieces = (
        self.prefix or "",
        self.highlighted_text,
        self.suffix or "",
    )
    return "".join(pieces)


class HighlightSource(Protocol):
def get_highlights(self) -> tuple[OrphanHighlight]:
Expand Down
3 changes: 2 additions & 1 deletion memorymarker/highlight_providers/hydrator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def hydrate_highlights(
highlighted_text=highlight.highlight,
source_doc_uri=highlight.uri,
source_doc_title=highlight.title,
context=context,
prefix=context[:100],
suffix=context[-100:],
), # type: ignore
)

Expand Down
3 changes: 2 additions & 1 deletion memorymarker/highlight_providers/omnivore.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def _parse_highlight(self, highlight: Mapping[str, str]) -> HydratedHighlight:
source_doc_title=self.title,
source_doc_uri=self.uri,
highlighted_text=highlight["quote"],
context=f"{highlight['prefix']} {highlight['quote']} {highlight['suffix']}",
prefix=highlight["prefix"],
suffix=highlight["suffix"],
updated_at=highlight["updatedAt"], # type: ignore
source_highlight_uri=f"https://omnivore.app/me/{self.slug}#{highlight["id"]}",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
'''
Q. What is the meaning of life?
A. 42
[Highlight](https://en.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy#meaning_of_life)

> [!NOTE]- Highlight
> ==42==
> [Link](https://en.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy#meaning_of_life)


'''
Expand Down
8 changes: 5 additions & 3 deletions memorymarker/persist_questions/markdown.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
from pathlib import Path


from memorymarker.question_generator.question_generator import QAPrompt


Expand All @@ -11,15 +10,18 @@ def clean_filename(filename: str) -> str:


def q_to_markdown(prompt: QAPrompt) -> str:
highlight = prompt.hydrated_highlight
return f"""Q. {prompt.question}
A. {prompt.answer}
[Highlight]({prompt.hydrated_highlight.source_highlight_uri})
> [!NOTE]- Highlight
> {highlight.prefix or ""}=={highlight.highlighted_text}=={highlight.suffix.strip() if highlight.suffix is not None else ""}
> [Link]({highlight.source_highlight_uri})
\n"""


def write_md(contents: str, file_title: str, save_dir: Path) -> None:
    """Append markdown to ``<save_dir>/<cleaned file_title>.md``.

    Args:
        contents: Markdown text to append; a trailing newline is added.
        file_title: Title passed through ``clean_filename`` to build the
            file name.
        save_dir: Target directory; created (including parents) if missing.
    """
    save_dir.mkdir(exist_ok=True, parents=True)
    target = save_dir / f"{clean_filename(file_title)}.md"
    # Explicit UTF-8 so non-ASCII highlight text is written the same way on
    # every platform instead of depending on the locale's default encoding.
    with target.open(mode="a", encoding="utf-8") as f:
        f.write(contents + "\n")

Expand Down
2 changes: 2 additions & 0 deletions memorymarker/persist_questions/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class FakeHydratedHighlight(HydratedHighlight):
context: str = "The meaning of life is 42"
source_highlight_uri: str = "https://en.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy#meaning_of_life"
updated_at: dt.datetime = dt.datetime.now()
prefix: str = ""
suffix: str = ""


@pytest.fixture(scope="module")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def model() -> h2q.ChatOpenAI:
@pytest.fixture(scope="module")
def hydrated_highlight() -> HydratedHighlight:
return HydratedHighlight(
context="Mitochondria is the powerhouse of the cell",
prefix="",
suffix=" is the powerhouse of the cell",
highlighted_text="Mitochondria",
source_doc_uri="https://en.wikipedia.org/wiki/Mitochondrion",
source_doc_title="Mitochondrion - Wikipedia",
Expand All @@ -40,14 +41,16 @@ async def test_model_response(
async def test_multi_response(model: h2q.ChatOpenAI) -> None:
highlights = [
HydratedHighlight(
context="Mitochondria is the powerhouse of the cell",
prefix="",
suffix=" is the powerhouse of the cell",
highlighted_text="Mitochondria",
source_doc_uri="https://en.wikipedia.org/wiki/Mitochondrion",
source_doc_title="Mitochondrion - Wikipedia",
updated_at=datetime.now(),
),
HydratedHighlight(
context="The first rule of Fight Club is that you don't talk about Fight Club",
prefix="The first rule of ",
suffix=" is that you don't talk about Fight Club",
highlighted_text="Fight Club",
source_doc_uri="https://en.wikipedia.org/wiki/Fight_Club",
source_doc_title="Fight Club - Wikipedia",
Expand Down

0 comments on commit c82a334

Please sign in to comment.