Skip to content

Commit

Permalink
Merge branch 'master' into law_2
Browse files Browse the repository at this point in the history
  • Loading branch information
fzowl authored Apr 23, 2024
2 parents 6469e07 + 1c89e45 commit 5d86755
Show file tree
Hide file tree
Showing 27 changed files with 313 additions and 164 deletions.
2 changes: 1 addition & 1 deletion docs/api_reference/guide_imports.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"\n",
"This notebook shows how to prevent prompt injection attacks using the text classification model from `HuggingFace`.\n",
"\n",
"By default, it uses a *[laiyer/deberta-v3-base-prompt-injection](https://huggingface.co/laiyer/deberta-v3-base-prompt-injection)* model trained to identify prompt injections. \n",
"By default, it uses a *[protectai/deberta-v3-base-prompt-injection-v2](https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2)* model trained to identify prompt injections. \n",
"\n",
"In this notebook, we will use the ONNX version of the model to speed up the inference. "
]
Expand Down Expand Up @@ -49,11 +49,15 @@
"from optimum.onnxruntime import ORTModelForSequenceClassification\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"# Using https://huggingface.co/laiyer/deberta-v3-base-prompt-injection\n",
"model_path = \"laiyer/deberta-v3-base-prompt-injection\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
"tokenizer.model_input_names = [\"input_ids\", \"attention_mask\"] # Hack to run the model\n",
"model = ORTModelForSequenceClassification.from_pretrained(model_path, subfolder=\"onnx\")\n",
"# Using https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2\n",
"model_path = \"laiyer/deberta-v3-base-prompt-injection-v2\"\n",
"revision = None # We recommend specifiying the revision to avoid breaking changes or supply chain attacks\n",
"tokenizer = AutoTokenizer.from_pretrained(\n",
" model_path, revision=revision, model_input_names=[\"input_ids\", \"attention_mask\"]\n",
")\n",
"model = ORTModelForSequenceClassification.from_pretrained(\n",
" model_path, revision=revision, subfolder=\"onnx\"\n",
")\n",
"\n",
"classifier = pipeline(\n",
" \"text-classification\",\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/vectorstores/neo4jvector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"\n",
">[Neo4j](https://neo4j.com/) is an open-source graph database with integrated support for vector similarity search\n",
"\n",
"It supports:\n",
"It supports:\n\n",
"- approximate nearest neighbor search\n",
"- Euclidean similarity and cosine similarity\n",
"- Hybrid search combining vector and keyword searches\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
" ) -> List[Document]:\n",
" \"\"\"Sync implementations for retriever.\"\"\"\n",
" matching_documents = []\n",
" for document in documents:\n",
" for document in self.documents:\n",
" if len(matching_documents) > self.k:\n",
" return matching_documents\n",
"\n",
Expand Down
48 changes: 4 additions & 44 deletions libs/community/langchain_community/chat_message_histories/file.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,5 @@
import json
import logging
from pathlib import Path
from typing import List
from langchain_core.chat_history import FileChatMessageHistory

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import (
BaseMessage,
messages_from_dict,
messages_to_dict,
)

logger = logging.getLogger(__name__)


class FileChatMessageHistory(BaseChatMessageHistory):
"""
Chat message history that stores history in a local file.
Args:
file_path: path of the local file to store the messages.
"""

def __init__(self, file_path: str):
self.file_path = Path(file_path)
if not self.file_path.exists():
self.file_path.touch()
self.file_path.write_text(json.dumps([]))

@property
def messages(self) -> List[BaseMessage]: # type: ignore
"""Retrieve the messages from the local file"""
items = json.loads(self.file_path.read_text())
messages = messages_from_dict(items)
return messages

def add_message(self, message: BaseMessage) -> None:
"""Append the message to the record in the local file"""
messages = messages_to_dict(self.messages)
messages.append(messages_to_dict([message])[0])
self.file_path.write_text(json.dumps(messages))

def clear(self) -> None:
"""Clear session memory from the local file"""
self.file_path.write_text(json.dumps([]))
__all__ = [
"FileChatMessageHistory",
]
Original file line number Diff line number Diff line change
@@ -1,31 +1,5 @@
from typing import List, Sequence
from langchain_core.chat_history import InMemoryChatMessageHistory as ChatMessageHistory

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage
from langchain_core.pydantic_v1 import BaseModel, Field


class ChatMessageHistory(BaseChatMessageHistory, BaseModel):
"""In memory implementation of chat message history.
Stores messages in an in memory list.
"""

messages: List[BaseMessage] = Field(default_factory=list)

async def aget_messages(self) -> List[BaseMessage]:
return self.messages

def add_message(self, message: BaseMessage) -> None:
"""Add a self-created message to the store"""
self.messages.append(message)

async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None:
"""Add messages to the store"""
self.add_messages(messages)

def clear(self) -> None:
self.messages = []

async def aclear(self) -> None:
self.clear()
__all__ = [
"ChatMessageHistory",
]
1 change: 1 addition & 0 deletions libs/community/langchain_community/llms/llamafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def _param_fieldnames(self) -> List[str]:
"streaming",
"tags",
"verbose",
"custom_get_token_ids",
]
attrs = [
k for k in get_pydantic_field_names(self.__class__) if k not in ignore_keys
Expand Down
36 changes: 35 additions & 1 deletion libs/core/langchain_core/caches.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Optional, Sequence
from typing import Any, Dict, Optional, Sequence, Tuple

from langchain_core.outputs import Generation
from langchain_core.runnables import run_in_executor
Expand Down Expand Up @@ -105,3 +105,37 @@ async def aupdate(
async def aclear(self, **kwargs: Any) -> None:
"""Clear cache that can take additional keyword arguments."""
return await run_in_executor(None, self.clear, **kwargs)


class InMemoryCache(BaseCache):
"""Cache that stores things in memory."""

def __init__(self) -> None:
"""Initialize with empty cache."""
self._cache: Dict[Tuple[str, str], RETURN_VAL_TYPE] = {}

def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt and llm_string."""
return self._cache.get((prompt, llm_string), None)

def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
self._cache[(prompt, llm_string)] = return_val

def clear(self, **kwargs: Any) -> None:
"""Clear cache."""
self._cache = {}

async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt and llm_string."""
return self.lookup(prompt, llm_string)

async def aupdate(
self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
) -> None:
"""Update cache based on prompt and llm_string."""
self.update(prompt, llm_string, return_val)

async def aclear(self, **kwargs: Any) -> None:
"""Clear cache."""
self.clear()
63 changes: 63 additions & 0 deletions libs/core/langchain_core/chat_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,20 @@
""" # noqa: E501
from __future__ import annotations

import json
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Sequence, Union

from langchain_core.messages import (
AIMessage,
BaseMessage,
HumanMessage,
get_buffer_string,
messages_from_dict,
messages_to_dict,
)
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import run_in_executor


Expand Down Expand Up @@ -184,3 +189,61 @@ async def aclear(self) -> None:
def __str__(self) -> str:
"""Return a string representation of the chat history."""
return get_buffer_string(self.messages)


class InMemoryChatMessageHistory(BaseChatMessageHistory, BaseModel):
"""In memory implementation of chat message history.
Stores messages in an in memory list.
"""

messages: List[BaseMessage] = Field(default_factory=list)

async def aget_messages(self) -> List[BaseMessage]:
return self.messages

def add_message(self, message: BaseMessage) -> None:
"""Add a self-created message to the store"""
self.messages.append(message)

async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None:
"""Add messages to the store"""
self.add_messages(messages)

def clear(self) -> None:
self.messages = []

async def aclear(self) -> None:
self.clear()


class FileChatMessageHistory(BaseChatMessageHistory):
"""Chat message history that stores history in a local file."""

def __init__(self, file_path: str) -> None:
"""Initialize the file path for the chat history.
Args:
file_path: The path to the local file to store the chat history.
"""
self.file_path = Path(file_path)
if not self.file_path.exists():
self.file_path.touch()
self.file_path.write_text(json.dumps([]))

@property
def messages(self) -> List[BaseMessage]: # type: ignore
"""Retrieve the messages from the local file"""
items = json.loads(self.file_path.read_text())
messages = messages_from_dict(items)
return messages

def add_message(self, message: BaseMessage) -> None:
"""Append the message to the record in the local file"""
messages = messages_to_dict(self.messages)
messages.append(messages_to_dict([message])[0])
self.file_path.write_text(json.dumps(messages))

def clear(self) -> None:
"""Clear session memory from the local file"""
self.file_path.write_text(json.dumps([]))
10 changes: 9 additions & 1 deletion libs/core/langchain_core/language_models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Mapping,
Expand Down Expand Up @@ -97,6 +98,10 @@ class BaseLanguageModel(
"""Tags to add to the run trace."""
metadata: Optional[Dict[str, Any]] = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""
custom_get_token_ids: Optional[Callable[[str], List[int]]] = Field(
default=None, exclude=True
)
"""Optional encoder to use for counting tokens."""

@validator("verbose", pre=True, always=True)
def set_verbose(cls, verbose: Optional[bool]) -> bool:
Expand Down Expand Up @@ -310,7 +315,10 @@ def get_token_ids(self, text: str) -> List[int]:
A list of ids corresponding to the tokens in the text, in order they occur
in the text.
"""
return _get_token_ids_default_method(text)
if self.custom_get_token_ids is not None:
return self.custom_get_token_ids(text)
else:
return _get_token_ids_default_method(text)

def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens present in the text.
Expand Down
Loading

0 comments on commit 5d86755

Please sign in to comment.