Skip to content

Commit

Permalink
ClickHouse: Make it possible to not specify a vector index
Browse files Browse the repository at this point in the history
  • Loading branch information
mneedham committed Mar 5, 2024
1 parent de2d944 commit 69fc31d
Showing 1 changed file with 47 additions and 15 deletions.
62 changes: 47 additions & 15 deletions libs/community/langchain_community/vectorstores/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class ClickhouseSettings(BaseSettings):
username: Optional[str] = None
password: Optional[str] = None

index_type: str = "annoy"
index_type: Optional[str] = "annoy"
# Annoy supports L2Distance and cosineDistance.
index_param: Optional[Union[List, Dict]] = ["'L2Distance'", 100]
index_query_params: Dict[str, str] = {}
Expand Down Expand Up @@ -177,18 +177,8 @@ def __init__(
else self.config.index_param
)

self.schema = f"""\
CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}(
{self.config.column_map['id']} Nullable(String),
{self.config.column_map['document']} Nullable(String),
{self.config.column_map['embedding']} Array(Float32),
{self.config.column_map['metadata']} JSON,
{self.config.column_map['uuid']} UUID DEFAULT generateUUIDv4(),
CONSTRAINT cons_vec_len CHECK length({self.config.column_map['embedding']}) = {dim},
INDEX vec_idx {self.config.column_map['embedding']} TYPE \
{self.config.index_type}({index_params}) GRANULARITY 1000
) ENGINE = MergeTree ORDER BY uuid SETTINGS index_granularity = 8192\
"""
self.schema = self._schema(dim, index_params)

self.dim = dim
self.BS = "\\"
self.must_escape = ("\\", "'")
Expand All @@ -205,10 +195,52 @@ def __init__(
)
# Enable JSON type
self.client.command("SET allow_experimental_object_type=1")
# Enable index
self.client.command(f"SET allow_experimental_{self.config.index_type}_index=1")
if self.config.index_type:
# Enable index
self.client.command(
f"SET allow_experimental_{self.config.index_type}_index=1")
self.client.command(self.schema)

def _schema(self, dim: int, index_params):
    """Build the `CREATE TABLE` statement for the vector store table.

    The column list and the embedding-length constraint are the same whether
    or not an index is requested, so they are assembled once; only the
    `INDEX` clause and the explicit granularity setting depend on
    `self.config.index_type`.

    If an index type is specified that index will be created, otherwise
    no index will be created. In the case of there being no index, a linear
    scan will be performed when the embedding field is queried.

    :param dim: dimension of embeddings, enforced by a CHECK constraint
    :param index_params: parameters used for index; interpolated verbatim
        into the index type's argument list (ignored when no index type
        is configured)
    :return: the `CREATE TABLE IF NOT EXISTS` statement as a string
    """
    config = self.config
    columns = config.column_map
    embedding = columns["embedding"]

    # Definitions shared by the indexed and the index-free table layouts.
    definitions = [
        f"{columns['id']} Nullable(String)",
        f"{columns['document']} Nullable(String)",
        f"{embedding} Array(Float32)",
        f"{columns['metadata']} JSON",
        f"{columns['uuid']} UUID DEFAULT generateUUIDv4()",
        f"CONSTRAINT cons_vec_len CHECK length({embedding}) = {dim}",
    ]
    engine = "ENGINE = MergeTree ORDER BY uuid"

    # Truthiness check on purpose: both None and "" mean "no vector index".
    if config.index_type:
        definitions.append(
            f"INDEX vec_idx {embedding} TYPE "
            f"{config.index_type}({index_params}) GRANULARITY 1000"
        )
        # Only the indexed layout pins index_granularity explicitly; the
        # index-free layout keeps omitting the SETTINGS clause as before.
        engine += " SETTINGS index_granularity = 8192"

    body = ",\n    ".join(definitions)
    return (
        f"CREATE TABLE IF NOT EXISTS {config.database}.{config.table}(\n"
        f"    {body}\n"
        f") {engine}"
    )

@property
def embeddings(self) -> Embeddings:
return self.embedding_function
Expand Down

0 comments on commit 69fc31d

Please sign in to comment.