Skip to content

Commit

Permalink
Import dense-retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
rejasupotaro committed Nov 4, 2024
1 parent 97a9f45 commit fabe9e4
Show file tree
Hide file tree
Showing 29 changed files with 3,149 additions and 78 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ TEMPLATES_DIR:=templates
.PHONY: lint
lint:
python -m ruff check --fix --unsafe-fixes --show-fixes
python -m mypy src/dense-retrieval/src --explicit-package-bases --namespace-packages
python -m mypy src/amazon-product-search/src --explicit-package-bases --namespace-packages
python -m mypy src/training/src --explicit-package-bases --namespace-packages

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ extend-select = ["I"]
[tool.ruff.lint.isort]
known-first-party = [
"amazon_product_search",
"amazon_product_search_dense_retrieval",
"dense_retrieval",
"training",
]

[tool.ruff.lint.pydocstyle]
Expand Down
60 changes: 29 additions & 31 deletions src/amazon-product-search/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/amazon-product-search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ unidic-lite = "^1.0.8"
polars = {version = "^0.16.0", extras = ["numpy", "pandas", "pyarrow"]}
types-requests = "^2.28.11.17"
docker = "^7.0.0"
amazon-product-search-dense-retrieval = {git = "https://github.com/rejasupotaro/amazon-product-search-dense-retrieval.git"}
dense-retrieval = { path = "../dense-retrieval", develop = true }
gcsfs = "^2023.6.0"
jinja2 = "^3.1.2"
google-cloud-bigquery = {extras = ["pandas"], version = "^3.12.0"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from amazon_product_search.core.retrieval.query_vector_cache import QueryVectorCache
from amazon_product_search.core.source import Locale
from amazon_product_search.core.synonyms.synonym_dict import SynonymDict, expand_synonyms
from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders import SBERTEncoder


class QueryBuilder:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tqdm import tqdm

from amazon_product_search.constants import HF
from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders import SBERTEncoder


class SimilarityFilter:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from amazon_product_search.core.retrieval.query_vector_cache import QueryVectorCache
from amazon_product_search.core.source import Locale
from amazon_product_search.core.synonyms.synonym_dict import SynonymDict
from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders import SBERTEncoder

Operator = Literal["and", "weakAnd"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from amazon_product_search.core.nlp.normalizer import normalize_query
from amazon_product_search.core.source import Locale
from amazon_product_search.indexing.options import IndexerOptions
from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from amazon_product_search_dense_retrieval.encoders.modules.pooler import PoolingMode
from dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders.modules.pooler import PoolingMode


def get_input_source(data_dir: str, locale: Locale, nrows: int = -1) -> PTransform:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
InferInput,
)

from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from amazon_product_search_dense_retrieval.encoders.modules.pooler import PoolingMode
from dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders.modules.pooler import PoolingMode


def _product_to_text(product: Dict[str, Any], fields: list[str]) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/amazon-product-search/tasks/vespa_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import amazon_product_search.core.vespa.service as vespa_service
from amazon_product_search.constants import HF, VESPA_DIR
from amazon_product_search.core.vespa.vespa_client import VespaClient
from amazon_product_search_dense_retrieval.encoders import SBERTEncoder
from dense_retrieval.encoders import SBERTEncoder

"""
To run Vespa locally, execute the following commands:
Expand Down

This file was deleted.

19 changes: 19 additions & 0 deletions src/dense-retrieval/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dense Retrieval - Amazon Product Search

## Installation

```shell
$ pyenv install 3.11.8
$ pyenv local 3.11.8
$ pip install poetry
$ poetry env use python
$ poetry install
```

Japanese text processing additionally requires MeCab (with the IPA dictionary) and the UniDic dictionary. Install them as follows:

```shell
# For macOS
$ brew install mecab mecab-ipadic
$ poetry run python -m unidic download
```
Loading

0 comments on commit fabe9e4

Please sign in to comment.