diff --git a/README.md b/README.md
index fb125a04..3a8100c7 100644
--- a/README.md
+++ b/README.md
@@ -131,3 +131,5 @@ algorithms / models*
 [Creating a custom system script](docs/creating_a_custom_system_script.md)
 
 [Developer environment setup](docs/developer_setup.md)
+
+[Accessing the Llama-2 models](docs/accessing_llama_2.md)
diff --git a/align_system/algorithms/llama_index.py b/align_system/algorithms/llama_index.py
index 5c57ae21..826fa66c 100644
--- a/align_system/algorithms/llama_index.py
+++ b/align_system/algorithms/llama_index.py
@@ -34,6 +34,8 @@ def __init__(self, domain_docs_dir=None, device="cuda", model_name="falcon", ret
     def load_model(self):
         if self.model_name == 'falcon':
             self.model_name = 'tiiuae/falcon-7b-instruct'
+        elif self.model_name == 'llama-2':
+            self.model_name = 'meta-llama/Llama-2-7b-chat-hf'
 
         if self.device == 'cuda':
             model_kwargs = {"torch_dtype": torch.float16,
@@ -81,6 +83,6 @@ def load_model(self):
 
     def run_inference(self, prompt):
         if self.retrieval_enabled:
-            return self.query_engine.query(prompt)
+            return str(self.query_engine.query(prompt)).strip()
         else:
-            return self.hf_predictor.predict(prompt)[0]
+            return self.hf_predictor.predict(prompt)[0].strip()
diff --git a/docs/accessing_llama_2.md b/docs/accessing_llama_2.md
new file mode 100644
index 00000000..0e524575
--- /dev/null
+++ b/docs/accessing_llama_2.md
@@ -0,0 +1,6 @@
+## Accessing the Llama-2 model
+
+To access the llama-2 models through huggingface:
+1. Request model access through [Meta's website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) with the same email address you used with your huggingface account
+2. Request model access on [huggingface](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
+3. Add your huggingface token with the huggingface cli on the machine you are working on
\ No newline at end of file