Commit

Merge branch 'master' into issue_27485_fix
VedatAlpGoktepe authored Nov 7, 2024
2 parents a6c08b9 + 2cb3927 commit 38a6ce5
Showing 51 changed files with 2,968 additions and 545 deletions.
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
@@ -0,0 +1,2 @@
/.github/ @efriis @baskaryan @ccurme
/libs/packages.yml @efriis
6 changes: 0 additions & 6 deletions .github/workflows/_integration_test.yml
@@ -41,12 +41,6 @@ jobs:
shell: bash
run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"

- name: 'Authenticate to Google Cloud'
id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'

- name: Run integration tests
shell: bash
env:
23 changes: 11 additions & 12 deletions .github/workflows/_release.yml
@@ -103,14 +103,19 @@ jobs:
if [ -z "$PREV_TAG" ]; then
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
echo $REGEX
PREV_TAG=$(git tag --sort=-creatordate | grep -P $REGEX || true | head -1)
PREV_TAG=$(git tag --sort=-creatordate | (grep -P $REGEX || true) | head -1)
fi
# confirm prev-tag actually exists in git repo with git tag
GIT_TAG_RESULT=$(git tag -l "$PREV_TAG")
if [ -z "$GIT_TAG_RESULT" ]; then
echo "Previous tag $PREV_TAG not found in git repo"
exit 1
# if PREV_TAG is empty, let it be empty
if [ -z "$PREV_TAG" ]; then
echo "No previous tag found - first release"
else
# confirm prev-tag actually exists in git repo with git tag
GIT_TAG_RESULT=$(git tag -l "$PREV_TAG")
if [ -z "$GIT_TAG_RESULT" ]; then
echo "Previous tag $PREV_TAG not found in git repo"
exit 1
fi
fi
@@ -262,12 +267,6 @@ jobs:
make tests
working-directory: ${{ inputs.working-directory }}

- name: 'Authenticate to Google Cloud'
id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'

- name: Import integration test dependencies
run: poetry install --with test,test_integration
working-directory: ${{ inputs.working-directory }}
120 changes: 110 additions & 10 deletions docs/docs/how_to/graph_constructing.ipynb
@@ -44,6 +44,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
@@ -105,7 +108,7 @@
"os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n",
"os.environ[\"NEO4J_PASSWORD\"] = \"password\"\n",
"\n",
"graph = Neo4jGraph()"
"graph = Neo4jGraph(refresh_schema=False)"
]
},
{
@@ -149,8 +152,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='MARRIED'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='PROFESSOR')]\n"
"Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='MARRIED', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={})]\n"
]
}
],
@@ -191,8 +194,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='WORKED_AT')]\n"
"Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
]
}
],
@@ -209,6 +212,44 @@
"print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To define the graph schema more precisely, consider using a three-tuple approach for relationships. In this approach, each tuple consists of three elements: the source node, the relationship type, and the target node."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
]
}
],
"source": [
"allowed_relationships = [\n",
" (\"Person\", \"SPOUSE\", \"Person\"),\n",
" (\"Person\", \"NATIONALITY\", \"Country\"),\n",
" (\"Person\", \"WORKED_AT\", \"Organization\"),\n",
"]\n",
"\n",
"llm_transformer_tuple = LLMGraphTransformer(\n",
" llm=llm,\n",
" allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
" allowed_relationships=allowed_relationships,\n",
")\n",
"llm_transformer_tuple = llm_transformer_filtered.convert_to_graph_documents(documents)\n",
"print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
"print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -229,15 +270,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes:[Node(id='Marie Curie', type='Person', properties={'born_year': '1867'}), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='WORKED_AT')]\n"
"Nodes:[Node(id='Marie Curie', type='Person', properties={'born_year': '1867'}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Poland', type='Country', properties={}), Node(id='France', type='Country', properties={})]\n",
"Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Poland', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='France', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
]
}
],
@@ -264,12 +305,71 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"graph.add_graph_documents(graph_documents_props)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Most graph databases support indexes to optimize data import and retrieval. Since we might not know all the node labels in advance, we can handle this by adding a secondary base label to each node using the `baseEntityLabel` parameter."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"graph.add_graph_documents(graph_documents, baseEntityLabel=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Results will look like:\n",
"\n",
"![graph_construction3.png](../../static/img/graph_construction3.png)\n",
"\n",
"The final option is to also import the source documents for the extracted nodes and relationships. This approach lets us track which documents each entity appeared in."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"graph.add_graph_documents(graph_documents, include_source=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Graph will have the following structure:\n",
"\n",
"![graph_construction4.png](../../static/img/graph_construction4.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this visualization, the source document is highlighted in blue, with all entities extracted from it connected by `MENTIONS` relationships."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -288,7 +388,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.5"
}
},
"nbformat": 4,
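
Taken together, the changes to this notebook add tuple-constrained relationship extraction plus the baseEntityLabel and include_source import options. Below is a minimal end-to-end sketch of how those pieces fit; it is an illustration, not the notebook's exact code. The model name, credentials, and import paths are assumptions and may differ across LangChain versions.

import os

from langchain_community.graphs import Neo4jGraph
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Placeholder credentials -- swap in real values for your environment.
os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["NEO4J_URI"] = "bolt://localhost:7687"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "password"

graph = Neo4jGraph(refresh_schema=False)
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")  # assumed model choice

# Constrain extraction with (source type, relationship, target type) tuples,
# as introduced in the notebook above.
llm_transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Organization"],
    allowed_relationships=[
        ("Person", "SPOUSE", "Person"),
        ("Person", "NATIONALITY", "Country"),
        ("Person", "WORKED_AT", "Organization"),
    ],
)

text = "Marie Curie, born in 1867, was a Polish and naturalised-French physicist."
graph_documents = llm_transformer.convert_to_graph_documents(
    [Document(page_content=text)]
)

# baseEntityLabel adds a secondary __Entity__ label that can be indexed;
# include_source keeps MENTIONS links back to the source document.
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)

With baseEntityLabel=True every extracted node carries one common label to index on even when the entity labels are not known in advance, and include_source=True stores the source Document node connected to its entities via MENTIONS relationships, as described in the notebook.
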
99 changes: 97 additions & 2 deletions docs/docs/integrations/chat/openai.ipynb
@@ -509,6 +509,101 @@
"output_message.content"
]
},
{
"cell_type": "markdown",
"id": "5c35d0a4-a6b8-4d35-a02b-a37a8bda5692",
"metadata": {},
"source": [
"## Predicted output\n",
"\n",
":::info\n",
"Requires `langchain-openai>=0.2.6`\n",
":::\n",
"\n",
"Some OpenAI models (such as their `gpt-4o` and `gpt-4o-mini` series) support [Predicted Outputs](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs), which allow you to pass in a known portion of the LLM's expected output ahead of time to reduce latency. This is useful for cases such as editing text or code, where only a small part of the model's output will change.\n",
"\n",
"Here's an example:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "88fee1e9-58c1-42ad-ae23-24b882e175e7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/// <summary>\n",
"/// Represents a user with a first name, last name, and email.\n",
"/// </summary>\n",
"public class User\n",
"{\n",
" /// <summary>\n",
" /// Gets or sets the user's first name.\n",
" /// </summary>\n",
" public string FirstName { get; set; }\n",
"\n",
" /// <summary>\n",
" /// Gets or sets the user's last name.\n",
" /// </summary>\n",
" public string LastName { get; set; }\n",
"\n",
" /// <summary>\n",
" /// Gets or sets the user's email.\n",
" /// </summary>\n",
" public string Email { get; set; }\n",
"}\n",
"{'token_usage': {'completion_tokens': 226, 'prompt_tokens': 166, 'total_tokens': 392, 'completion_tokens_details': {'accepted_prediction_tokens': 49, 'audio_tokens': None, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 107}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_45cf54deae', 'finish_reason': 'stop', 'logprobs': None}\n"
]
}
],
"source": [
"code = \"\"\"\n",
"/// <summary>\n",
"/// Represents a user with a first name, last name, and username.\n",
"/// </summary>\n",
"public class User\n",
"{\n",
" /// <summary>\n",
" /// Gets or sets the user's first name.\n",
" /// </summary>\n",
" public string FirstName { get; set; }\n",
"\n",
" /// <summary>\n",
" /// Gets or sets the user's last name.\n",
" /// </summary>\n",
" public string LastName { get; set; }\n",
"\n",
" /// <summary>\n",
" /// Gets or sets the user's username.\n",
" /// </summary>\n",
" public string Username { get; set; }\n",
"}\n",
"\"\"\"\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
"query = (\n",
" \"Replace the Username property with an Email property. \"\n",
" \"Respond only with code, and with no markdown formatting.\"\n",
")\n",
"response = llm.invoke(\n",
" [{\"role\": \"user\", \"content\": query}, {\"role\": \"user\", \"content\": code}],\n",
" prediction={\"type\": \"content\", \"content\": code},\n",
")\n",
"print(response.content)\n",
"print(response.response_metadata)"
]
},
{
"cell_type": "markdown",
"id": "2ee1b26d-a388-4e7c-9f40-bfd1388ecc03",
"metadata": {},
"source": [
"Note that currently predictions are billed as additional tokens and may increase your usage and costs in exchange for this reduced latency."
]
},
{
"cell_type": "markdown",
"id": "feb4a499",
@@ -601,7 +696,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -615,7 +710,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.4"
}
},
"nbformat": 4,
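
The Predicted Outputs cell added above reports accepted_prediction_tokens and rejected_prediction_tokens in its token_usage output, and the closing note warns that predictions are billed as additional tokens. A short sketch for checking whether a prediction is paying off, assuming the `response` object from that cell and the key names shown in its output (which may change across OpenAI API versions):

# `response` is the ChatOpenAI result from the Predicted Outputs cell above.
details = response.response_metadata["token_usage"]["completion_tokens_details"]
accepted = details.get("accepted_prediction_tokens") or 0
rejected = details.get("rejected_prediction_tokens") or 0

# Rejected prediction tokens are still billed as completion tokens, so a low
# acceptance ratio means the prediction costs more than the latency it saves.
total = accepted + rejected
if total:
    print(f"accepted: {accepted}, rejected: {rejected} "
          f"({accepted / total:.0%} of predicted tokens accepted)")
else:
    print("No prediction token details reported for this response.")
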