Skip to content

Commit

Permalink
Merge branch 'microsoft:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
KylinMountain authored Jul 25, 2024
2 parents 6e9d072 + 61b5eea commit ddc4dbb
Show file tree
Hide file tree
Showing 46 changed files with 592 additions and 365 deletions.
13 changes: 13 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ title: "[Bug]: <title>"
labels: ["bug", "triage"]

body:
- type: checkboxes
id: existingcheck
attributes:
label: Is there an existing issue for this?
description: Please search to see if an issue already exists for the bug you encountered.
options:
- label: I have searched the existing issues
- label: I have checked [#657](https://github.com/microsoft/graphrag/issues/657) to validate if my issue is covered by community support
- type: textarea
id: description
attributes:
Expand Down Expand Up @@ -34,6 +42,11 @@ body:
label: GraphRAG Config Used
description: The GraphRAG configuration used for the run.
placeholder: The settings.yaml content or GraphRAG configuration
value: |
```yaml
# Paste your config here
```
- type: textarea
id: screenshotslogs
attributes:
Expand Down
13 changes: 13 additions & 0 deletions .github/ISSUE_TEMPLATE/general_issue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ title: "[Issue]: <title> "
labels: ["triage"]

body:
- type: checkboxes
id: existingcheck
attributes:
label: Is there an existing issue for this?
description: Please search to see if an issue already exists for the problem you encountered.
options:
- label: I have searched the existing issues
- label: I have checked [#657](https://github.com/microsoft/graphrag/issues/657) to validate if my issue is covered by community support
- type: textarea
id: description
attributes:
Expand All @@ -28,6 +36,11 @@ body:
label: GraphRAG Config Used
description: The GraphRAG configuration used for the run.
placeholder: The settings.yaml content or GraphRAG configuration
value: |
```yaml
# Paste your config here
```
- type: textarea
id: screenshotslogs
attributes:
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/issues-autoresolve.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Close inactive issues
on:
schedule:
- cron: "30 1 * * *"

jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v5
with:
days-before-issue-stale: 7
days-before-issue-close: 5
stale-issue-label: "stale"
close-issue-label: "autoresolved"
stale-issue-message: "This issue has been marked stale due to inactivity following a response from a repo maintainer or community member that requested more information or suggested a solution. It will be closed after five additional days."
close-issue-message: "This issue has been closed after being marked as stale for five days. Please reopen if needed."
exempt-issue-label: "triage"
days-before-pr-stale: -1
days-before-pr-close: -1
repo-token: ${{ secrets.GITHUB_TOKEN }}
7 changes: 5 additions & 2 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
python-ci:
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
os: [ubuntu-latest, windows-latest]
env:
DEBUG: 1
Expand Down Expand Up @@ -79,7 +79,10 @@ jobs:

- name: Install dependencies
shell: bash
run: poetry self add setuptools && poetry run python -m pip install gensim && poetry install
run: |
poetry self add setuptools wheel
poetry run python -m pip install gensim
poetry install
- name: Check Semversioner
run: |
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ jobs:
shell: bash
run: poetry install

- name: Export Publication Version
run: echo "version=`poetry version --short`" >> $GITHUB_OUTPUT

- name: Build Distributable
shell: bash
run: poetry build
Expand Down
94 changes: 94 additions & 0 deletions .semversioner/0.2.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"changes": [
{
"description": "Add content-based KNN for selecting prompt tune few shot examples",
"type": "minor"
},
{
"description": "Add dynamic community report rating to the prompt tuning engine",
"type": "minor"
},
{
"description": "Add Minute-based Rate Limiting and fix rpm, tpm settings",
"type": "patch"
},
{
"description": "Add N parameter support",
"type": "patch"
},
{
"description": "Add cli flag to overlay default values onto a provided config.",
"type": "patch"
},
{
"description": "Add exception handling on file load",
"type": "patch"
},
{
"description": "Add language support to prompt tuning",
"type": "patch"
},
{
"description": "Add llm params to local and global search",
"type": "patch"
},
{
"description": "Fix broken prompt tuning link on docs",
"type": "patch"
},
{
"description": "Fix delta none on query calls",
"type": "patch"
},
{
"description": "Fix docsite base url",
"type": "patch"
},
{
"description": "Fix encoding model parameter on prompt tune",
"type": "patch"
},
{
"description": "Fix for --limit exceeding the dataframe length",
"type": "patch"
},
{
"description": "Fix for Ruff 0.5.2",
"type": "patch"
},
{
"description": "Fixed an issue where base OpenAI embeddings can't work with Azure OpenAI LLM",
"type": "patch"
},
{
"description": "Modify defaults for CHUNK_SIZE, CHUNK_OVERLAP and GLEANINGS to reduce time and LLM calls",
"type": "patch"
},
{
"description": "fix community_report doesn't work in settings.yaml",
"type": "patch"
},
{
"description": "fix llm response content is None in query",
"type": "patch"
},
{
"description": "fix the organization parameter is ineffective during queries",
"type": "patch"
},
{
"description": "remove duplicate file read",
"type": "patch"
},
{
"description": "support non-open ai model config to prompt tune",
"type": "patch"
},
{
"description": "use binary io processing for all file io operations",
"type": "patch"
}
],
"created_at": "2024-07-25T02:01:38+00:00",
"version": "0.2.0"
}
4 changes: 0 additions & 4 deletions .semversioner/next-release/minor-20240710183748086411.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240701233152787373.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240703152422358587.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240703182750529114.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240704181236015699.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240705184142723331.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240705235656897489.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240707063053679262.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240709225514193665.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240710114442871595.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240710165603516866.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240711004716103302.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240711092703710242.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240711223132221685.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240712035356859335.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240712210400518089.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240712235357550877.json

This file was deleted.

4 changes: 0 additions & 4 deletions .semversioner/next-release/patch-20240716225953784804.json

This file was deleted.

31 changes: 31 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Changelog
Note: version releases in the 0.x.y range may introduce breaking changes.

## 0.2.0

- minor: Add content-based KNN for selecting prompt tune few shot examples
- minor: Add dynamic community report rating to the prompt tuning engine
- patch: Add Minute-based Rate Limiting and fix rpm, tpm settings
- patch: Add N parameter support
- patch: Add cli flag to overlay default values onto a provided config.
- patch: Add exception handling on file load
- patch: Add language support to prompt tuning
- patch: Add llm params to local and global search
- patch: Fix broken prompt tuning link on docs
- patch: Fix delta none on query calls
- patch: Fix docsite base url
- patch: Fix encoding model parameter on prompt tune
- patch: Fix for --limit exceeding the dataframe length
- patch: Fix for Ruff 0.5.2
- patch: Fixed an issue where base OpenAI embeddings can't work with Azure OpenAI LLM
- patch: Modify defaults for CHUNK_SIZE, CHUNK_OVERLAP and GLEANINGS to reduce time and LLM calls
- patch: fix community_report doesn't work in settings.yaml
- patch: fix llm response content is None in query
- patch: fix the organization parameter is ineffective during queries
- patch: remove duplicate file read
- patch: support non-OpenAI model config for prompt tuning
- patch: use binary io processing for all file io operations

## 0.1.0

- minor: Initial Release
2 changes: 1 addition & 1 deletion docsite/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ GraphRAG builds upon our prior [research](https://www.microsoft.com/en-us/workla

### Index

- Slice up an input corpus into a series of TextUnits, which act as analyzable units for the rest of the process, and provide fine-grained references into our outputs.
- Slice up an input corpus into a series of TextUnits, which act as analyzable units for the rest of the process, and provide fine-grained references in our outputs.
- Extract all entities, relationships, and key claims from the TextUnits using an LLM.
- Perform a hierarchical clustering of the graph using the [Leiden technique](https://arxiv.org/pdf/1810.08473.pdf). To see this visually, check out Figure 1 above. Each circle is an entity (e.g., a person, place, or organization), with the size representing the degree of the entity, and the color representing its community.
- Generate summaries of each community and its constituents from the bottom-up. This aids in holistic understanding of the dataset.
Expand Down
4 changes: 3 additions & 1 deletion graphrag/config/models/claim_extraction_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def resolved_strategy(self, root_dir: str) -> dict:
"type": ExtractClaimsStrategyType.graph_intelligence,
"llm": self.llm.model_dump(),
**self.parallelization.model_dump(),
"extraction_prompt": (Path(root_dir) / self.prompt).read_text()
"extraction_prompt": (Path(root_dir) / self.prompt)
.read_bytes()
.decode(encoding="utf-8")
if self.prompt
else None,
"claim_description": self.description,
Expand Down
4 changes: 3 additions & 1 deletion graphrag/config/models/community_reports_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ def resolved_strategy(self, root_dir) -> dict:
"type": CreateCommunityReportsStrategyType.graph_intelligence,
"llm": self.llm.model_dump(),
**self.parallelization.model_dump(),
"extraction_prompt": (Path(root_dir) / self.prompt).read_text()
"extraction_prompt": (Path(root_dir) / self.prompt)
.read_bytes()
.decode(encoding="utf-8")
if self.prompt
else None,
"max_report_length": self.max_length,
Expand Down
4 changes: 3 additions & 1 deletion graphrag/config/models/entity_extraction_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ def resolved_strategy(self, root_dir: str, encoding_model: str) -> dict:
"type": ExtractEntityStrategyType.graph_intelligence,
"llm": self.llm.model_dump(),
**self.parallelization.model_dump(),
"extraction_prompt": (Path(root_dir) / self.prompt).read_text()
"extraction_prompt": (Path(root_dir) / self.prompt)
.read_bytes()
.decode(encoding="utf-8")
if self.prompt
else None,
"max_gleanings": self.max_gleanings,
Expand Down
4 changes: 3 additions & 1 deletion graphrag/config/models/summarize_descriptions_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def resolved_strategy(self, root_dir: str) -> dict:
"type": SummarizeStrategyType.graph_intelligence,
"llm": self.llm.model_dump(),
**self.parallelization.model_dump(),
"summarize_prompt": (Path(root_dir) / self.prompt).read_text()
"summarize_prompt": (Path(root_dir) / self.prompt)
.read_bytes()
.decode(encoding="utf-8")
if self.prompt
else None,
"max_summary_length": self.max_length,
Expand Down
Loading

0 comments on commit ddc4dbb

Please sign in to comment.