diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..efe878529 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,35 @@ +{ + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode-remote.vscode-remote-extensionpack", + "charliermarsh.ruff", + "ms-azuretools.vscode-docker", + "ms-toolsai.jupyter", + "ms-python.mypy-type-checker", + "ms-vsliveshare.vsliveshare", + "ms-python.python", + "eamodio.gitlens", + "github.vscode-pull-request-github" + ], + "settings": { + "git.autofetch": true, + "python.testing.pytestEnabled": true, + "terminal.integrated.defaultProfile.linux": "zsh" + } + } + }, + "features": { + "ghcr.io/devcontainers-contrib/features/tmux-apt-get:1": {}, + "ghcr.io/devcontainers-contrib/features/zsh-plugins:0": { + "omzPlugins": "https://github.com/zsh-users/zsh-syntax-highlighting.git https://github.com/zsh-users/zsh-autosuggestions.git", + "plugins": "zsh-syntax-highlighting zsh-autosuggestions" + }, + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/schlich/devcontainer-features/powerlevel10k:1": {} + }, + "image": "mcr.microsoft.com/devcontainers/python:3.9", + "name": "Python 3", + "postCreateCommand": "chmod +x ./.devcontainer/post_create.sh && ./.devcontainer/post_create.sh" +} diff --git a/.devcontainer/post_create.sh b/.devcontainer/post_create.sh new file mode 100755 index 000000000..fbf49ab34 --- /dev/null +++ b/.devcontainer/post_create.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +set -e # Exit immediately if a command exits with a non-zero status. + +git config --global --add safe.directory /workspaces/dspy + +pip install poetry==1.7.1 +poetry config installer.max-workers 4 + +poetry install --with dev + +sudo apt update +sudo apt-get -y install python3-distutils + +poetry run pre-commit install --install-hooks + +personalization_script="./.devcontainer/.personalization.sh" + +# Developers can place a personalization script in the location specified above +# to further customize their dev container +if [ -f "$personalization_script" ]; then + echo "File $personalization_script exists. Running the script..." + chmod +x "$personalization_script" + $personalization_script +fi diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 96e81e381..07062257a 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -8,7 +8,7 @@ on: types: [opened, synchronize, reopened] env: - POETRY_VERSION: "1.6.1" + POETRY_VERSION: "1.7.1" jobs: fix: diff --git a/.gitignore b/.gitignore index 7d9919d9a..a2aad6c35 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ finetuning_ckpts/ assertion.log *.log *.db +/.devcontainer/.personalization.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 544f08594..484026ec4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,19 +1,31 @@ # Contributing -## Finding Issues +## Setting-up -### Bounty Board +### Preferred method - VSCode Dev Container -The bounty board will have various features, issues, and requests that are up for grabs. We are still working on this. Come to the discord and ask for the current bounties. +VSCode dev containers are a great way to containerize not only the necessary requirements but also recommended IDE extensions as well as settings such as pre-commit hooks and linting preferences. Using this will allow you to jump in to the perfect DSPY contribution environment without having to do much. Additionally, you'll be able to contribute through the web browser using Github Codespaces! -See the spreadsheet [here](https://docs.google.com/spreadsheets/d/1psHSfFXENAxhQTd5veKRzKydVubD2Ov62aKQHiYC-CQ/edit?usp=sharing) for the current bounties. +To use our dev container: -## Setting-up +1. Download Docker Desktop +2. Download VSCode +3. Within VSCode, install the Remote Development extension (ms-vscode-remote.vscode-remote-extensionpack) +4. Open the VSCode command palette (cmd / ctrl + shift + p) +5. Select `Dev Containers: Rebuild and Reopen in container`. A new VSCode window should open up and it should begin setting up your environment. Once it's done, you can open up a new terminal and start running tests or contributing! +6. To test that your environment is set up correctly, open a new terminal and run the command `pytest`. You should be able to run all tests and see them pass. Alternatively, you can open up the testing panel, which looks like a beaker, and click the play button to run all of our tests. +7. After the initial build, you should now be able to leave and re-enter the container any time without needing to rebuild. To do this, open the command palette and select `Dev Containers: Reopen in container`. This will not rebuild the container if you've done it correctly. + +NOTE: If you use this method, your default shell will be the poetry shell which will contain all the necessary requirements in your terminal. You shouldn't need to prefix python commands with poetry as you're already using the correct poetry virtual environment. + +### Alternative method To run the tests, you need to first clone the repository. Then install the package through poetry: -Note - You may need to install poetry. See [here](https://python-poetry.org/docs/#installing-with-the-official-installer) +Note - You may need to install poetry. You likely will just need to run `pip install poetry`. See [here](https://python-poetry.org/docs/#installing-with-the-official-installer) + +After installing poetry, use it to install our development requirements. ```bash poetry install --with dev @@ -41,8 +53,26 @@ You may need the `--container-architecture linux/amd64` flag if you are on an M1 Commit message format must be respected, with the following regex: -This ends up looking like feature(dspy): added new feature +This ends up looking like `feature(dspy): added new feature` or `enh(devcontainer): decreased size of image ``` ^(break|build|ci|docs|feat|fix|perf|refactor|style|test|ops|hotfix|release|maint|init|enh|revert)\([a-z,A-Z,0-9,\-,\_,\/,:]+\)(:)\s{1}([\w\s]+) ``` + +Detailed Breakdown +^: Asserts the start of a line. This means the pattern must match from the beginning of the string. + +(break|build|ci|docs|feat|fix|perf|refactor|style|test|ops|hotfix|release|maint|init|enh|revert): This is a capture group that matches any one of the listed keywords. These keywords represent various types of commits, such as feat (feature), fix (bug fix), docs (documentation), etc. + +\( and \): Matches the literal parentheses ( and ). These are escaped with a backslash because parentheses are special characters in regular expressions, used for defining capture groups. + +[a-z,A-Z,0-9,\-,\_,\/,:]+: Matches one or more characters inside the square brackets. It includes lowercase and uppercase letters (a-z, A-Z), digits (0-9), and specific special characters (-, \_, /, :). The comma (,) here is likely intended as a separator in the explanation but is actually being treated as a literal character to match, which might be a mistake unless the comma is an expected character in this context. + +(:): Captures the colon character. This is another literal match, but it's also captured into a group because of the parentheses. + +\s{1}: Matches exactly one whitespace character. {1} is technically redundant since the default behavior without specifying a quantity is to match exactly one. + +([\w\s]+): This capture group matches one or more word characters (\w, which includes letters, digits, and underscores) or whitespace characters (\s). This part is likely intended to capture the commit message that follows the initial keyword and scope. + +Summary +Putting it all together, this regex is used to enforce a structured format for commit messages, starting with a keyword indicating the commit type, followed by a scope enclosed in parentheses, a colon, a single space, and then the descriptive message. The scope part allows for various characters, including letters, numbers, and a few special characters, to accommodate different naming conventions. diff --git a/poetry.lock b/poetry.lock index d46e4b17d..4101cd486 100644 --- a/poetry.lock +++ b/poetry.lock @@ -520,6 +520,17 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -941,6 +952,17 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + [[package]] name = "distro" version = "1.9.0" @@ -1711,6 +1733,20 @@ files = [ {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, ] +[[package]] +name = "identify" +version = "2.5.35" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.35-py2.py3-none-any.whl", hash = "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e"}, + {file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.6" @@ -2746,6 +2782,20 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9. extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "numpy" version = "1.26.4" @@ -3421,6 +3471,24 @@ dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] sentry = ["django", "sentry-sdk"] test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] +[[package]] +name = "pre-commit" +version = "3.7.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.7.0-py2.py3-none-any.whl", hash = "sha256:5eae9e10c2b5ac51577c3452ec0a490455c45a0533f7960f993a0d01e59decab"}, + {file = "pre_commit-3.7.0.tar.gz", hash = "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "prompt-toolkit" version = "3.0.43" @@ -4643,7 +4711,7 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] name = "setuptools" version = "69.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, @@ -5705,6 +5773,26 @@ files = [ [package.extras] test = ["coverage", "flake8 (>=3.7)", "mypy", "pretend", "pytest"] +[[package]] +name = "virtualenv" +version = "20.25.1" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.25.1-py3-none-any.whl", hash = "sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a"}, + {file = "virtualenv-20.25.1.tar.gz", hash = "sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [[package]] name = "watchdog" version = "4.0.0" @@ -6302,4 +6390,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "5700b44c71cf1527f92273b3c0ac0122d11e633648cf0bff5ae7b0ad59e6df31" +content-hash = "8ebefcbe706bfffbdc30518bc8c6cada8214d126dc625824b481113b109b4ec1" diff --git a/pyproject.toml b/pyproject.toml index cbd75e7dc..85ea9a169 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,7 @@ torch = "^2.2.1" pytest-mock = "^3.12.0" ruff = "^0.3.0" black = "^24.2.0" +pre-commit = "^3.7.0" [tool.poetry.extras] chromadb = ["chromadb"]