diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 36bccce0507..00000000000 --- a/.dockerignore +++ /dev/null @@ -1,11 +0,0 @@ -.tox -.git -.vscode -scripts -.mypy_cache -.benchmarks -docker -packages/syft/src/target -packages/grid/apps/domain/src/nodedatabase.db -packages/grid/apps/network/src/nodedatabase.db -packages/grid/apps/worker/src/nodedatabase.db diff --git a/.github/workflows/container-scan.yml b/.github/workflows/container-scan.yml index f7b5df009ae..303eb11bc40 100644 --- a/.github/workflows/container-scan.yml +++ b/.github/workflows/container-scan.yml @@ -274,30 +274,30 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Snyk CLI to check for security issues - # Snyk can be used to break the build when it detects security issues. - # In this case we want to upload the SAST issues to GitHub Code Scanning - uses: snyk/actions/setup@master - env: - # This is where you will need to introduce the Snyk API token created with your Snyk account - SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - - - name: Snyk auth - shell: bash - run: snyk config set api=$SNYK_TOKEN - env: - # This is where you will need to introduce the Snyk API token created with your Snyk account - SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - - name: Snyk Container test + uses: snyk/actions/docker@master continue-on-error: true - shell: bash - run: snyk container test mongo:7.0.0 --sarif --sarif-file-output=snyk-code.sarif env: # This is where you will need to introduce the Snyk API token created with your Snyk account SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + image: mongo:7.0.0 + args: --sarif-file-output=snyk-code.sarif + + # Replace any "undefined" security severity values with 0. The undefined value is used in the case + # of license-related findings, which do not do not indicate a security vulnerability. + # See https://github.com/github/codeql-action/issues/2187 for more context. + - name: Post-process sarif output + run: | + sed -i 's/"security-severity": "undefined"/"security-severity": "0"/g' snyk-code.sarif + + # Replace any "null" security severity values with 0. The undefined value is used in the case + # the NVD CVSS Score is not available. + # See https://github.com/Erikvl87/docker-languagetool/issues/90 and https://github.com/github/codeql-action/issues/2187 for more context. + - name: Post-process sarif output for security severities set to "null" + run: | + sed -i 's/"security-severity": "null"/"security-severity": "0"/g' snyk-code.sarif - # Push the Snyk Code results into GitHub Code Scanning tab - name: Upload result to GitHub Code Scanning uses: github/codeql-action/upload-sarif@v3 with: @@ -352,29 +352,29 @@ jobs: actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Set up Snyk CLI to check for security issues - # Snyk can be used to break the build when it detects security issues. 
- # In this case we want to upload the SAST issues to GitHub Code Scanning - uses: snyk/actions/setup@master - env: - # This is where you will need to introduce the Snyk API token created with your Snyk account - SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - - - name: Snyk auth - shell: bash - run: snyk config set api=$SNYK_TOKEN - env: - # This is where you will need to introduce the Snyk API token created with your Snyk account - SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - - name: Snyk Container test + uses: snyk/actions/docker@master continue-on-error: true - shell: bash - run: snyk container test traefik:v2.11.0 --sarif --sarif-file-output=snyk-code.sarif env: # This is where you will need to introduce the Snyk API token created with your Snyk account SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + image: traefik:v2.11.0 + args: --sarif-file-output=snyk-code.sarif + + # Replace any "undefined" security severity values with 0. The undefined value is used in the case + # of license-related findings, which do not indicate a security vulnerability. + # See https://github.com/github/codeql-action/issues/2187 for more context. + - name: Post-process sarif output + run: | + sed -i 's/"security-severity": "undefined"/"security-severity": "0"/g' snyk-code.sarif + + # Replace any "null" security severity values with 0. The undefined value is used in the case + # the NVD CVSS Score is not available. + # See https://github.com/Erikvl87/docker-languagetool/issues/90 and https://github.com/github/codeql-action/issues/2187 for more context. + - name: Post-process sarif output for security severities set to "null" + run: | + sed -i 's/"security-severity": "null"/"security-severity": "0"/g' snyk-code.sarif # Push the Snyk Code results into GitHub Code Scanning tab - name: Upload result to GitHub Code Scanning diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 32e227f6c3a..34620e3fa80 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -82,15 +82,78 @@ jobs: run: | tox -e backend.test.basecpu - pr-tests-stack-k8s: + pr-tests-syft-integration: strategy: max-parallel: 99 matrix: - # os: [ubuntu-latest, macos-latest, windows-latest, windows] - # os: [om-ci-16vcpu-ubuntu2204] os: [ubuntu-latest] python-version: ["3.12"] - pytest-modules: ["frontend network"] + pytest-modules: ["local_node"] + fail-fast: false + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v4 + + - name: Check for file changes + uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.stack == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Upgrade pip + if: steps.changes.outputs.stack == 'true' + run: | + pip install --upgrade pip uv==0.1.35 + uv --version + + - name: Get pip cache dir + if: steps.changes.outputs.stack == 'true' + id: pip-cache + shell: bash + run: | + echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT + + - name: pip cache + uses: actions/cache@v4 + if: steps.changes.outputs.stack == 'true' + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-uv-py${{ matrix.python-version }} + restore-keys: | + ${{ runner.os }}-uv-py${{ matrix.python-version }} + + - name: Install tox + if: steps.changes.outputs.stack == 'true' + run: | + pip install --upgrade tox tox-uv==1.5.1 + + - name: Run Syft
Integration Tests + if: steps.changes.outputs.stack == 'true' + timeout-minutes: 60 + env: + PYTEST_MODULES: "${{ matrix.pytest-modules }}" + GITHUB_CI: true + shell: bash + run: | + tox -e syft.test.integration + + pr-tests-integration-k8s: + strategy: + max-parallel: 99 + matrix: + os: [ubuntu-latest] + python-version: ["3.12"] + pytest-modules: ["frontend network container_workload"] fail-fast: false runs-on: ${{matrix.os}} @@ -171,15 +234,6 @@ jobs: chmod +x kubectl sudo install kubectl /usr/local/bin; - - name: Install k9s - if: steps.changes.outputs.stack == 'true' - run: | - # install k9s - wget https://github.com/derailed/k9s/releases/download/v0.32.4/k9s_Linux_amd64.tar.gz - tar -xvf k9s_Linux_amd64.tar.gz - chmod +x k9s - sudo install k9s /usr/local/bin; - - name: Install helm if: steps.changes.outputs.stack == 'true' run: | @@ -192,9 +246,9 @@ jobs: if: steps.changes.outputs.stack == 'true' timeout-minutes: 60 env: - HAGRID_ART: false PYTEST_MODULES: "${{ matrix.pytest-modules }}" GITHUB_CI: true + AZURE_BLOB_STORAGE_KEY: "${{ secrets.AZURE_BLOB_STORAGE_KEY }}" shell: bash run: | K3D_VERSION=v5.6.3 @@ -208,6 +262,7 @@ jobs: curl -sSL https://github.com/loft-sh/devspace/releases/download/${DEVSPACE_VERSION}/devspace-linux-amd64 -o ./devspace chmod +x devspace devspace version + tox -e stack.test.integration.k8s tox -e syft.build.helm tox -e syft.package.helm @@ -224,17 +279,164 @@ jobs: shell: bash run: | mkdir -p ./k8s-logs - kubectl describe all -A --context k3d-testgateway1 --namespace syft > ./k8s-logs/testgateway1-desc-${{ steps.date.outputs.date }}.txt - kubectl describe all -A --context k3d-testdomain1 --namespace syft > ./k8s-logs/testdomain1-desc-${{ steps.date.outputs.date }}.txt - kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-testgateway1 --namespace syft > ./k8s-logs/testgateway1-logs-${{ steps.date.outputs.date }}.txt - kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-testdomain1 --namespace syft > ./k8s-logs/testdomain1-logs-${{ steps.date.outputs.date }}.txt + kubectl describe all -A --context k3d-test-gateway-1 --namespace syft > ./k8s-logs/test-gateway-1-desc-${{ steps.date.outputs.date }}.txt + kubectl describe all -A --context k3d-test-domain-1 --namespace syft > ./k8s-logs/test-domain-1-desc-${{ steps.date.outputs.date }}.txt + kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-test-gateway-1 --namespace syft > ./k8s-logs/test-gateway-1-logs-${{ steps.date.outputs.date }}.txt + kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-test-domain-1 --namespace syft > ./k8s-logs/test-domain-1-logs-${{ steps.date.outputs.date }}.txt + ls -la ./k8s-logs + + - name: Upload logs to GitHub + uses: actions/upload-artifact@master + if: steps.changes.outputs.stack == 'true' && failure() + with: + name: k8s-logs-integration-${{ matrix.os }}-${{ steps.date.outputs.date }} + path: ./k8s-logs/ + + - name: Cleanup k3d + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + export PATH=`pwd`:$PATH + k3d cluster delete test-gateway-1 || true + k3d cluster delete test-domain-1 || true + k3d registry delete k3d-registry.localhost || true + + pr-tests-notebook-k8s: + strategy: + max-parallel: 99 + matrix: + os: [ubuntu-latest] + python-version: ["3.12"] + fail-fast: false + + runs-on: ${{matrix.os}} + + steps: + - name: Permission to home directory + run: | + sudo chown -R $USER:$USER $HOME + - uses: actions/checkout@v4 + - name: Check for file changes + 
uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.stack == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Add K3d Registry + run: | + sudo python ./scripts/patch_hosts.py --add-k3d-registry + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + large-packages: false + + # free 10GB of space + - name: Remove unnecessary files + if: matrix.os == 'ubuntu-latest' + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker image prune --all --force + docker builder prune --all --force + docker system prune --all --force + + - name: Upgrade pip + if: steps.changes.outputs.stack == 'true' + run: | + pip install --upgrade pip uv==0.1.35 + uv --version + + - name: Get pip cache dir + if: steps.changes.outputs.stack == 'true' + id: pip-cache + shell: bash + run: | + echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT + + - name: pip cache + uses: actions/cache@v4 + if: steps.changes.outputs.stack == 'true' + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-uv-py${{ matrix.python-version }} + restore-keys: | + ${{ runner.os }}-uv-py${{ matrix.python-version }} + + - name: Install tox + if: steps.changes.outputs.stack == 'true' + run: | + pip install --upgrade tox tox-uv==1.5.1 + + - name: Install kubectl + if: steps.changes.outputs.stack == 'true' + run: | + # cleanup apt version + sudo apt remove kubectl || true + # install kubectl 1.27 + curl -LO https://dl.k8s.io/release/v1.27.2/bin/linux/amd64/kubectl + chmod +x kubectl + sudo install kubectl /usr/local/bin; + + - name: Install helm + if: steps.changes.outputs.stack == 'true' + run: | + # install helm + curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 + chmod 700 get_helm.sh + ./get_helm.sh + + - name: Run Notebooks Tests + if: steps.changes.outputs.stack == 'true' + timeout-minutes: 60 + env: + GITHUB_CI: true + shell: bash + run: | + K3D_VERSION=v5.6.3 + DEVSPACE_VERSION=v6.3.12 + # install k3d + wget https://github.com/k3d-io/k3d/releases/download/${K3D_VERSION}/k3d-linux-amd64 + mv k3d-linux-amd64 k3d + chmod +x k3d + export PATH=`pwd`:$PATH + k3d version + curl -sSL https://github.com/loft-sh/devspace/releases/download/${DEVSPACE_VERSION}/devspace-linux-amd64 -o ./devspace + chmod +x devspace + devspace version + tox -e stack.test.notebook.k8s + + - name: Get current timestamp + id: date + if: failure() + shell: bash + run: echo "date=$(date +%s)" >> $GITHUB_OUTPUT + + - name: Collect logs from k3d + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + mkdir -p ./k8s-logs + kubectl describe all -A --context k3d-test-gateway-1 --namespace syft > ./k8s-logs/test-gateway-1-desc-${{ steps.date.outputs.date }}.txt + kubectl describe all -A --context k3d-test-domain-1 --namespace syft > ./k8s-logs/test-domain-1-desc-${{ steps.date.outputs.date }}.txt + kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-test-gateway-1 --namespace syft > ./k8s-logs/test-gateway-1-logs-${{ steps.date.outputs.date }}.txt + kubectl logs -l app.kubernetes.io/name!=random --prefix=true --context k3d-test-domain-1 --namespace syft > ./k8s-logs/test-domain-1-logs-${{ steps.date.outputs.date }}.txt ls -la ./k8s-logs - name: Upload logs to GitHub uses: 
actions/upload-artifact@master if: steps.changes.outputs.stack == 'true' && failure() with: - name: k8s-logs-${{ matrix.os }}-${{ steps.date.outputs.date }} + name: k8s-logs-notebook-${{ matrix.os }}-${{ steps.date.outputs.date }} path: ./k8s-logs/ - name: Cleanup k3d @@ -242,6 +444,6 @@ jobs: shell: bash run: | export PATH=`pwd`:$PATH - k3d cluster delete testgateway1 || true - k3d cluster delete testdomain1 || true + k3d cluster delete test-gateway-1 || true + k3d cluster delete test-domain-1 || true k3d registry delete k3d-registry.localhost || true diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index 6ce6615a138..f2bee6a78cf 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -39,7 +39,7 @@ jobs: # run: | # sudo chown -R $USER:$USER $HOME - name: "clean .git/config" - if: matrix.os == 'windows' + if: matrix.os == 'windows-latest' continue-on-error: true shell: bash run: | @@ -134,7 +134,7 @@ jobs: # run: | # sudo chown -R $USER:$USER $HOME - name: "clean .git/config" - if: matrix.os == 'windows' + if: matrix.os == 'windows-latest' continue-on-error: true shell: bash run: | diff --git a/notebooks/api/0.8/01-submit-code.ipynb b/notebooks/api/0.8/01-submit-code.ipynb index ec11b60af9f..761d1a96e7a 100644 --- a/notebooks/api/0.8/01-submit-code.ipynb +++ b/notebooks/api/0.8/01-submit-code.ipynb @@ -482,7 +482,7 @@ "outputs": [], "source": [ "# Once we start the project, it will submit the project along with the code request to the Domain Server\n", - "project = new_project.start()\n", + "project = new_project.send()\n", "project" ] }, @@ -599,7 +599,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" }, "toc": { "base_numbering": 1, diff --git a/notebooks/api/0.8/06-multiple-code-requests.ipynb b/notebooks/api/0.8/06-multiple-code-requests.ipynb index 6e19bc6731c..4be948cc00b 100644 --- a/notebooks/api/0.8/06-multiple-code-requests.ipynb +++ b/notebooks/api/0.8/06-multiple-code-requests.ipynb @@ -250,7 +250,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "\n", "project" ] @@ -578,7 +578,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/data-owner/03-messages-and-requests.ipynb b/notebooks/tutorials/data-owner/03-messages-and-requests.ipynb index 5a59e9724f0..8e7a1618425 100644 --- a/notebooks/tutorials/data-owner/03-messages-and-requests.ipynb +++ b/notebooks/tutorials/data-owner/03-messages-and-requests.ipynb @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "project" ] }, diff --git a/notebooks/tutorials/data-scientist/05-syft-functions.ipynb b/notebooks/tutorials/data-scientist/05-syft-functions.ipynb index da524a933e1..cbee1755a3d 100644 --- a/notebooks/tutorials/data-scientist/05-syft-functions.ipynb +++ b/notebooks/tutorials/data-scientist/05-syft-functions.ipynb @@ -400,7 +400,7 @@ "metadata": {}, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "project" ] }, diff --git a/notebooks/tutorials/data-scientist/06-messaging-and-requests.ipynb b/notebooks/tutorials/data-scientist/06-messaging-and-requests.ipynb index 3fbe3bfc055..5d7ff62fa94 100644 --- 
a/notebooks/tutorials/data-scientist/06-messaging-and-requests.ipynb +++ b/notebooks/tutorials/data-scientist/06-messaging-and-requests.ipynb @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "project" ] }, diff --git a/notebooks/tutorials/enclaves/Enclave-single-notebook-high-low-network.ipynb b/notebooks/tutorials/enclaves/Enclave-single-notebook-high-low-network.ipynb index d95d906f952..771f0ad4389 100644 --- a/notebooks/tutorials/enclaves/Enclave-single-notebook-high-low-network.ipynb +++ b/notebooks/tutorials/enclaves/Enclave-single-notebook-high-low-network.ipynb @@ -266,7 +266,8 @@ "source": [ "res = do_ca_client_low.connect_to_gateway(\n", " handle=gateway_node_low\n", - ") # add credentials here" + ") # add credentials here\n", + "res" ] }, { @@ -276,12 +277,10 @@ "metadata": {}, "outputs": [], "source": [ - "res = do_ca_client_low.connect_to_gateway(\n", - " handle=gateway_node_low\n", - ") # add credentials here\n", "res = do_it_client_low.connect_to_gateway(\n", " handle=gateway_node_low\n", - ") # add credentials here" + ") # add credentials here\n", + "res" ] }, { @@ -1062,7 +1061,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.2" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/hello-syft/01-hello-syft.ipynb b/notebooks/tutorials/hello-syft/01-hello-syft.ipynb index b7354b469b1..8a7f6a674d2 100644 --- a/notebooks/tutorials/hello-syft/01-hello-syft.ipynb +++ b/notebooks/tutorials/hello-syft/01-hello-syft.ipynb @@ -518,6 +518,55 @@ "cell_type": "markdown", "id": "48", "metadata": {}, + "source": [ + "## Final note: autocomplete" + ] + }, + { + "cell_type": "markdown", + "id": "49", + "metadata": {}, + "source": [ + "Earlier in this tutorial, we used services defined on the client, such as `ds_client.code.request_code_execution`. To find out more about the available methods, like `.request_code_execution()`, and services, like `client.code` you can use autocomplete, simply type `ds_client.code.` or `ds_client.services.` for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50", + "metadata": {}, + "outputs": [], + "source": [ + "# autocompletion, but programtic. To test it out, just type client.services. 
instead in a new cell\n", + "autocompleter = get_ipython().Completer\n", + "_, completions1 = autocompleter.complete(text=\"ds_client.code.\")\n", + "_, completions2 = autocompleter.complete(text=\"ds_client.services.\")\n", + "_, completions3 = autocompleter.complete(text=\"ds_client.api.services.\")\n", + "_, completions4 = autocompleter.complete(text=\"ds_client.api.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51", + "metadata": {}, + "outputs": [], + "source": [ + "assert all(\n", + " [\n", + " \"ds_client.code.get_all\" in completions1,\n", + " \"ds_client.services.code\" in completions2,\n", + " \"ds_client.api.services.code\" in completions3,\n", + " \"ds_client.api.code\" in completions4,\n", + " \"ds_client.api.parse_raw\" not in completions4, # no pydantic completions on api\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "52", + "metadata": {}, "source": [ "Once you are done with this tutorial, you can safely shut down the servers as following," ] @@ -525,7 +574,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -535,7 +584,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50", + "id": "54", "metadata": {}, "outputs": [], "source": [] @@ -557,7 +606,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.2" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/model-auditing/colab/01-user-log.ipynb b/notebooks/tutorials/model-auditing/colab/01-user-log.ipynb index 0a9878ba9b3..036c21f9ed6 100644 --- a/notebooks/tutorials/model-auditing/colab/01-user-log.ipynb +++ b/notebooks/tutorials/model-auditing/colab/01-user-log.ipynb @@ -481,7 +481,7 @@ "metadata": {}, "outputs": [], "source": [ - "project = audit_project.start()\n", + "project = audit_project.send()\n", "project" ] }, diff --git a/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb b/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb index 4d245cd6f06..3314a8d70eb 100644 --- a/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb +++ b/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb @@ -492,7 +492,7 @@ "metadata": {}, "outputs": [], "source": [ - "project = new_project.start()" + "project = new_project.send()" ] }, { diff --git a/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb b/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb index 730391a5881..d5cdc94cc9d 100644 --- a/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb +++ b/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb @@ -554,7 +554,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb b/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb index 28587a7e3d4..09e1e25b8dc 100644 --- a/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb +++ b/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb @@ -760,7 +760,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] 
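Note: the notebook hunks above, and the remaining pandas-cookbook notebooks below, all make the same API change, replacing new_project.start() with new_project.send(). If existing notebooks need to keep working during the transition, one option is to keep start() as a thin deprecated alias of send(). The sketch below is illustrative only and is not part of this diff; ProjectSubmit here is a stand-in class, not syft's real project-submission object.

# Minimal sketch, not part of this diff: keep start() as a deprecated alias of send()
# so notebooks written against the old API keep working.
import warnings


class ProjectSubmit:
    def send(self) -> "ProjectSubmit":
        # submit the project along with its code requests to the Domain Server
        print("project submitted")
        return self

    def start(self) -> "ProjectSubmit":
        # deprecated alias kept for backwards compatibility with older notebooks
        warnings.warn(
            "ProjectSubmit.start() is deprecated, use ProjectSubmit.send() instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.send()


project = ProjectSubmit().start()  # warns, then delegates to send()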
diff --git a/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb b/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb index 747f7c0f792..51443872eb7 100644 --- a/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb +++ b/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb @@ -874,7 +874,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb b/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb index 278363f5e6d..29878fd826c 100644 --- a/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb +++ b/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb @@ -634,7 +634,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb b/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb index 384b8e10701..9afc01da2ec 100644 --- a/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb +++ b/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb @@ -821,7 +821,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb b/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb index 404bdc30026..3544f6b82f4 100644 --- a/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb +++ b/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb @@ -723,7 +723,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb b/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb index c5a1887d04e..f64f8728793 100644 --- a/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb +++ b/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb @@ -778,7 +778,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, sy.service.project.project.Project)\n", "project" ] diff --git a/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb b/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb index 5bb016f1cae..6d1c11f3153 100644 --- a/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb +++ b/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb @@ -728,7 +728,7 @@ }, "outputs": [], "source": [ - "project = new_project.start()\n", + "project = new_project.send()\n", "assert isinstance(project, 
sy.service.project.project.Project)\n", "project" ] diff --git a/packages/.dockerignore b/packages/.dockerignore deleted file mode 100644 index ba9aa4b6829..00000000000 --- a/packages/.dockerignore +++ /dev/null @@ -1,11 +0,0 @@ -**/*.pyc - -grid/* -!grid/backend - -syftcli - -syft/tests -syft/README.md - -hagrid \ No newline at end of file diff --git a/packages/grid/backend/backend.dockerfile b/packages/grid/backend/backend.dockerfile index dc15ccb1d59..18b38e520fe 100644 --- a/packages/grid/backend/backend.dockerfile +++ b/packages/grid/backend/backend.dockerfile @@ -1,94 +1,70 @@ ARG PYTHON_VERSION="3.12" -ARG TZ="Etc/UTC" - -# change to USER="syftuser", UID=1000 and HOME="/home/$USER" for rootless -ARG USER="root" -ARG UID=0 -ARG USER_GRP=$USER:$USER -ARG HOME="/root" -ARG APPDIR="$HOME/app" +ARG UV_VERSION="0.1.41-r0" +ARG TORCH_VERSION="2.3.0" # ==================== [BUILD STEP] Python Dev Base ==================== # -FROM cgr.dev/chainguard/wolfi-base as python_dev +FROM cgr.dev/chainguard/wolfi-base as syft_deps ARG PYTHON_VERSION -ARG TZ -ARG USER -ARG UID +ARG UV_VERSION +ARG TORCH_VERSION # Setup Python DEV -RUN --mount=type=cache,target=/var/cache/apk,sharing=locked \ - apk update && \ - apk upgrade && \ - apk add build-base gcc tzdata python-$PYTHON_VERSION-dev-default py$PYTHON_VERSION-pip && \ - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -# uncomment for creating rootless user -# && adduser -D -u $UID $USER - -# ==================== [BUILD STEP] Install Syft Dependency ==================== # - -FROM python_dev as syft_deps - -ARG APPDIR -ARG HOME -ARG UID -ARG USER -ARG USER_GRP - -USER $USER -WORKDIR $APPDIR -ENV PATH=$PATH:$HOME/.local/bin - -# copy skeleton to do package install -COPY --chown=$USER_GRP \ - syft/setup.py \ - syft/setup.cfg \ - syft/pyproject.toml \ - syft/MANIFEST.in \ - syft/ - -COPY --chown=$USER_GRP \ - syft/src/syft/VERSION \ - syft/src/syft/capnp \ - syft/src/syft/ - -# Install all dependencies together here to avoid any version conflicts across pkgs -RUN --mount=type=cache,id=pip-$UID,target=$HOME/.cache/pip,uid=$UID,gid=$UID,sharing=locked \ - pip install --user --default-timeout=300 torch==2.2.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html && \ - pip install --user pip-autoremove jupyterlab -e ./syft[data_science] && \ - pip-autoremove ansible ansible-core -y +RUN apk update && apk upgrade && \ + apk add build-base gcc python-$PYTHON_VERSION-dev-default uv=$UV_VERSION + +WORKDIR /root/app + +ENV UV_HTTP_TIMEOUT=600 + +# keep static deps separate to have each layer cached independently +# if amd64 then we need to append +cpu to the torch version +# uv issues: https://github.com/astral-sh/uv/issues/3437 & https://github.com/astral-sh/uv/issues/2541 +RUN --mount=type=cache,target=/root/.cache,sharing=locked \ + uv venv && \ + ARCH=$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) && \ + if [[ "$ARCH" = "amd64" ]]; then TORCH_VERSION="$TORCH_VERSION+cpu"; fi && \ + uv pip install torch==$TORCH_VERSION --index-url https://download.pytorch.org/whl/cpu + +COPY syft/setup.py syft/setup.cfg syft/pyproject.toml ./syft/ + +COPY syft/src/syft/VERSION ./syft/src/syft/ + +RUN --mount=type=cache,target=/root/.cache,sharing=locked \ + # remove torch because we already have the cpu version pre-installed + sed --in-place /torch==/d ./syft/setup.cfg && \ + uv pip install -e ./syft[data_science] && \ + if uv pip freeze | grep -q ansible; then uv pip freeze | grep ansible | xargs uv pip uninstall; fi # 
==================== [Final] Setup Syft Server ==================== # FROM cgr.dev/chainguard/wolfi-base as backend -# inherit from global -ARG APPDIR -ARG HOME ARG PYTHON_VERSION -ARG TZ -ARG USER -ARG USER_GRP - -# Setup Python -RUN --mount=type=cache,target=/var/cache/apk,sharing=locked \ - apk update && \ - apk upgrade && \ - apk add tzdata git bash python-$PYTHON_VERSION-default py$PYTHON_VERSION-pip && \ - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ - # Uncomment for rootless user - # adduser -D -u 1000 $USER && \ - mkdir -p /var/log/pygrid $HOME/data/creds $HOME/data/db $HOME/.cache $HOME/.local -# chown -R $USER_GRP /var/log/pygrid $HOME/ - -USER $USER -WORKDIR $APPDIR +ARG UV_VERSION + +RUN apk update && apk upgrade && \ + apk add --no-cache git bash python-$PYTHON_VERSION-default py$PYTHON_VERSION-pip uv=$UV_VERSION + +WORKDIR /root/app/ + +# Copy pre-built syft dependencies +COPY --from=syft_deps /root/app/.venv .venv + +# copy grid +COPY grid/backend/grid ./grid/ + +# copy syft +COPY syft ./syft/ # Update environment variables -ENV PATH=$PATH:$HOME/.local/bin \ - PYTHONPATH=$APPDIR \ - APPDIR=$APPDIR \ +ENV \ + # "activate" venv + PATH="/root/app/.venv/bin/:$PATH" \ + VIRTUAL_ENV="/root/app/.venv" \ + # Syft + APPDIR="/root/app" \ NODE_NAME="default_node_name" \ NODE_TYPE="domain" \ SERVICE_NAME="backend" \ @@ -102,16 +78,6 @@ ENV PATH=$PATH:$HOME/.local/bin \ MONGO_HOST="localhost" \ MONGO_PORT="27017" \ MONGO_USERNAME="root" \ - MONGO_PASSWORD="example" \ - CREDENTIALS_PATH="$HOME/data/creds/credentials.json" - -# Copy pre-built jupyterlab, syft dependencies -COPY --chown=$USER_GRP --from=syft_deps $HOME/.local $HOME/.local - -# copy grid -COPY --chown=$USER_GRP grid/backend/grid ./grid/ - -# copy syft -COPY --chown=$USER_GRP syft/ ./syft/ + MONGO_PASSWORD="example" CMD ["bash", "./grid/start.sh"] diff --git a/packages/grid/backend/backend.dockerfile.dockerignore b/packages/grid/backend/backend.dockerfile.dockerignore new file mode 100644 index 00000000000..2c06567a214 --- /dev/null +++ b/packages/grid/backend/backend.dockerfile.dockerignore @@ -0,0 +1,63 @@ +# Paths should be against the docker root context dir i.e. 
/packages + +# Syft +**/tests/ +**/*.md + +# Byte-compiled / optimized / DLL files +**/__pycache__/ +**/*.py[cod] +**/*$py.class + +# Distribution / packaging +**/.Python +**/build/ +**/develop-eggs/ +**/dist/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/share/python-wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/MANIFEST + +# Jupyter Notebook +**/.ipynb_checkpoints + +# Environments +**/.env +**/.venv +**/env/ +**/venv/ +**/ENV/ +**/env.bak/ +**/venv.bak/ + +# Unit test / coverage reports +**/htmlcov/ +**/.tox/ +**/.nox/ +**/.coverage +**/.coverage.* +**/.cache +**/nosetests.xml +**/coverage.xml +**/*.cover +**/*.py,cover +**/.hypothesis/ +**/.pytest_cache/ +**/cover/ + +# vim +**/*.swp + +# macOS +**/.DS_Store diff --git a/packages/grid/backend/grid/bootstrap.py b/packages/grid/backend/grid/bootstrap.py index 84fedc36fdf..0da833a3a39 100644 --- a/packages/grid/backend/grid/bootstrap.py +++ b/packages/grid/backend/grid/bootstrap.py @@ -26,8 +26,7 @@ def get_env(key: str, default: str = "") -> str | None: return None -DEFAULT_CREDENTIALS_PATH = os.path.expandvars("$HOME/data/creds/credentials.json") -CREDENTIALS_PATH = str(get_env("CREDENTIALS_PATH", DEFAULT_CREDENTIALS_PATH)) +CREDENTIALS_PATH = str(get_env("CREDENTIALS_PATH", "credentials.json")) NODE_PRIVATE_KEY = "NODE_PRIVATE_KEY" NODE_UID = "NODE_UID" diff --git a/packages/grid/backend/grid/core/node.py b/packages/grid/backend/grid/core/node.py index 12e083ed602..cde36f8c5fe 100644 --- a/packages/grid/backend/grid/core/node.py +++ b/packages/grid/backend/grid/core/node.py @@ -105,4 +105,5 @@ def seaweedfs_config() -> SeaweedFSConfig: smtp_port=settings.SMTP_PORT, smtp_host=settings.SMTP_HOST, association_request_auto_approval=settings.ASSOCIATION_REQUEST_AUTO_APPROVAL, + background_tasks=True, ) diff --git a/packages/grid/backend/grid/images/worker_cpu.dockerfile b/packages/grid/backend/grid/images/worker_cpu.dockerfile index 717df5817fd..1ecaa950358 100644 --- a/packages/grid/backend/grid/images/worker_cpu.dockerfile +++ b/packages/grid/backend/grid/images/worker_cpu.dockerfile @@ -18,13 +18,12 @@ ARG PIP_PACKAGES="pip --dry-run" ARG CUSTOM_CMD='echo "No custom commands passed"' # Worker specific environment variables go here -ENV SYFT_WORKER="true" -ENV SYFT_VERSION_TAG=${SYFT_VERSION_TAG} +ENV SYFT_WORKER="true" \ + SYFT_VERSION_TAG=${SYFT_VERSION_TAG} \ + UV_HTTP_TIMEOUT=600 -# Commenting this until we support built using python docker sdk or find any other alternative. 
-# RUN --mount=type=cache,target=/var/cache/apk,sharing=locked \ -# --mount=type=cache,target=$HOME/.cache/pip,sharing=locked \ -RUN apk update && \ - apk add ${SYSTEM_PACKAGES} && \ - pip install --user ${PIP_PACKAGES} && \ +RUN apk update && apk upgrade && \ + apk add --no-cache ${SYSTEM_PACKAGES} && \ + # if uv is present then run uv pip install else simple pip install + if [ -x "$(command -v uv)" ]; then uv pip install --no-cache ${PIP_PACKAGES}; else pip install --user ${PIP_PACKAGES}; fi && \ bash -c "$CUSTOM_CMD" diff --git a/packages/grid/backend/grid/main.py b/packages/grid/backend/grid/main.py index 2974ea29b61..9ca43dadee8 100644 --- a/packages/grid/backend/grid/main.py +++ b/packages/grid/backend/grid/main.py @@ -1,5 +1,9 @@ # stdlib +# stdlib +from contextlib import asynccontextmanager +from typing import Any + # third party from fastapi import FastAPI from fastapi.responses import JSONResponse @@ -14,9 +18,20 @@ from grid.core.node import worker from grid.logger.handler import get_log_handler + +@asynccontextmanager +async def lifespan(app: FastAPI) -> Any: + try: + yield + finally: + worker.stop() + print("Worker Stop !!!") + + app = FastAPI( title=settings.PROJECT_NAME, openapi_url=f"{settings.API_V2_STR}/openapi.json", + lifespan=lifespan, ) app.add_event_handler("startup", get_log_handler().init_logger) @@ -41,12 +56,6 @@ print(status) -@app.on_event("shutdown") -def shutdown() -> None: - worker.stop() - print("Worker Stop !!!") - - # needed for Google Kubernetes Engine LoadBalancer Healthcheck @app.get( "/", diff --git a/packages/grid/backend/grid/start.sh b/packages/grid/backend/grid/start.sh index 284cf41a268..297f242ff78 100755 --- a/packages/grid/backend/grid/start.sh +++ b/packages/grid/backend/grid/start.sh @@ -1,8 +1,7 @@ #! /usr/bin/env bash set -e -echo "Running start.sh with RELEASE=${RELEASE} and $(id)" -export GEVENT_MONKEYPATCH="False" +echo "Running Syft with RELEASE=${RELEASE} and $(id)" APP_MODULE=grid.main:app LOG_LEVEL=${LOG_LEVEL:-info} @@ -10,15 +9,9 @@ HOST=${HOST:-0.0.0.0} PORT=${PORT:-80} NODE_TYPE=${NODE_TYPE:-domain} APPDIR=${APPDIR:-$HOME/app} - RELOAD="" DEBUG_CMD="" -# For debugging permissions -ls -lisa $HOME/data -ls -lisa $APPDIR/syft/ -ls -lisa $APPDIR/grid/ - if [[ ${DEV_MODE} == "True" ]]; then echo "DEV_MODE Enabled" @@ -28,15 +21,15 @@ fi # only set by kubernetes to avoid conflict with docker tests if [[ ${DEBUGGER_ENABLED} == "True" ]]; then - pip install --user debugpy + uv pip install debugpy DEBUG_CMD="python -m debugpy --listen 0.0.0.0:5678 -m" fi -set +e +export CREDENTIALS_PATH=${CREDENTIALS_PATH:-$HOME/data/creds/credentials.json} export NODE_PRIVATE_KEY=$(python $APPDIR/grid/bootstrap.py --private_key) export NODE_UID=$(python $APPDIR/grid/bootstrap.py --uid) export NODE_TYPE=$NODE_TYPE -set -e +export GEVENT_MONKEYPATCH="False" echo "NODE_UID=$NODE_UID" echo "NODE_TYPE=$NODE_TYPE" diff --git a/packages/grid/frontend/.dockerignore b/packages/grid/frontend/.dockerignore deleted file mode 100644 index 00df28f40b9..00000000000 --- a/packages/grid/frontend/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -.DS_Store -node_modules -.svelte-kit -.pnpm-store \ No newline at end of file diff --git a/packages/grid/frontend/frontend.dockerfile.dockerignore b/packages/grid/frontend/frontend.dockerfile.dockerignore new file mode 100644 index 00000000000..449ac1c92ef --- /dev/null +++ b/packages/grid/frontend/frontend.dockerfile.dockerignore @@ -0,0 +1,15 @@ +# Paths should be relative to the context dir of this image i.e. 
/packages/grid/frontend/ + +# Frontend +**/*.md + +# Dependency directories +**/node_modules +**/.svelte-kit +**/.pnpm-store + +# vim +**/*.swp + +# macOS +**/.DS_Store diff --git a/packages/grid/seaweedfs/seaweedfs.dockerfile.dockerignore b/packages/grid/seaweedfs/seaweedfs.dockerfile.dockerignore new file mode 100644 index 00000000000..98a48c5b17d --- /dev/null +++ b/packages/grid/seaweedfs/seaweedfs.dockerfile.dockerignore @@ -0,0 +1,63 @@ +# Paths should be relative to the context dir of this image i.e. /packages/grid/seaweedfs/ + +# SeaweedFS +**/tests/ +**/*.md + +# Byte-compiled / optimized / DLL files +**/__pycache__/ +**/*.py[cod] +**/*$py.class + +# Distribution / packaging +**/.Python +**/build/ +**/develop-eggs/ +**/dist/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/share/python-wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/MANIFEST + +# Jupyter Notebook +**/.ipynb_checkpoints + +# Environments +**/.env +**/.venv +**/env/ +**/venv/ +**/ENV/ +**/env.bak/ +**/venv.bak/ + +# Unit test / coverage reports +**/htmlcov/ +**/.tox/ +**/.nox/ +**/.coverage +**/.coverage.* +**/.cache +**/nosetests.xml +**/coverage.xml +**/*.cover +**/*.py,cover +**/.hypothesis/ +**/.pytest_cache/ +**/cover/ + +# vim +**/*.swp + +# macOS +**/.DS_Store diff --git a/packages/grid/syft-client/syft.Dockerfile.dockerignore b/packages/grid/syft-client/syft.Dockerfile.dockerignore index c5bacaa51c3..d78459cecbb 100644 --- a/packages/grid/syft-client/syft.Dockerfile.dockerignore +++ b/packages/grid/syft-client/syft.Dockerfile.dockerignore @@ -1,67 +1,67 @@ # Syft -tests/ -*.md +**/tests/ +**/*.md # Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class +**/__pycache__/ +**/*.py[cod] +**/*$py.class # Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST +**/.Python +**/build/ +**/develop-eggs/ +**/dist/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/share/python-wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/MANIFEST # Jupyter Notebook -.ipynb_checkpoints +**/.ipynb_checkpoints # IPython -profile_default/ -ipython_config.py +**/profile_default/ +**/ipython_config.py # Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ +**/.env +**/.venv +**/env/ +**/venv/ +**/ENV/ +**/env.bak/ +**/venv.bak/ # Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ +**/htmlcov/ +**/.tox/ +**/.nox/ +**/.coverage +**/.coverage.* +**/.cache +**/nosetests.xml +**/coverage.xml +**/*.cover +**/*.py,cover +**/.hypothesis/ +**/.pytest_cache/ +**/cover/ # mypy -.mypy_cache/ -.dmypy.json -dmypy.json +**/.mypy_cache/ +**/.dmypy.json +**/dmypy.json # macOS -.DS_Store +**/.DS_Store diff --git a/packages/log.txt b/packages/log.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/packages/syft/.dockerignore b/packages/syft/.dockerignore deleted file mode 100644 index fcac49cb125..00000000000 --- a/packages/syft/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -.mypy_cache -**/.mypy_cache diff --git a/packages/syft/setup.cfg b/packages/syft/setup.cfg index 132fc2b23be..81b0272d8a3 100644 --- a/packages/syft/setup.cfg +++ b/packages/syft/setup.cfg @@ -69,6 +69,7 @@ syft = ipywidgets==8.1.2 rich==13.7.1 
jinja2==3.1.4 + tenacity==8.3.0 install_requires = %(syft)s @@ -90,7 +91,8 @@ data_science = evaluate==0.4.1 recordlinkage==0.16 dm-haiku==0.0.10 - torch[cpu]==2.2.1 + # backend.dockerfile installs torch separately, so update the version over there as well! + torch==2.3.0 dev = %(test_plugins)s diff --git a/packages/syft/src/syft/__init__.py b/packages/syft/src/syft/__init__.py index 24a7eb69185..3aae76848dc 100644 --- a/packages/syft/src/syft/__init__.py +++ b/packages/syft/src/syft/__init__.py @@ -5,6 +5,7 @@ import pathlib from pathlib import Path import sys +from types import MethodType from typing import Any # relative @@ -70,6 +71,7 @@ from .service.user.roles import Roles as roles # noqa: F401 from .service.user.user_service import UserService # noqa: F401 from .stable_version import LATEST_STABLE_SYFT +from .types.syft_object import SyftObject from .types.twin_object import TwinObject # noqa: F401 from .types.uid import UID # noqa: F401 from .util import filterwarnings # noqa: F401 @@ -109,6 +111,87 @@ pass # nosec +def _patch_ipython_autocompletion() -> None: + try: + # third party + from IPython.core.guarded_eval import EVALUATION_POLICIES + except ImportError: + return + + ipython = get_ipython() + if ipython is None: + return + + try: + # this allows property getters to be used in nested autocomplete + ipython.Completer.evaluation = "limited" + ipython.Completer.use_jedi = False + policy = EVALUATION_POLICIES["limited"] + + policy.allowed_getattr_external.update( + [ + ("syft.client.api", "APIModule"), + ("syft.client.api", "SyftAPI"), + ] + ) + original_can_get_attr = policy.can_get_attr + + def patched_can_get_attr(value: Any, attr: str) -> bool: + attr_name = "__syft_allow_autocomplete__" + # first check if exist to prevent side effects + if hasattr(value, attr_name) and attr in getattr(value, attr_name, []): + if attr in dir(value): + return True + else: + return False + else: + return original_can_get_attr(value, attr) + + policy.can_get_attr = patched_can_get_attr + except Exception: + print("Failed to patch ipython autocompletion for syft property getters") + + try: + # this constraints the completions for autocomplete. + # if __syft_dir__ is defined we only autocomplete those properties + # stdlib + import re + + original_attr_matches = ipython.Completer.attr_matches + + def patched_attr_matches(self, text: str) -> list[str]: # type: ignore + res = original_attr_matches(text) + m2 = re.match(r"(.+)\.(\w*)$", self.line_buffer) + if not m2: + return res + expr, _ = m2.group(1, 2) + obj = self._evaluate_expr(expr) + if isinstance(obj, SyftObject) and hasattr(obj, "__syft_dir__"): + # here we filter all autocomplete results to only contain those + # defined in __syft_dir__, however the original autocomplete prefixes + # have the full path, while __syft_dir__ only defines the attr + attrs = set(obj.__syft_dir__()) + new_res = [] + for r in res: + splitted = r.split(".") + if len(splitted) > 1: + attr_name = splitted[-1] + if attr_name in attrs: + new_res.append(r) + return new_res + else: + return res + + ipython.Completer.attr_matches = MethodType( + patched_attr_matches, ipython.Completer + ) + except Exception: + print("Failed to patch syft autocompletion for __syft_dir__") + + +_patch_ipython_autocompletion() + + def module_property(func: Any) -> Callable: """Decorator to turn module functions into properties. 
Function names must be prefixed with an underscore.""" diff --git a/packages/syft/src/syft/client/api.py b/packages/syft/src/syft/client/api.py index d60c6460b4f..00b0673f221 100644 --- a/packages/syft/src/syft/client/api.py +++ b/packages/syft/src/syft/client/api.py @@ -48,6 +48,7 @@ from ..service.warnings import WarningContext from ..types.cache_object import CachedSyftObject from ..types.identity import Identity +from ..types.syft_object import SYFT_OBJECT_VERSION_1 from ..types.syft_object import SYFT_OBJECT_VERSION_2 from ..types.syft_object import SyftBaseObject from ..types.syft_object import SyftMigrationRegistry @@ -56,6 +57,7 @@ from ..types.uid import UID from ..util.autoreload import autoreload_enabled from ..util.markdown import as_markdown_python_code +from ..util.table import list_dict_repr_html from ..util.telemetry import instrument from ..util.util import prompt_warning_message from .connection import NodeConnection @@ -66,6 +68,21 @@ from ..service.job.job_stash import Job +IPYNB_BACKGROUND_METHODS = { + "getdoc", + "_partialmethod", + "__name__", + "__code__", + "__wrapped__", + "__custom_documentations__", + "__signature__", + "__defaults__", + "__kwdefaults__", +} + +IPYNB_BACKGROUND_PREFIXES = ["_ipy", "_repr", "__ipython", "__pydantic"] + + class APIRegistry: __api_registry__: dict[tuple, SyftAPI] = OrderedDict() @@ -585,6 +602,19 @@ def wrapper(*args: Any, **kwargs: Any) -> SyftError | Any: return wrapper +class APISubModulesView(SyftObject): + __canonical_name__ = "APISubModulesView" + __version__ = SYFT_OBJECT_VERSION_1 + + submodule: str = "" + endpoints: list[str] = [] + + __syft_include_id_coll_repr__ = False + + def _coll_repr_(self) -> dict[str, Any]: + return {"submodule": self.submodule, "endpoints": "\n".join(self.endpoints)} + + @serializable() class APIModule: _modules: list[str] @@ -596,6 +626,9 @@ def __init__(self, path: str, refresh_callback: Callable | None) -> None: self.path = path self.refresh_callback = refresh_callback + def __dir__(self) -> list[str]: + return self._modules + ["path"] + def has_submodule(self, name: str) -> bool: """We use this as hasattr() triggers __getattribute__ which triggers recursion""" try: @@ -610,18 +643,28 @@ def _add_submodule( setattr(self, attr_name, module_or_func) self._modules.append(attr_name) - def __getattribute__(self, name: str) -> Any: + def __getattr__(self, name: str) -> Any: try: return object.__getattribute__(self, name) except AttributeError: # if we fail, we refresh the api and try again - if self.refresh_callback is not None: + # however, we dont want this to happen all the time because of ipy magic happening + # in the background + if ( + self.refresh_callback is not None + and name not in IPYNB_BACKGROUND_METHODS + and not any( + name.startswith(prefix) for prefix in IPYNB_BACKGROUND_PREFIXES + ) + ): api = self.refresh_callback() try: + # get current path in the module tree new_current_module = api.services for submodule in self.path.split("."): if submodule != "": new_current_module = getattr(new_current_module, submodule) + # retry getting the attribute, if this fails, we throw an error return object.__getattribute__(new_current_module, name) except AttributeError: pass @@ -637,8 +680,35 @@ def __getitem__(self, key: str | int) -> Any: raise NotImplementedError def _repr_html_(self) -> Any: + if self.path == "settings": + return self.get()._repr_html_() + if not hasattr(self, "get_all"): - return NotImplementedError + + def recursively_get_submodules( + module: APIModule | Callable, + ) -> 
list[APIModule | Callable]: + children = [module] + if isinstance(module, APIModule): + for submodule_name in module._modules: + submodule = getattr(module, submodule_name) + children += recursively_get_submodules(submodule) + return children + + views = [] + for submodule_name in self._modules: + submodule = getattr(self, submodule_name) + children = recursively_get_submodules(submodule) + child_paths = [ + x.path for x in children if isinstance(x, RemoteFunction) + ] + views.append( + APISubModulesView(submodule=submodule_name, endpoints=child_paths) + ) + + return list_dict_repr_html(views) + # return NotImplementedError + results = self.get_all() return results._repr_html_() @@ -764,8 +834,26 @@ class SyftAPI(SyftObject): __user_role: ServiceRole = ServiceRole.NONE communication_protocol: PROTOCOL_TYPE - # def __post_init__(self) -> None: - # pass + # informs getattr does not have nasty side effects + __syft_allow_autocomplete__ = ["services"] + + def __dir__(self) -> list[str]: + modules = getattr(self.api_module, "_modules", []) + return ["services"] + modules + + def __syft_dir__(self) -> list[str]: + modules = getattr(self.api_module, "_modules", []) + return ["services"] + modules + + def __getattr__(self, name: str) -> Any: + try: + return getattr(self.api_module, name) + except Exception: + raise SyftAttributeError( + f"'SyftAPI' object has no submodule or method '{name}', " + "you may not have permission to access the module you are trying to access." + "If you think this is an error, try calling `client.refresh()` to update the API." + ) @staticmethod def for_user( diff --git a/packages/syft/src/syft/client/client.py b/packages/syft/src/syft/client/client.py index 9438294a6c0..ba4dfc38c80 100644 --- a/packages/syft/src/syft/client/client.py +++ b/packages/syft/src/syft/client/client.py @@ -8,7 +8,6 @@ from enum import Enum from getpass import getpass import json -import os from typing import Any from typing import TYPE_CHECKING from typing import cast @@ -485,6 +484,17 @@ class SyftClient: __logged_in_username: str = "" __user_role: ServiceRole = ServiceRole.NONE + # informs getattr does not have nasty side effects + __syft_allow_autocomplete__ = [ + "api", + "code", + "jobs", + "users", + "settings", + "notifications", + "custom_api", + ] + def __init__( self, connection: NodeConnection, @@ -496,6 +506,7 @@ def __init__( self.metadata = metadata self.credentials: SyftSigningKey | None = credentials self._api = api + self.services: APIModule | None = None self.communication_protocol: int | str | None = None self.current_protocol: int | str | None = None @@ -547,7 +558,7 @@ def create_project( user_email_address=user_email_address, members=[self], ) - project = project_create.start() + project = project_create.send() return project # TODO: type of request should be REQUEST, but it will give circular import error @@ -683,7 +694,7 @@ def exchange_route( if client.metadata is None: return SyftError(f"client {client}'s metadata is None!") - result = self.api.services.network.exchange_credentials_with( + return self.api.services.network.exchange_credentials_with( self_node_route=self_node_route, remote_node_route=remote_node_route, remote_node_verify_key=client.metadata.to(NodeMetadataV3).verify_key, @@ -693,8 +704,6 @@ def exchange_route( f"Invalid Route Exchange SyftProtocol: {protocol}.Supported protocols are {SyftProtocol.all()}" ) - return result - @property def jobs(self) -> APIModule | None: if self.api.has_service("job"): @@ -768,15 +777,6 @@ def login( register: bool = 
False, **kwargs: Any, ) -> Self: - # TODO: Remove this Hack (Note to Rasswanth) - # If SYFT_LOGIN_{NODE_NAME}_PASSWORD is set, use that as the password - # for the login. This is useful for CI/CD environments to test password - # randomization that is implemented by helm charts - if self.name is not None and email == "info@openmined.org": - pass_env_var = f"SYFT_LOGIN_{self.name}_PASSWORD" - if pass_env_var in os.environ: - password = os.environ[pass_env_var] - if email is None: email = input("Email: ") if password is None: @@ -958,6 +958,7 @@ def refresh_callback() -> SyftAPI: api=_api, ) self._api = _api + self.services = _api.services return _api diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index a2475c2ffbb..75ec142bfde 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ b/packages/syft/src/syft/client/domain_client.py @@ -309,10 +309,15 @@ def connect_to_gateway( if isinstance(res, SyftSuccess): if self.metadata: return SyftSuccess( - message=f"Connected {self.metadata.node_type} '{self.metadata.name}' to gateway '{client.name}'" + message=( + f"Connected {self.metadata.node_type} " + f"'{self.metadata.name}' to gateway '{client.name}'. " + f"{res.message}" + ) ) else: return SyftSuccess(message=f"Connected to '{client.name}' gateway") + return res def _get_service_by_name_if_exists(self, name: str) -> APIModule | None: diff --git a/packages/syft/src/syft/client/enclave_client.py b/packages/syft/src/syft/client/enclave_client.py index cfb262dc422..32eebdf3189 100644 --- a/packages/syft/src/syft/client/enclave_client.py +++ b/packages/syft/src/syft/client/enclave_client.py @@ -93,9 +93,16 @@ def connect_to_gateway( res = self.exchange_route(client, protocol=protocol) if isinstance(res, SyftSuccess): - return SyftSuccess( - message=f"Connected {self.metadata.node_type} {self.metadata.name} to {client.name} gateway" - ) + if self.metadata: + return SyftSuccess( + message=( + f"Connected {self.metadata.node_type} " + f"'{self.metadata.name}' to gateway '{client.name}'. 
" + f"{res.message}" + ) + ) + else: + return SyftSuccess(message=f"Connected to '{client.name}' gateway") return res diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 018c101de36..5a1f99a41eb 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -80,10 +80,10 @@ def check_network(network: dict) -> dict[Any, Any] | None: except Exception: online = False - # networks without frontend have a /ping route in 0.7.0 + # networks without frontend if not online: try: - ping_url = url + "ping" + ping_url = url + "api/v2/" res = requests.get(ping_url, timeout=DEFAULT_TIMEOUT) # nosec online = res.status_code == 200 except Exception: @@ -189,10 +189,10 @@ def check_network(network: dict) -> dict[Any, Any] | None: except Exception: online = False - # networks without frontend have a /ping route in 0.7.0 + # networks without frontend if not online: try: - ping_url = url + "ping" + ping_url = url + "api/v2/" res = requests.get(ping_url, timeout=DEFAULT_TIMEOUT) online = res.status_code == 200 except Exception: diff --git a/packages/syft/src/syft/client/syncing.py b/packages/syft/src/syft/client/syncing.py index 45e5c33a837..371b77df22a 100644 --- a/packages/syft/src/syft/client/syncing.py +++ b/packages/syft/src/syft/client/syncing.py @@ -1,25 +1,17 @@ # stdlib -from collections.abc import Callable -from time import sleep # relative from ..abstract_node import NodeSideType from ..node.credentials import SyftVerifyKey -from ..service.action.action_permissions import ActionObjectPermission -from ..service.action.action_permissions import ActionPermission -from ..service.action.action_permissions import StoragePermission -from ..service.code.user_code import UserCode -from ..service.job.job_stash import Job from ..service.response import SyftError from ..service.response import SyftSuccess from ..service.sync.diff_state import NodeDiff -from ..service.sync.diff_state import ObjectDiff from ..service.sync.diff_state import ObjectDiffBatch -from ..service.sync.diff_state import ResolvedSyncState from ..service.sync.diff_state import SyncInstruction from ..service.sync.resolve_widget import ResolveWidget from ..service.sync.sync_state import SyncState from ..types.uid import UID +from ..util.decorators import deprecated from .client import SyftClient from .sync_decision import SyncDecision from .sync_decision import SyncDirection @@ -72,11 +64,16 @@ def get_user_input_for_resolve() -> SyncDecision: print(f"Please choose between {options_str}") -def resolve_single(obj_diff_batch: ObjectDiffBatch) -> ResolveWidget: +def resolve(obj_diff_batch: ObjectDiffBatch) -> ResolveWidget: widget = ResolveWidget(obj_diff_batch) return widget +@deprecated(reason="resolve_single has been renamed to resolve", return_syfterror=True) +def resolve_single(obj_diff_batch: ObjectDiffBatch) -> ResolveWidget: + return resolve(obj_diff_batch) + + def handle_sync_batch( obj_diff_batch: ObjectDiffBatch, share_private_data: dict[UID, bool], @@ -254,292 +251,3 @@ def get_other_ignore_batches( ignored_ids.update(other_batch_ids) return other_ignore_batches - - -# Old resolve flow -#################################################################################################### - - -def resolve( - state: NodeDiff, - decision: str | None = None, - decision_callback: Callable[[ObjectDiffBatch], SyncDecision] | None = None, - share_private_objects: bool = False, - ask_for_input: bool = True, -) -> tuple[ResolvedSyncState, 
ResolvedSyncState]: - # TODO: fix this - previously_ignored_batches = state.low_state.ignored_batches - # TODO: only add permissions for objects where we manually give permission - # Maybe default read permission for some objects (high -> low) - resolved_state_low = ResolvedSyncState(node_uid=state.low_node_uid, alias="low") - resolved_state_high = ResolvedSyncState(node_uid=state.high_node_uid, alias="high") - - for batch_diff in state.all_batches: - if batch_diff.is_unchanged: - # Hierarchy has no diffs - continue - - if batch_diff.decision is not None: - # handles ignores - batch_decision = batch_diff.decision - elif decision is not None: - print(batch_diff.__repr__()) - batch_decision = SyncDecision(decision) - elif decision_callback is not None: - batch_decision = decision_callback(batch_diff) - else: - print(batch_diff.__repr__()) - batch_decision = get_user_input_for_resolve() - - batch_diff.decision = batch_decision - - other_batches = [b for b in state.all_batches if b is not batch_diff] - handle_ignore_skip(batch_diff, batch_decision, other_batches) - - if batch_decision not in [SyncDecision.SKIP, SyncDecision.IGNORE]: - sync_instructions = get_sync_instructions_for_batch_items_for_add( - batch_diff, - batch_decision, - share_private_objects=share_private_objects, - ask_for_input=ask_for_input, - ) - else: - sync_instructions = [] - if batch_decision == SyncDecision.IGNORE: - resolved_state_high.add_ignored(batch_diff) - resolved_state_low.add_ignored(batch_diff) - - if ( - batch_diff.root_id in previously_ignored_batches - and batch_diff.decision != SyncDecision.IGNORE - ): - resolved_state_high.add_unignored(batch_diff.root_id) - resolved_state_low.add_unignored(batch_diff.root_id) - - print(f"Decision: Syncing {len(sync_instructions)} objects") - - for sync_instruction in sync_instructions: - resolved_state_low.add_sync_instruction(sync_instruction) - resolved_state_high.add_sync_instruction(sync_instruction) - - print() - print("=" * 100) - print() - - return resolved_state_low, resolved_state_high - - -def handle_ignore_skip( - batch: ObjectDiffBatch, decision: SyncDecision, other_batches: list[ObjectDiffBatch] -) -> None: - # make sure type is SyncDecision at runtime - decision = SyncDecision(decision) - - if decision == SyncDecision.SKIP or decision == SyncDecision.IGNORE: - skipped_or_ignored_ids = { - x.object_id for x in batch.get_dependents(include_roots=False) - } - for other_batch in other_batches: - if other_batch.decision != decision: - # Currently, this is not recursive, in the future it might be - other_batch_ids = { - d.object_id - for d in other_batch.get_dependencies(include_roots=True) - } - if len(other_batch_ids & skipped_or_ignored_ids) != 0: - other_batch.decision = decision - skipped_or_ignored_ids.update(other_batch_ids) - action = "Skipping" if decision == SyncDecision.SKIP else "Ignoring" - print( - f"\n{action} other batch with root {other_batch.root_type.__name__}\n" - ) - - -def get_sync_instructions_for_batch_items_for_add( - batch_diff: ObjectDiffBatch, - decision: SyncDecision, - share_private_objects: bool = False, - ask_for_input: bool = True, -) -> list[SyncInstruction]: - sync_decisions: list[SyncInstruction] = [] - - unpublished_private_high_diffs: list[ObjectDiff] = [] - for diff in batch_diff.get_dependents(include_roots=False): - is_high_private_object = ( - diff.high_obj is not None and diff.high_obj._has_private_sync_attrs() - ) - is_low_published_object = diff.low_node_uid in diff.low_storage_permissions - if is_high_private_object 
and not is_low_published_object: - unpublished_private_high_diffs.append(diff) - - user_codes_high: list[UserCode] = [ - diff.high_obj - for diff in batch_diff.get_dependencies(include_roots=True) - if isinstance(diff.high_obj, UserCode) - ] - - if len(user_codes_high) == 0: - user_code_high = None - else: - # NOTE we can always assume the first usercode is - # not a nested code, because diffs are sorted in depth-first order - user_code_high = user_codes_high[0] - - if user_code_high is None and len(unpublished_private_high_diffs): - raise ValueError("Found unpublished private objects without user code") - - if share_private_objects: - private_high_diffs_to_share = unpublished_private_high_diffs - elif ask_for_input: - private_high_diffs_to_share = ask_user_input_permission( - user_code_high, unpublished_private_high_diffs - ) - else: - private_high_diffs_to_share = [] - - for diff in batch_diff.get_dependencies(include_roots=False): - is_unpublished_private_diff = diff in unpublished_private_high_diffs - has_share_decision = diff in private_high_diffs_to_share - - if isinstance(diff.high_obj, Job): - if user_code_high is None: - raise ValueError("Job without user code") - # Jobs are always shared - new_permissions_low_side = [ - ActionObjectPermission( - uid=diff.object_id, - permission=ActionPermission.READ, - credentials=user_code_high.user_verify_key, - ) - ] - mockify = False - - elif is_unpublished_private_diff and has_share_decision: - # private + want to share - new_permissions_low_side = [ - ActionObjectPermission( - uid=diff.object_id, - permission=ActionPermission.READ, - credentials=user_code_high.user_verify_key, # type: ignore - ) - ] - mockify = False - - elif is_unpublished_private_diff and not has_share_decision: - # private + do not share - new_permissions_low_side = [] - mockify = True - - else: - # any other object is shared - new_permissions_low_side = [] - mockify = False - - new_storage_permissions_lowside = [] - if not mockify: - new_storage_permissions_lowside = [ - StoragePermission(uid=diff.object_id, node_uid=diff.low_node_uid) - ] - - if ( - diff.status == "NEW" - and diff.high_obj is None - and decision == SyncDecision.LOW - ): - new_storage_permissions_highside = [ - StoragePermission(uid=diff.object_id, node_uid=diff.high_node_uid) - ] - else: - new_storage_permissions_highside = [] - - sync_decisions.append( - SyncInstruction( - diff=diff, - decision=decision, - new_permissions_lowside=new_permissions_low_side, - new_storage_permissions_lowside=new_storage_permissions_lowside, - new_storage_permissions_highside=new_storage_permissions_highside, - mockify=mockify, - ) - ) - - return sync_decisions - - -QUESTION_SHARE_PRIVATE_OBJS = """You currently have the following private objects: - -{objects_str} - -Do you want to share some of these private objects? If so type the first 3 characters of the id e.g. 'abc'. -If you want to share all private objects, type "all". -If you dont want to share any more private objects, type "no". 
-""" - -CONFIRMATION_SHARE_PRIVATE_OBJ = """Setting permissions for {object_type} #{object_id} to share with {user_verify_key}, -this will become effective when you call client.apply_state()) -""" - - -def ask_user_input_permission( - user_code: UserCode, all_private_high_diffs: list[ObjectDiff] -) -> list[ObjectDiff]: - if len(all_private_high_diffs) == 0: - return [] - - user_verify_key = user_code.user_verify_key - private_high_diffs_to_share = [] - print( - f"""This batch of updates contains new private objects on the high side that you may want \ - to share with user {user_verify_key}.""" - ) - - remaining_private_high_diffs = all_private_high_diffs[:] - while len(remaining_private_high_diffs): - objects_str = "\n".join( - [ - f"{diff.object_type} #{diff.object_id}" - for diff in remaining_private_high_diffs - ] - ) - print(QUESTION_SHARE_PRIVATE_OBJS.format(objects_str=objects_str), flush=True) - - sleep(0.1) - res = input() - if res == "no": - break - - if res == "all": - private_high_diffs_to_share.extend(remaining_private_high_diffs) - remaining_private_high_diffs = [] - elif len(res) >= 3: - matches = [ - diff - for diff in remaining_private_high_diffs - if str(diff.object_id).startswith(res) - ] - if len(matches) == 0: - print("Invalid input") - continue - elif len(matches) == 1: - diff = matches[0] - print() - print("=" * 100) - print() - print( - CONFIRMATION_SHARE_PRIVATE_OBJ.format( - object_type=diff.object_type, - object_id=diff.object_id, - user_verify_key=user_verify_key, - ) - ) - - remaining_private_high_diffs.remove(diff) - private_high_diffs_to_share.append(diff) - - else: - print("Found multiple matches for provided id, exiting") - break - else: - print("invalid input") - - return private_high_diffs_to_share diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index ad0b6f7cd6e..7861ee422e0 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -66,6 +66,7 @@ from ..service.metadata.metadata_service import MetadataService from ..service.metadata.node_metadata import NodeMetadataV3 from ..service.network.network_service import NetworkService +from ..service.network.utils import PeerHealthCheckTask from ..service.notification.notification_service import NotificationService from ..service.notifier.notifier_service import NotifierService from ..service.object_search.migration_state_service import MigrateStateService @@ -321,6 +322,7 @@ def __init__( smtp_port: int | None = None, smtp_host: str | None = None, association_request_auto_approval: bool = False, + background_tasks: bool = False, ): # 🟡 TODO 22: change our ENV variable format and default init args to make this # less horrible or add some convenience functions @@ -407,6 +409,16 @@ def __init__( self.init_blob_storage(config=blob_storage_config) + context = AuthedServiceContext( + node=self, + credentials=self.verify_key, + role=ServiceRole.ADMIN, + ) + + self.peer_health_manager: PeerHealthCheckTask | None = None + if background_tasks: + self.run_peer_health_checks(context=context) + # Migrate data before any operation on db if migrate: self.find_and_migrate_data() @@ -457,7 +469,14 @@ def init_blob_storage(self, config: BlobStorageConfig | None = None) -> None: remote_profile.profile_name ] = remote_profile + def run_peer_health_checks(self, context: AuthedServiceContext) -> None: + self.peer_health_manager = PeerHealthCheckTask() + self.peer_health_manager.run(context=context) + def stop(self) -> None: + if self.peer_health_manager is 
not None: + self.peer_health_manager.stop() + for consumer_list in self.queue_manager.consumers.values(): for c in consumer_list: c.close() @@ -602,6 +621,7 @@ def named( migrate: bool = False, in_memory_workers: bool = True, association_request_auto_approval: bool = False, + background_tasks: bool = False, ) -> Self: uid = UID.with_seed(name) name_hash = hashlib.sha256(name.encode("utf8")).digest() @@ -630,6 +650,7 @@ def named( in_memory_workers=in_memory_workers, reset=reset, association_request_auto_approval=association_request_auto_approval, + background_tasks=background_tasks, ) def is_root(self, credentials: SyftVerifyKey) -> bool: @@ -973,9 +994,13 @@ def settings(self) -> NodeSettings: if self.signing_key is None: raise ValueError(f"{self} has no signing key") settings = settings_stash.get_all(self.signing_key.verify_key) + if settings.is_err(): + raise ValueError( + f"Cannot get node settings for '{self.name}'. Error: {settings.err()}" + ) if settings.is_ok() and len(settings.ok()) > 0: - settings_data = settings.ok()[0] - return settings_data + settings = settings.ok()[0] + return settings @property def metadata(self) -> NodeMetadataV3: @@ -1264,6 +1289,27 @@ def add_api_endpoint_execution_to_queue( None, ) + def get_worker_pool_ref_by_name( + self, credentials: SyftVerifyKey, worker_pool_name: str | None = None + ) -> LinkedObject | SyftError: + # If worker pool id is not set, then use default worker pool + # Else, get the worker pool for given uid + if worker_pool_name is None: + worker_pool = self.get_default_worker_pool() + else: + result = self.pool_stash.get_by_name(credentials, worker_pool_name) + if result.is_err(): + return SyftError(message=f"{result.err()}") + worker_pool = result.ok() + + # Create a Worker pool reference object + worker_pool_ref = LinkedObject.from_obj( + worker_pool, + service_type=SyftWorkerPoolService, + node_uid=self.id, + ) + return worker_pool_ref + def add_action_to_queue( self, action: Action, @@ -1287,23 +1333,11 @@ def add_action_to_queue( user_code = result.ok() worker_pool_name = user_code.worker_pool_name - # If worker pool id is not set, then use default worker pool - # Else, get the worker pool for given uid - if worker_pool_name is None: - worker_pool = self.get_default_worker_pool() - else: - result = self.pool_stash.get_by_name(credentials, worker_pool_name) - if result.is_err(): - return SyftError(message=f"{result.err()}") - worker_pool = result.ok() - - # Create a Worker pool reference object - worker_pool_ref = LinkedObject.from_obj( - worker_pool, - service_type=SyftWorkerPoolService, - node_uid=self.id, + worker_pool_ref = self.get_worker_pool_ref_by_name( + credentials, worker_pool_name ) - + if isinstance(worker_pool_ref, SyftError): + return worker_pool_ref queue_item = ActionQueueItem( id=task_uid, node_uid=self.id, @@ -1448,12 +1482,10 @@ def add_api_call_to_queue( else: worker_settings = WorkerSettings.from_node(node=self) - default_worker_pool = self.get_default_worker_pool() - worker_pool = LinkedObject.from_obj( - default_worker_pool, - service_type=SyftWorkerPoolService, - node_uid=self.id, - ) + worker_pool_ref = self.get_worker_pool_ref_by_name(credentials=credentials) + if isinstance(worker_pool_ref, SyftError): + return worker_pool_ref + queue_item = QueueItem( id=UID(), node_uid=self.id, @@ -1465,7 +1497,7 @@ def add_api_call_to_queue( method=method_str, args=unsigned_call.args, kwargs=unsigned_call.kwargs, - worker_pool=worker_pool, + worker_pool=worker_pool_ref, ) return self.add_queueitem_to_queue( 
queue_item, diff --git a/packages/syft/src/syft/node/server.py b/packages/syft/src/syft/node/server.py index 8c9b71559cb..f5f05bf35ac 100644 --- a/packages/syft/src/syft/node/server.py +++ b/packages/syft/src/syft/node/server.py @@ -79,6 +79,7 @@ def run_uvicorn( create_producer: bool, association_request_auto_approval: bool, n_consumers: int, + background_tasks: bool, ) -> None: async def _run_uvicorn( name: str, @@ -112,6 +113,7 @@ async def _run_uvicorn( create_producer=create_producer, n_consumers=n_consumers, association_request_auto_approval=association_request_auto_approval, + background_tasks=background_tasks, ) else: worker = worker_class( @@ -127,6 +129,7 @@ async def _run_uvicorn( create_producer=create_producer, n_consumers=n_consumers, association_request_auto_approval=association_request_auto_approval, + background_tasks=background_tasks, ) router = make_routes(worker=worker) app = make_app(worker.name, router=router) @@ -186,6 +189,7 @@ def serve_node( create_producer: bool = False, n_consumers: int = 0, association_request_auto_approval: bool = False, + background_tasks: bool = False, ) -> tuple[Callable, Callable]: server_process = multiprocessing.Process( target=run_uvicorn, @@ -204,6 +208,7 @@ def serve_node( "create_producer": create_producer, "n_consumers": n_consumers, "association_request_auto_approval": association_request_auto_approval, + "background_tasks": background_tasks, }, ) diff --git a/packages/syft/src/syft/orchestra.py b/packages/syft/src/syft/orchestra.py index ffa2fe077c9..1a08f594aa2 100644 --- a/packages/syft/src/syft/orchestra.py +++ b/packages/syft/src/syft/orchestra.py @@ -165,6 +165,7 @@ def deploy_to_python( create_producer: bool = False, queue_port: int | None = None, association_request_auto_approval: bool = False, + background_tasks: bool = False, ) -> NodeHandle: worker_classes = { NodeType.DOMAIN: Domain, @@ -191,6 +192,7 @@ def deploy_to_python( "n_consumers": n_consumers, "create_producer": create_producer, "association_request_auto_approval": association_request_auto_approval, + "background_tasks": background_tasks, } if port: @@ -279,6 +281,7 @@ def launch( create_producer: bool = False, queue_port: int | None = None, association_request_auto_approval: bool = False, + background_tasks: bool = False, ) -> NodeHandle: if dev_mode is True: thread_workers = True @@ -314,6 +317,7 @@ def launch( create_producer=create_producer, queue_port=queue_port, association_request_auto_approval=association_request_auto_approval, + background_tasks=background_tasks, ) elif deployment_type_enum == DeploymentType.REMOTE: return deploy_to_remote( diff --git a/packages/syft/src/syft/protocol/protocol_version.json b/packages/syft/src/syft/protocol/protocol_version.json index ca4d715a57b..e30f48dfd5a 100644 --- a/packages/syft/src/syft/protocol/protocol_version.json +++ b/packages/syft/src/syft/protocol/protocol_version.json @@ -153,7 +153,7 @@ "NodePeer": { "3": { "version": 3, - "hash": "dababb03d2463b6218ae22d55293a60580f5a14bebd0c664d71da104e2f0b835", + "hash": "ec0e39fc77ddb542558519d6a1f7c55f41cc037b6312792333792a04feea57e6", "action": "add" } }, diff --git a/packages/syft/src/syft/service/code/user_code.py b/packages/syft/src/syft/service/code/user_code.py index 81ba861296d..e68436a47b0 100644 --- a/packages/syft/src/syft/service/code/user_code.py +++ b/packages/syft/src/syft/service/code/user_code.py @@ -770,15 +770,30 @@ def add_output_policy_ids(cls, values: Any) -> Any: def kwargs(self) -> dict[Any, Any] | None: return self.input_policy_init_kwargs - 
def __call__(self, *args: Any, syft_no_node: bool = False, **kwargs: Any) -> Any: + def __call__( + self, + *args: Any, + syft_no_node: bool = False, + blocking: bool = False, + time_alive: int | None = None, + n_consumers: int = 2, + **kwargs: Any, + ) -> Any: if syft_no_node: return self.local_call(*args, **kwargs) - return self._ephemeral_node_call(*args, **kwargs) + return self._ephemeral_node_call( + *args, + time_alive=time_alive, + n_consumers=n_consumers, + blocking=blocking, + **kwargs, + ) def local_call(self, *args: Any, **kwargs: Any) -> Any: # only run this on the client side if self.local_function: - tree = ast.parse(inspect.getsource(self.local_function)) + source = dedent(inspect.getsource(self.local_function)) + tree = ast.parse(source) # check there are no globals v = GlobalsVisitor() @@ -803,9 +818,10 @@ def local_call(self, *args: Any, **kwargs: Any) -> Any: def _ephemeral_node_call( self, - time_alive: int | None = None, - n_consumers: int | None = None, *args: Any, + time_alive: int | None = None, + n_consumers: int = 2, + blocking: bool = False, **kwargs: Any, ) -> Any: # relative @@ -814,15 +830,7 @@ def _ephemeral_node_call( # Right now we only create a number of workers # In the future we might need to have the same pools/images as well - if n_consumers is None: - print( - SyftInfo( - message="Creating a node with n_consumers=2 (the default value)" - ) - ) - n_consumers = 2 - - if time_alive is None and "blocking" in kwargs and not kwargs["blocking"]: + if time_alive is None and not blocking: print( SyftInfo( message="Closing the node after time_alive=300 (the default value)" diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index b9c8b9426cb..ae685c34b39 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -2,6 +2,7 @@ from collections.abc import Callable from datetime import datetime from enum import Enum +import textwrap from typing import Any # third party @@ -37,7 +38,6 @@ from ...util.notebook_ui.icons import Icon from ...util.notebook_ui.styles import FONT_CSS from ...util.notebook_ui.styles import ITABLES_CSS -from ...util.util import get_mb_size from ..data_subject.data_subject import DataSubject from ..data_subject.data_subject import DataSubjectCreate from ..data_subject.data_subject_service import DataSubjectService @@ -45,9 +45,6 @@ from ..response import SyftException from ..response import SyftSuccess -DATA_SIZE_WARNING_LIMIT = 512 - - NamePartitionKey = PartitionKey(key="name", type_=str) @@ -329,8 +326,10 @@ class CreateAsset(SyftObject): __repr_attrs__ = ["name"] model_config = ConfigDict(validate_assignment=True) - def __init__(self, description: str | None = "", **data: Any) -> None: - super().__init__(**data, description=MarkdownDescription(text=str(description))) + def __init__(self, description: str | None = None, **data: Any) -> None: + if isinstance(description, str): + description = MarkdownDescription(text=description) + super().__init__(**data, description=description) @model_validator(mode="after") def __mock_is_real_for_empty_mock_must_be_false(self) -> Self: @@ -408,13 +407,6 @@ def check(self) -> SyftSuccess | SyftError: # return SyftError( # message=f"set_obj shape {data_shape} must match set_mock shape {mock_shape}" # ) - total_size_mb = get_mb_size(self.data) + get_mb_size(self.mock) - if total_size_mb > DATA_SIZE_WARNING_LIMIT: - print( - f"**WARNING**: The total size for asset: '{self.name}' 
exceeds '{DATA_SIZE_WARNING_LIMIT} MB'. " - "This might result in failure to upload dataset. " - "Please contact #support on OpenMined slack for further assistance.", - ) return SyftSuccess(message="Dataset is Valid") @@ -522,32 +514,15 @@ def action_ids(self) -> list[UID]: def assets(self) -> DictTuple[str, Asset]: return DictTuple((asset.name, asset) for asset in self.asset_list) - def _old_repr_markdown_(self) -> str: - _repr_str = f"Syft Dataset: {self.name}\n" - _repr_str += "Assets:\n" - for asset in self.asset_list: - if asset.description is not None: - _repr_str += f"\t{asset.name}: {asset.description.text}\n\n" - else: - _repr_str += f"\t{asset.name}\n\n" - if self.citation: - _repr_str += f"Citation: {self.citation}\n" - if self.url: - _repr_str += f"URL: {self.url}\n" - if self.description: - _repr_str += f"Description: {self.description.text}\n" - return as_markdown_python_code(_repr_str) - def _repr_markdown_(self, wrap_as_python: bool = True, indent: int = 0) -> str: - # return self._old_repr_markdown_() - return self._markdown_() - - def _markdown_(self) -> str: _repr_str = f"Syft Dataset: {self.name}\n\n" _repr_str += "Assets:\n\n" for asset in self.asset_list: if asset.description is not None: - _repr_str += f"\t{asset.name}: {asset.description.text}\n\n" + description_text = textwrap.shorten( + asset.description.text, width=100, placeholder="..." + ) + _repr_str += f"\t{asset.name}: {description_text}\n\n" else: _repr_str += f"\t{asset.name}\n\n" if self.citation: diff --git a/packages/syft/src/syft/service/job/job_service.py b/packages/syft/src/syft/service/job/job_service.py index 6ad8b0b11cc..323dff99ae9 100644 --- a/packages/syft/src/syft/service/job/job_service.py +++ b/packages/syft/src/syft/service/job/job_service.py @@ -1,4 +1,7 @@ # stdlib +from collections.abc import Callable +import inspect +import time from typing import Any from typing import cast @@ -28,6 +31,18 @@ from .job_stash import JobStatus +def wait_until( + predicate: Callable[[], bool], timeout: int = 10 +) -> SyftSuccess | SyftError: + start = time.time() + code_string = inspect.getsource(predicate).strip() + while time.time() - start < timeout: + if predicate(): + return SyftSuccess(message=f"Predicate {code_string} is True") + time.sleep(1) + return SyftError(message=f"Timeout reached for predicate {code_string}") + + @instrument @serializable() class JobService(AbstractService): @@ -112,16 +127,31 @@ def get_by_result_id( def restart( self, context: AuthedServiceContext, uid: UID ) -> SyftSuccess | SyftError: - res = self.stash.get_by_uid(context.credentials, uid=uid) - if res.is_err(): - return SyftError(message=res.err()) + job_or_err = self.stash.get_by_uid(context.credentials, uid=uid) + if job_or_err.is_err(): + return SyftError(message=job_or_err.err()) + if job_or_err.ok() is None: + return SyftError(message="Job not found") + + job = job_or_err.ok() + if job.parent_job_id is not None: + return SyftError( + message="Not possible to restart subjobs. Please restart the parent job." + ) + if job.status == JobStatus.PROCESSING: + return SyftError( + message="Jobs in progress cannot be restarted. " + "Please wait for completion or cancel the job via .cancel() to proceed." 
+ ) - job = res.ok() job.status = JobStatus.CREATED self.update(context=context, job=job) task_uid = UID() worker_settings = WorkerSettings.from_node(context.node) + worker_pool_ref = context.node.get_worker_pool_ref_by_name(context.credentials) + if isinstance(worker_pool_ref, SyftError): + return worker_pool_ref queue_item = ActionQueueItem( id=task_uid, @@ -132,6 +162,7 @@ def restart( worker_settings=worker_settings, args=[], kwargs={"action": job.action}, + worker_pool=worker_pool_ref, ) context.node.queue_stash.set_placeholder(context.credentials, queue_item) @@ -139,8 +170,8 @@ def restart( log_service = context.node.get_service("logservice") result = log_service.restart(context, job.log_id) - if result.is_err(): - return SyftError(message=str(result.err())) + if isinstance(result, SyftError): + return result return SyftSuccess(message="Great Success!") @@ -158,28 +189,62 @@ def update( res = res.ok() return SyftSuccess(message="Great Success!") + def _kill(self, context: AuthedServiceContext, job: Job) -> SyftSuccess | SyftError: + # set job and subjobs status to TERMINATING + # so that MonitorThread can kill them + job.status = JobStatus.TERMINATING + res = self.stash.update(context.credentials, obj=job) + results = [res] + + # attempt to kill all subjobs + subjobs_or_err = self.stash.get_by_parent_id(context.credentials, uid=job.id) + if subjobs_or_err.is_ok() and subjobs_or_err.ok() is not None: + subjobs = subjobs_or_err.ok() + for subjob in subjobs: + subjob.status = JobStatus.TERMINATING + res = self.stash.update(context.credentials, obj=subjob) + results.append(res) + + errors = [res.err() for res in results if res.is_err()] + if errors: + return SyftError(message=f"Failed to kill job: {errors}") + + # wait for job and subjobs to be killed by MonitorThread + wait_until(lambda: job.fetched_status == JobStatus.INTERRUPTED) + wait_until( + lambda: all( + subjob.fetched_status == JobStatus.INTERRUPTED for subjob in job.subjobs + ) + ) + + return SyftSuccess(message="Job killed successfully!") + @service_method( path="job.kill", name="kill", roles=DATA_SCIENTIST_ROLE_LEVEL, ) def kill(self, context: AuthedServiceContext, id: UID) -> SyftSuccess | SyftError: - res = self.stash.get_by_uid(context.credentials, uid=id) - if res.is_err(): - return SyftError(message=res.err()) + job_or_err = self.stash.get_by_uid(context.credentials, uid=id) + if job_or_err.is_err(): + return SyftError(message=job_or_err.err()) + if job_or_err.ok() is None: + return SyftError(message="Job not found") - job = res.ok() - if job.job_pid is not None and job.status == JobStatus.PROCESSING: - job.status = JobStatus.INTERRUPTED - res = self.stash.update(context.credentials, obj=job) - if res.is_err(): - return SyftError(message=res.err()) - return SyftSuccess(message="Job killed successfully!") - else: + job = job_or_err.ok() + if job.parent_job_id is not None: return SyftError( - message="Job is not running or isn't running in multiprocessing mode." - "Killing threads is currently not supported" + message="Not possible to cancel subjobs. To stop execution, please cancel the parent job." ) + if job.status != JobStatus.PROCESSING: + return SyftError(message="Job is not running") + if job.job_pid is None: + return SyftError( + message="Job termination disabled in dev mode. " + "Set 'dev_mode=False' or 'thread_workers=False' to enable." 
+ ) + + return self._kill(context, job) @service_method( path="job.get_subjobs", diff --git a/packages/syft/src/syft/service/job/job_stash.py b/packages/syft/src/syft/service/job/job_stash.py index 0171af347aa..d7aa3aca00b 100644 --- a/packages/syft/src/syft/service/job/job_stash.py +++ b/packages/syft/src/syft/service/job/job_stash.py @@ -54,6 +54,7 @@ class JobStatus(str, Enum): PROCESSING = "processing" ERRORED = "errored" COMPLETED = "completed" + TERMINATING = "terminating" INTERRUPTED = "interrupted" @@ -254,47 +255,26 @@ def apply_info(self, info: "JobInfo") -> None: self.result = info.result def restart(self, kill: bool = False) -> None: - if kill: - self.kill() - self.fetch() - if not self.has_parent: - # this is currently the limitation, we will need to implement - # killing toplevel jobs later - print("Can only kill nested jobs") - elif kill or ( - self.status != JobStatus.PROCESSING and self.status != JobStatus.CREATED - ): - api = APIRegistry.api_for( - node_uid=self.syft_node_location, - user_verify_key=self.syft_client_verify_key, - ) - if api is None: - raise ValueError( - f"Can't access Syft API. You must login to {self.syft_node_location}" - ) - call = SyftAPICall( - node_uid=self.node_uid, - path="job.restart", - args=[], - kwargs={"uid": self.id}, - blocking=True, - ) - - api.make_call(call) - else: - print( - "Job is running or scheduled, if you want to kill it use job.kill() first" + api = APIRegistry.api_for( + node_uid=self.syft_node_location, + user_verify_key=self.syft_client_verify_key, + ) + if api is None: + raise ValueError( + f"Can't access Syft API. You must login to {self.syft_node_location}" ) - return None + call = SyftAPICall( + node_uid=self.node_uid, + path="job.restart", + args=[], + kwargs={"uid": self.id}, + blocking=True, + ) + res = api.make_call(call) + self.fetch() + return res def kill(self) -> SyftError | SyftSuccess: - if self.status != JobStatus.PROCESSING: - return SyftError(message="Job is not running") - if self.job_pid is None: - return SyftError( - message="Job termination disabled in dev mode. " - "Set 'dev_mode=False' or 'thread_workers=False' to enable." 
- ) api = APIRegistry.api_for( node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, @@ -310,8 +290,9 @@ def kill(self) -> SyftError | SyftSuccess: kwargs={"id": self.id}, blocking=True, ) - api.make_call(call) - return SyftSuccess(message="Job is killed successfully!") + res = api.make_call(call) + self.fetch() + return res def fetch(self) -> None: api = APIRegistry.api_for( @@ -329,7 +310,9 @@ def fetch(self) -> None: kwargs={"uid": self.id}, blocking=True, ) - job: Job = api.make_call(call) + job: Job | None = api.make_call(call) + if job is None: + return self.resolved = job.resolved if job.resolved: self.result = job.result @@ -532,6 +515,11 @@ def _repr_markdown_(self, wrap_as_python: bool = True, indent: int = 0) -> str: """ return as_markdown_code(md) + @property + def fetched_status(self) -> JobStatus: + self.fetch() + return self.status + @property def requesting_user(self) -> UserView | SyftError: api = APIRegistry.api_for( diff --git a/packages/syft/src/syft/service/network/association_request.py b/packages/syft/src/syft/service/network/association_request.py index 70c08a52e56..94f99695392 100644 --- a/packages/syft/src/syft/service/network/association_request.py +++ b/packages/syft/src/syft/service/network/association_request.py @@ -36,6 +36,7 @@ def _run( from .network_service import NetworkService if not apply: + # TODO: implement undo for AssociationRequestChange return Err( SyftError(message="Undo not supported for AssociationRequestChange") ) diff --git a/packages/syft/src/syft/service/network/network_service.py b/packages/syft/src/syft/service/network/network_service.py index fd937f8491f..10074353146 100644 --- a/packages/syft/src/syft/service/network/network_service.py +++ b/packages/syft/src/syft/service/network/network_service.py @@ -1,5 +1,6 @@ # stdlib from collections.abc import Callable +from enum import Enum import secrets from typing import Any @@ -34,6 +35,7 @@ from ..data_subject.data_subject import NamePartitionKey from ..metadata.node_metadata import NodeMetadataV3 from ..request.request import Request +from ..request.request import RequestStatus from ..request.request import SubmitRequest from ..request.request_service import RequestService from ..response import SyftError @@ -58,6 +60,13 @@ OrderByNamePartitionKey = PartitionKey(key="name", type_=str) +@serializable() +class NodePeerAssociationStatus(Enum): + PEER_ASSOCIATED = "PEER_ASSOCIATED" + PEER_ASSOCIATION_PENDING = "PEER_ASSOCIATION_PENDING" + PEER_NOT_FOUND = "PEER_NOT_FOUND" + + @instrument @serializable() class NetworkStash(BaseUIDStoreStash): @@ -155,8 +164,10 @@ def exchange_credentials_with( self_node_route: NodeRoute, remote_node_route: NodeRoute, remote_node_verify_key: SyftVerifyKey, - ) -> SyftSuccess | SyftError: - """Exchange Route With Another Node""" + ) -> Request | SyftSuccess | SyftError: + """ + Exchange Route With Another Node. 
If there is a pending association request, return it + """ # Step 1: Validate the Route self_node_peer = self_node_route.validate_with_context(context=context) @@ -170,9 +181,74 @@ def exchange_credentials_with( remote_client: SyftClient = remote_node_route.client_with_context( context=context ) - random_challenge = secrets.token_bytes(16) + remote_node_peer = NodePeer.from_client(remote_client) + + # check locally if the remote node already exists as a peer + existing_peer_result = self.stash.get_by_uid( + context.node.verify_key, remote_node_peer.id + ) + if ( + existing_peer_result.is_ok() + and (existing_peer := existing_peer_result.ok()) is not None + ): + msg = [ + ( + f"{existing_peer.node_type} peer '{existing_peer.name}' already exists for " + f"{self_node_peer.node_type} '{self_node_peer.name}'." + ) + ] + if existing_peer != remote_node_peer: + result = self.stash.create_or_update_peer( + context.node.verify_key, + remote_node_peer, + ) + msg.append( + f"{existing_peer.node_type} peer '{existing_peer.name}' information change detected." + ) + if result.is_err(): + msg.append( + f"Attempt to update peer '{existing_peer.name}' information failed." + ) + return SyftError(message="\n".join(msg)) + msg.append( + f"{existing_peer.node_type} peer '{existing_peer.name}' information successfully updated." + ) + + # Also check remotely if the self node already exists as a peer + remote_self_node_peer = remote_client.api.services.network.get_peer_by_name( + name=self_node_peer.name + ) + if isinstance(remote_self_node_peer, NodePeer): + msg.append( + f"{self_node_peer.node_type} '{self_node_peer.name}' already exists " + f"as a peer for {remote_node_peer.node_type} '{remote_node_peer.name}'." + ) + if remote_self_node_peer != self_node_peer: + result = remote_client.api.services.network.update_peer( + peer=self_node_peer, + ) + msg.append( + f"{self_node_peer.node_type} peer '{self_node_peer.name}' information change detected." + ) + if isinstance(result, SyftError): + msg.append( + f"Attempt to remotely update {self_node_peer.node_type} peer " + f"'{self_node_peer.name}' information failed." + ) + return SyftError(message="\n".join(msg)) + msg.append( + f"{self_node_peer.node_type} peer '{self_node_peer.name}' " + f"information successfully updated." + ) + msg.append( + f"Routes between {remote_node_peer.node_type} '{remote_node_peer.name}' and " + f"{self_node_peer.node_type} '{self_node_peer.name}' already exchanged."
+ ) + return SyftSuccess(message="\n".join(msg)) - # ask the remote client to add this node (represented by `self_node_peer`) as a peer + # If peer does not exist, ask the remote client to add this node + # (represented by `self_node_peer`) as a peer + random_challenge = secrets.token_bytes(16) remote_res = remote_client.api.services.network.add_peer( peer=self_node_peer, challenge=random_challenge, @@ -185,15 +261,13 @@ def exchange_credentials_with( association_request_approved = not isinstance(remote_res, Request) - remote_node_peer = NodePeer.from_client(remote_client) - # save the remote peer for later result = self.stash.create_or_update_peer( context.node.verify_key, remote_node_peer, ) if result.is_err(): - return SyftError(message=str(result.err())) + return SyftError(message="Failed to update route information.") return ( SyftSuccess(message="Routes Exchanged") @@ -209,7 +283,7 @@ def add_peer( challenge: bytes, self_node_route: NodeRoute, verify_key: SyftVerifyKey, - ) -> list | SyftError: + ) -> Request | SyftSuccess | SyftError: """Add a Network Node Peer. Called by a remote node to add itself as a peer for the current node. """ @@ -228,19 +302,55 @@ def add_peer( message="verify_key does not match the remote node's verify_key for add_peer" ) + # check if the peer already is a node peer + existing_peer_res = self.stash.get_by_uid(context.node.verify_key, peer.id) + if existing_peer_res.is_err(): + return SyftError( + message=f"Failed to query peer from stash: {existing_peer_res.err()}" + ) + + if isinstance(existing_peer := existing_peer_res.ok(), NodePeer): + msg = [ + f"The peer '{peer.name}' is already associated with '{context.node.name}'" + ] + + if existing_peer != peer: + result = self.stash.create_or_update_peer( + context.node.verify_key, + peer, + ) + msg.append("Peer information change detected.") + + if result.is_err(): + msg.append("Attempt to update peer information failed.") + return SyftError(message="\n".join(msg)) + + msg.append("Peer information successfully updated.") + return SyftSuccess(message="\n".join(msg)) + + return SyftSuccess(message="\n".join(msg)) + + # check if the peer already submitted an association request + association_requests: list[Request] = self._get_association_requests_by_peer_id( + context=context, peer_id=peer.id + ) + if ( + association_requests + and (association_request := association_requests[-1]).status + == RequestStatus.PENDING + ): + return association_request + # only create and submit a new request if there is no requests yet + # or all previous requests have been rejected association_request_change = AssociationRequestChange( self_node_route=self_node_route, challenge=challenge, remote_peer=peer ) - submit_request = SubmitRequest( changes=[association_request_change], requesting_user_verify_key=context.credentials, ) - request_submit_method = context.node.get_service_method(RequestService.submit) - request = request_submit_method(context, submit_request) - if ( isinstance(request, Request) and context.node.settings.association_request_auto_approval @@ -269,6 +379,38 @@ def ping( return challenge_signature + @service_method( + path="network.check_peer_association", + name="check_peer_association", + roles=GUEST_ROLE_LEVEL, + ) + def check_peer_association( + self, context: AuthedServiceContext, peer_id: UID + ) -> NodePeerAssociationStatus | SyftError: + """Check if a peer exists in the network stash""" + + # get the node peer for the given sender peer_id + peer = self.stash.get_by_uid(context.node.verify_key, peer_id) + 
if err := peer.is_err(): + return SyftError(message=f"Failed to query peer from stash. Err: {err}") + + if isinstance(peer.ok(), NodePeer): + return NodePeerAssociationStatus.PEER_ASSOCIATED + + if peer.ok() is None: # peer is either pending or not found + association_requests: list[Request] = ( + self._get_association_requests_by_peer_id( + context=context, peer_id=peer_id + ) + ) + if ( + association_requests + and association_requests[-1].status == RequestStatus.PENDING + ): + return NodePeerAssociationStatus.PEER_ASSOCIATION_PENDING + + return NodePeerAssociationStatus.PEER_NOT_FOUND + @service_method( path="network.get_all_peers", name="get_all_peers", roles=GUEST_ROLE_LEVEL ) @@ -322,6 +464,26 @@ def get_peers_by_type( # Return peers or an empty list when result is None return result.ok() or [] + @service_method( + path="network.update_peer", name="update_peer", roles=GUEST_ROLE_LEVEL + ) + def update_peer( + self, + context: AuthedServiceContext, + peer: NodePeer, + ) -> SyftSuccess | SyftError: + result = self.stash.update( + credentials=context.node.verify_key, + peer=peer, + ) + if result.is_err(): + return SyftError( + message=f"Failed to update peer '{peer.name}'. Error: {result.err()}" + ) + return SyftSuccess( + message=f"Peer '{result.ok().name}' information successfully updated." + ) + @service_method( path="network.delete_peer_by_id", name="delete_peer_by_id", @@ -332,11 +494,22 @@ def delete_peer_by_id( ) -> SyftSuccess | SyftError: """Delete Node Peer""" result = self.stash.delete_by_uid(context.credentials, uid) - if result.is_err(): - return SyftError(message=str(result.err())) + if err := result.is_err(): + return SyftError(message=f"Failed to delete peer with UID {uid}: {err}.") + # Delete all the association requests from this peer + association_requests: list[Request] = self._get_association_requests_by_peer_id( + context=context, peer_id=uid + ) + for request in association_requests: + request_delete_method = context.node.get_service_method( + RequestService.delete_by_uid + ) + res = request_delete_method(context, request.id) + if isinstance(res, SyftError): + return res # TODO: Notify the peer (either by email or by other form of notifications) # that it has been deleted from the network - return SyftSuccess(message=f"Node Peer with id {uid} Deleted") + return SyftSuccess(message=f"Node Peer with id {uid} deleted.") @service_method(path="network.add_route_on_peer", name="add_route_on_peer") def add_route_on_peer( @@ -410,6 +583,11 @@ def add_route( return remote_node_peer # add and update the priority for the peer existed_route: NodeRoute | None = remote_node_peer.update_route(route) + if existed_route: + return SyftSuccess( + message=f"The route already exists between '{context.node.name}' and " + f"peer '{remote_node_peer.name}' with id '{existed_route.id}'." 
+ ) # update the peer in the store with the updated routes result = self.stash.update( credentials=context.node.verify_key, @@ -417,11 +595,6 @@ def add_route( ) if result.is_err(): return SyftError(message=str(result.err())) - if existed_route: - return SyftSuccess( - message=f"The route already exists between '{context.node.name}' and " - f"peer '{remote_node_peer.name}' with id '{existed_route.id}', so its priority was updated" - ) return SyftSuccess( message=f"New route ({str(route)}) with id '{route.id}' " f"to peer {remote_node_peer.node_type.value} '{remote_node_peer.name}' " @@ -704,6 +877,29 @@ def _get_remote_node_peer_by_verify_key( ) return remote_node_peer + def _get_association_requests_by_peer_id( + self, context: AuthedServiceContext, peer_id: UID + ) -> list[Request]: + """ + Get all the association requests from a peer. The association requests are sorted by request_time. + """ + request_get_all_method: Callable = context.node.get_service_method( + RequestService.get_all + ) + all_requests: list[Request] = request_get_all_method(context) + association_requests: list[Request] = [] + for request in all_requests: + for change in request.changes: + if ( + isinstance(change, AssociationRequestChange) + and change.remote_peer.id == peer_id + ): + association_requests.append(request) + + return sorted( + association_requests, key=lambda request: request.request_time.utc_timestamp + ) + TYPE_TO_SERVICE[NodePeer] = NetworkService SERVICE_TO_TYPES[NetworkService].update({NodePeer}) diff --git a/packages/syft/src/syft/service/network/node_peer.py b/packages/syft/src/syft/service/network/node_peer.py index 70e6f9bfb40..35292dd89dd 100644 --- a/packages/syft/src/syft/service/network/node_peer.py +++ b/packages/syft/src/syft/service/network/node_peer.py @@ -1,5 +1,6 @@ # stdlib from collections.abc import Callable +from enum import Enum # third party from result import Err @@ -14,6 +15,7 @@ from ...node.credentials import SyftVerifyKey from ...serde.serializable import serializable from ...service.response import SyftError +from ...types.datetime import DateTime from ...types.syft_migration import migrate from ...types.syft_object import SYFT_OBJECT_VERSION_2 from ...types.syft_object import SYFT_OBJECT_VERSION_3 @@ -32,6 +34,13 @@ from .routes import route_to_connection +@serializable() +class NodePeerConnectionStatus(Enum): + ACTIVE = "ACTIVE" + INACTIVE = "INACTIVE" + TIMEOUT = "TIMEOUT" + + @serializable() class NodePeerV2(SyftObject): # version @@ -58,7 +67,14 @@ class NodePeer(SyftObject): __attr_searchable__ = ["name", "node_type"] __attr_unique__ = ["verify_key"] - __repr_attrs__ = ["name", "node_type", "admin_email"] + __repr_attrs__ = [ + "name", + "node_type", + "admin_email", + "ping_status.value", + "ping_status_message", + "pinged_timestamp", + ] id: UID | None = None # type: ignore[assignment] name: str @@ -66,6 +82,9 @@ class NodePeer(SyftObject): node_routes: list[NodeRouteType] = [] node_type: NodeType admin_email: str + ping_status: NodePeerConnectionStatus | None = None + ping_status_message: str | None = None + pinged_timestamp: DateTime | None = None def existed_route( self, route: NodeRouteType | None = None, route_id: UID | None = None @@ -112,24 +131,24 @@ def assign_highest_priority(self, route: NodeRoute) -> NodeRoute: route.priority = current_max_priority + 1 return route - def update_route(self, new_route: NodeRoute) -> NodeRoute | None: + def update_route(self, route: NodeRoute) -> NodeRoute | None: """ Update the route for the node. 
- If the route already exists, updates the priority of the existing route. - If it doesn't, it append the new route to the peer's list of node routes. + If the route already exists, return it. + If the route is new, assign it to have the highest priority + before appending it to the peer's list of node routes. Args: - new_route (NodeRoute): The new route to be added to the node. + route (NodeRoute): The new route to be added to the peer. Returns: NodeRoute | None: if the route already exists, return it, else returns None """ - new_route = self.assign_highest_priority(new_route) - existed, index = self.existed_route(new_route) - if existed and index is not None: - self.node_routes[index].priority = new_route.priority - return self.node_routes[index] + existed, _ = self.existed_route(route) + if existed: + return route else: + new_route = self.assign_highest_priority(route) self.node_routes.append(new_route) return None diff --git a/packages/syft/src/syft/service/network/utils.py b/packages/syft/src/syft/service/network/utils.py new file mode 100644 index 00000000000..c9e98da6179 --- /dev/null +++ b/packages/syft/src/syft/service/network/utils.py @@ -0,0 +1,126 @@ +# stdlib +import threading +import time +from typing import cast + +# third party +from loguru import logger + +# relative +from ...serde.serializable import serializable +from ...types.datetime import DateTime +from ..context import AuthedServiceContext +from ..response import SyftError +from .network_service import NetworkService +from .network_service import NodePeerAssociationStatus +from .node_peer import NodePeer +from .node_peer import NodePeerConnectionStatus + + +@serializable(without=["thread"]) +class PeerHealthCheckTask: + repeat_time = 10 # in seconds + + def __init__(self) -> None: + self.thread: threading.Thread | None = None + self.started_time = None + self._stop = False + + def peer_route_heathcheck(self, context: AuthedServiceContext) -> SyftError | None: + """ + Perform a health check on the peers in the network stash. + - If peer is accessible, ping the peer. + - Peer is connected to the network. + + Args: + context (AuthedServiceContext): The authenticated service context. 
+ + Returns: + None + """ + + network_service = cast(NetworkService, context.node.get_service(NetworkService)) + network_stash = network_service.stash + + result = network_stash.get_all(context.node.verify_key) + + if result.is_err(): + logger.error(f"Failed to fetch peers from stash: {result.err()}") + return SyftError(message=f"{result.err()}") + + all_peers: list[NodePeer] = result.ok() + + for peer in all_peers: + peer.pinged_timestamp = DateTime.now() + try: + peer_client = peer.client_with_context(context=context) + if peer_client.is_err(): + logger.error( + f"Failed to create client for peer: {peer}: {peer_client.err()}" + ) + peer.ping_status = NodePeerConnectionStatus.TIMEOUT + peer_client = None + except Exception as e: + logger.error( + f"Failed to create client for peer: {peer} with exception {e}" + ) + peer.ping_status = NodePeerConnectionStatus.TIMEOUT + peer_client = None + + if peer_client is not None: + peer_client = peer_client.ok() + peer_status = peer_client.api.services.network.check_peer_association( + peer_id=context.node.id + ) + peer.ping_status = ( + NodePeerConnectionStatus.ACTIVE + if peer_status == NodePeerAssociationStatus.PEER_ASSOCIATED + else NodePeerConnectionStatus.INACTIVE + ) + if isinstance(peer_status, SyftError): + peer.ping_status_message = ( + f"Error `{peer_status.message}` when pinging peer '{peer.name}'" + ) + else: + peer.ping_status_message = f"Peer '{peer.name}''s ping status: {peer.ping_status.value.lower()}" + + result = network_stash.update( + credentials=context.node.verify_key, + peer=peer, + has_permission=True, + ) + + if result.is_err(): + logger.info(f"Failed to update peer in stash: {result.err()}") + + return None + + def _run(self, context: AuthedServiceContext) -> None: + self.started_time = DateTime.now() + while True: + if self._stop: + break + self.peer_route_heathcheck(context) + time.sleep(self.repeat_time) + + def run(self, context: AuthedServiceContext) -> None: + if self.thread is not None: + logger.info( + f"Peer health check task is already running in thread " + f"{self.thread.name} with ID: {self.thread.ident}." + ) + else: + self.thread = threading.Thread(target=self._run, args=(context,)) + logger.info( + f"Start running peers health check in thread " + f"{self.thread.name} with ID: {self.thread.ident}." 
+ ) + self.thread.start() + + def stop(self) -> None: + if self.thread: + self._stop = True + self.thread.join() + self.thread = None + self.started_time = None + logger.info("Peer health check task stopped.") diff --git a/packages/syft/src/syft/service/project/project.py b/packages/syft/src/syft/service/project/project.py index aa8048f788e..d9b84ef9f15 100644 --- a/packages/syft/src/syft/service/project/project.py +++ b/packages/syft/src/syft/service/project/project.py @@ -38,6 +38,7 @@ from ...types.uid import UID from ...util import options from ...util.colors import SURFACE +from ...util.decorators import deprecated from ...util.markdown import markdown_as_class_with_fields from ...util.util import full_name_with_qualname from ..code.user_code import SubmitUserCode @@ -1261,7 +1262,13 @@ def create_code_request( reason=reason, ) + @deprecated( + reason="Project.start has been renamed to Project.send", return_syfterror=True + ) def start(self, return_all_projects: bool = False) -> Project | list[Project]: + return self.send(return_all_projects=return_all_projects) + + def send(self, return_all_projects: bool = False) -> Project | list[Project]: # Currently we are assuming that the first member is the leader # This would be changed in our future leaderless approach leader = self.clients[0] diff --git a/packages/syft/src/syft/service/queue/queue.py b/packages/syft/src/syft/service/queue/queue.py index fcf5cd2b397..968e4b7c975 100644 --- a/packages/syft/src/syft/service/queue/queue.py +++ b/packages/syft/src/syft/service/queue/queue.py @@ -1,14 +1,16 @@ # stdlib +from multiprocessing import Process import threading +from threading import Thread import time from typing import Any from typing import cast # third party +from loguru import logger import psutil from result import Err from result import Ok -from result import Result # relative from ...node.credentials import SyftVerifyKey @@ -20,7 +22,6 @@ from ...types.datetime import DateTime from ...types.uid import UID from ..job.job_stash import Job -from ..job.job_stash import JobStash from ..job.job_stash import JobStatus from ..response import SyftError from ..response import SyftSuccess @@ -59,20 +60,28 @@ def monitor(self) -> None: job = self.worker.job_stash.get_by_uid( self.credentials, self.queue_item.job_id ).ok() - if job is None or job.status != JobStatus.INTERRUPTED: - return - else: - job.resolved = True + if job and job.status == JobStatus.TERMINATING: + self.terminate(job) + for subjob in job.subjobs: + self.terminate(subjob) + self.queue_item.status = Status.INTERRUPTED self.queue_item.resolved = True self.worker.queue_stash.set_result(self.credentials, self.queue_item) - self.worker.job_stash.set_result(self.credentials, job) - process = psutil.Process(job.job_pid) - process.terminate() + # How about subjobs of subjobs? def stop(self) -> None: self.stop_requested.set() + def terminate(self, job: Job) -> None: + job.resolved = True + job.status = JobStatus.INTERRUPTED + self.worker.job_stash.set_result(self.credentials, job) + try: + psutil.Process(job.job_pid).terminate() + except psutil.Error as e: + logger.warning(f"Failed to terminate job {job.id}: {e}") + @serializable() class QueueManager(BaseQueueManager): @@ -245,32 +254,6 @@ def handle_message_multiprocessing( monitor_thread.stop() -def evaluate_can_run_job( - job_id: UID, job_stash: JobStash, credentials: SyftVerifyKey -) -> Result[Job, str]: - """Evaluate if a Job can be executed by the user. 
- - A Job cannot be executed if any of the following are met: - - User doesn't have permission to the job. - - Job is either marked Completed or result is available. - - Job is Cancelled or Interrupted. - """ - res = job_stash.get_by_uid(credentials, job_id) - - # User doesn't have access to job - if res.is_err(): - return res - - job_item = res.ok() - - if job_item.status == JobStatus.COMPLETED or job_item.resolved: - return Err(f"Job: {job_id} already Completed.") - elif job_item.status == JobStatus.INTERRUPTED: - return Err(f"Job interrupted. Job Id: {job_id}") - - return Ok(job_item) - - @serializable() class APICallMessageHandler(AbstractMessageHandler): queue_name = "api_call" @@ -304,9 +287,9 @@ def handle_message(message: bytes, syft_worker_id: UID) -> None: worker.signing_key = worker_settings.signing_key credentials = queue_item.syft_client_verify_key - - res = evaluate_can_run_job(queue_item.job_id, worker.job_stash, credentials) + res = worker.job_stash.get_by_uid(credentials, queue_item.job_id) if res.is_err(): + logger.warning(res.err()) raise Exception(res.value) job_item: Job = res.ok() @@ -317,14 +300,6 @@ def handle_message(message: bytes, syft_worker_id: UID) -> None: job_item.node_uid = cast(UID, worker.id) job_item.updated_at = DateTime.now() - # try: - # worker_name = os.getenv("DOCKER_WORKER_NAME", None) - # docker_worker = worker.worker_stash.get_worker_by_name( - # credentials, worker_name - # ).ok() - # job_item.job_worker_id = str(docker_worker.container_id) - # except Exception: - # job_item.job_worker_id = str(worker.id) if syft_worker_id is not None: job_item.job_worker_id = syft_worker_id @@ -337,9 +312,6 @@ def handle_message(message: bytes, syft_worker_id: UID) -> None: raise Exception(f"{job_result.err()}") if queue_config.thread_workers: - # stdlib - from threading import Thread - thread = Thread( target=handle_message_multiprocessing, args=(worker_settings, queue_item, credentials), @@ -347,8 +319,8 @@ def handle_message(message: bytes, syft_worker_id: UID) -> None: thread.start() thread.join() else: - # stdlib - from multiprocessing import Process + # if psutil.pid_exists(job_item.job_pid): + # psutil.Process(job_item.job_pid).terminate() process = Process( target=handle_message_multiprocessing, diff --git a/packages/syft/src/syft/service/request/request_service.py b/packages/syft/src/syft/service/request/request_service.py index 8461febb210..ac166f0a32a 100644 --- a/packages/syft/src/syft/service/request/request_service.py +++ b/packages/syft/src/syft/service/request/request_service.py @@ -288,6 +288,19 @@ def save( message=f"Failed to update Request: <{request.id}>. 
Error: {result.err()}" ) + @service_method( + path="request.delete_by_uid", + name="delete_by_uid", + ) + def delete_by_uid( + self, context: AuthedServiceContext, uid: UID + ) -> SyftSuccess | SyftError: + """Delete the request with the given uid.""" + result = self.stash.delete_by_uid(context.credentials, uid) + if result.is_err(): + return SyftError(message=str(result.err())) + return SyftSuccess(message=f"Request with id {uid} deleted.") + TYPE_TO_SERVICE[Request] = RequestService SERVICE_TO_TYPES[RequestService].update({Request}) diff --git a/packages/syft/src/syft/service/response.py b/packages/syft/src/syft/service/response.py index d30c1dbac2b..37227046c5c 100644 --- a/packages/syft/src/syft/service/response.py +++ b/packages/syft/src/syft/service/response.py @@ -57,6 +57,9 @@ def _repr_html_class_(self) -> str: def to_result(self) -> Err: return Err(value=self.message) + def __bool__(self) -> bool: + return False + @serializable() class SyftSuccess(SyftResponseMessage): diff --git a/packages/syft/src/syft/service/settings/settings.py b/packages/syft/src/syft/service/settings/settings.py index da4eb428d2a..aa4d5d6719c 100644 --- a/packages/syft/src/syft/service/settings/settings.py +++ b/packages/syft/src/syft/service/settings/settings.py @@ -1,5 +1,6 @@ # stdlib from collections.abc import Callable +from typing import Any # relative from ...abstract_node import NodeSideType @@ -16,6 +17,8 @@ from ...types.transforms import drop from ...types.transforms import make_set_default from ...types.uid import UID +from ...util import options +from ...util.colors import SURFACE @serializable() @@ -74,6 +77,23 @@ class NodeSettings(SyftObject): association_request_auto_approval: bool default_worker_pool: str = DEFAULT_WORKER_POOL_NAME + def _repr_html_(self) -> Any: + return f""" + +
+            <style>
+                .syft-settings {{color: {SURFACE[options.color_theme]};}}
+            </style>
+            <div class='syft-settings'>
+                <h3>Settings</h3>
+                <p><strong>Id: </strong>{self.id}</p>
+                <p><strong>Name: </strong>{self.name}</p>
+                <p><strong>Organization: </strong>{self.organization}</p>
+                <p><strong>Deployed on: </strong>{self.deployed_on}</p>
+                <p><strong>Signup enabled: </strong>{self.signup_enabled}</p>
+                <p><strong>Admin email: </strong>{self.admin_email}</p>
+            </div>
+ + """ + @serializable() class NodeSettingsV2(SyftObject): diff --git a/packages/syft/src/syft/service/sync/diff_state.py b/packages/syft/src/syft/service/sync/diff_state.py index d05883db242..014e33f5bc8 100644 --- a/packages/syft/src/syft/service/sync/diff_state.py +++ b/packages/syft/src/syft/service/sync/diff_state.py @@ -1,4 +1,5 @@ # stdlib +from collections.abc import Iterable import html import textwrap from typing import Any @@ -806,7 +807,7 @@ def _repr_html_(self) -> str: except Exception as _: return SyftError( message=html.escape( - "Could not render batch, please use resolve_single() instead." + "Could not render batch, please use resolve() instead." ) )._repr_html_() @@ -892,7 +893,7 @@ def __repr__(self) -> Any: except Exception as _: return SyftError( message=html.escape( - "Could not render batch, please use resolve_single() instead." + "Could not render batch, please use resolve() instead." ) )._repr_html_() @@ -1048,6 +1049,14 @@ def ignored_batches(self) -> list[ObjectDiffBatch]: batch for batch in self.all_batches if batch.decision == SyncDecision.IGNORE ] + @property + def active_batches(self) -> Iterable[ObjectDiffBatch]: + decisions_to_skip = {SyncDecision.IGNORE, SyncDecision.SKIP} + # self.batches might be modified during iteration + for batch in self.batches: + if batch.decision not in decisions_to_skip: + yield batch + @property def ignored_changes(self) -> list[IgnoredBatchView]: result = [] diff --git a/packages/syft/src/syft/service/sync/resolve_widget.py b/packages/syft/src/syft/service/sync/resolve_widget.py index 9aa4c81e19d..dd9dadc505e 100644 --- a/packages/syft/src/syft/service/sync/resolve_widget.py +++ b/packages/syft/src/syft/service/sync/resolve_widget.py @@ -447,40 +447,6 @@ def get_share_private_data_state(self) -> dict[UID, bool]: def get_mockify_state(self) -> dict[UID, bool]: return {uid: widget.mockify for uid, widget in self.id2widget.items()} - def click_ignore(self, *args: list, **kwargs: dict) -> SyftSuccess | SyftError: - # relative - from ...client.syncing import handle_ignore_batch - - if self.is_synced: - return SyftError( - message="The changes in this widget have already been synced." - ) - - res = handle_ignore_batch( - obj_diff_batch=self.obj_diff_batch, - all_batches=self.obj_diff_batch.global_batches, - ) - - self.set_widget_result_state(res) - return res - - def click_unignore(self, *args: list, **kwargs: dict) -> SyftSuccess | SyftError: - # relative - from ...client.syncing import handle_unignore_batch - - if self.is_synced: - return SyftError( - message="The changes in this widget have already been synced." 
- ) - - res = handle_unignore_batch( - obj_diff_batch=self.obj_diff_batch, - all_batches=self.obj_diff_batch.global_batches, - ) - - self.set_widget_result_state(res) - return res - def click_sync(self, *args: list, **kwargs: dict) -> SyftSuccess | SyftError: # relative from ...client.syncing import handle_sync_batch diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 74762c4155f..e31adc18b7d 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -11,7 +11,12 @@ from botocore.client import BaseClient as S3BaseClient from botocore.client import ClientError as BotoClientError from botocore.client import Config +from botocore.exceptions import ConnectionError import requests +from tenacity import retry +from tenacity import retry_if_exception_type +from tenacity import stop_after_delay +from tenacity import wait_fixed from tqdm import tqdm from typing_extensions import Self @@ -215,12 +220,22 @@ def __init__( self.default_bucket_name = default_bucket_name self.config = config + self._check_connection() + def __enter__(self) -> Self: return self def __exit__(self, *exc: Any) -> None: self.client.close() + @retry( + wait=wait_fixed(5), + stop=stop_after_delay(60), + retry=retry_if_exception_type(ConnectionError), + ) + def _check_connection(self) -> dict: + return self.client.list_buckets() + def read( self, fp: SecureFilePathLocation, diff --git a/packages/syft/src/syft/util/decorators.py b/packages/syft/src/syft/util/decorators.py index 1262099d1c6..acfeba490e8 100644 --- a/packages/syft/src/syft/util/decorators.py +++ b/packages/syft/src/syft/util/decorators.py @@ -2,6 +2,10 @@ from collections.abc import Callable import functools from typing import Any +import warnings + +# relative +from ..service.response import SyftError def singleton(cls: Any) -> Callable: @@ -46,3 +50,25 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return previous_instances[cls].get("instance") return wrapper + + +def deprecated( + reason: str = "This function is deprecated and may be removed in the future.", + return_syfterror: bool = False, +) -> Callable: + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + def wrapper(*args: list, **kwargs: dict) -> Any: + message = f"{func.__qualname__} is deprecated: {reason}" + if return_syfterror: + return SyftError(message=message) + warnings.warn( + message, + category=DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/packages/syft/tests/conftest.py b/packages/syft/tests/conftest.py index 58f269f15a1..9623ea3d6dd 100644 --- a/packages/syft/tests/conftest.py +++ b/packages/syft/tests/conftest.py @@ -157,48 +157,6 @@ def low_worker() -> Worker: del worker -@pytest.fixture(scope="function") -def full_high_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: - _node = sy.orchestra.launch( - node_side_type=NodeSideType.HIGH_SIDE, - name=token_hex(8), - # dev_mode=True, - reset=True, - n_consumers=n_consumers, - create_producer=create_producer, - queue_port=None, - in_memory_workers=True, - local_db=False, - thread_workers=False, - ) - # startup code here - yield _node - # Cleanup code - _node.python_node.cleanup() - _node.land() - - -@pytest.fixture(scope="function") -def full_low_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: - _node = sy.orchestra.launch( - 
node_side_type=NodeSideType.LOW_SIDE, - name=token_hex(8), - # dev_mode=True, - reset=True, - n_consumers=n_consumers, - create_producer=create_producer, - queue_port=None, - in_memory_workers=True, - local_db=False, - thread_workers=False, - ) - # startup code here - yield _node - # # Cleanup code - _node.python_node.cleanup() - _node.land() - - @pytest.fixture def root_domain_client(worker) -> DomainClient: yield worker.root_client diff --git a/packages/syft/tests/syft/action_graph/action_graph_test.py b/packages/syft/tests/syft/action_graph/action_graph_test.py index b7e6dc6a3d9..1ec145cd5b8 100644 --- a/packages/syft/tests/syft/action_graph/action_graph_test.py +++ b/packages/syft/tests/syft/action_graph/action_graph_test.py @@ -329,6 +329,7 @@ def test_networkx_backing_store_subgraph( assert len(subgraph2.edges()) == 0 +@pytest.mark.flaky(reruns=3, reruns_delay=3) def test_in_memory_action_graph_store_init( in_mem_graph_config: InMemoryGraphConfig, ) -> None: diff --git a/packages/syft/tests/syft/project/project_test.py b/packages/syft/tests/syft/project/project_test.py index 9b2c8ce92f3..c186f2f35fa 100644 --- a/packages/syft/tests/syft/project/project_test.py +++ b/packages/syft/tests/syft/project/project_test.py @@ -23,7 +23,7 @@ def test_project_creation(worker): name="My Cool Project", description="My Cool Description", members=[ds_client] ) - project = new_project.start() + project = new_project.send() assert isinstance(project, Project) assert new_project.id == project.id @@ -47,7 +47,7 @@ def test_error_data_owner_project_creation(worker): name="My Cool Project", description="My Cool Description", members=[root_client] ) - project = new_project.start() + project = new_project.send() assert isinstance(project, sy.SyftError) assert project.message == "Only Data Scientists can create projects" @@ -96,7 +96,7 @@ def test_project_serde(worker): name="My Cool Project", description="My Cool Description", members=[root_client] ) - project = new_project.start() + project = new_project.send() ser_data = sy.serialize(project, to_bytes=True) assert isinstance(ser_data, bytes) diff --git a/packages/syft/tests/syft/service/sync/sync_flow_test.py b/packages/syft/tests/syft/service/sync/sync_flow_test.py deleted file mode 100644 index a48cc1a8d5e..00000000000 --- a/packages/syft/tests/syft/service/sync/sync_flow_test.py +++ /dev/null @@ -1,723 +0,0 @@ -# stdlib -import sys - -# third party -import numpy as np -import pytest - -# syft absolute -import syft -import syft as sy -from syft.abstract_node import NodeSideType -from syft.client.domain_client import DomainClient -from syft.client.sync_decision import SyncDecision -from syft.client.syncing import compare_clients -from syft.client.syncing import compare_states -from syft.client.syncing import resolve -from syft.client.syncing import resolve_single -from syft.service.action.action_object import ActionObject -from syft.service.response import SyftError -from syft.service.response import SyftSuccess - - -def compare_and_resolve(*, from_client: DomainClient, to_client: DomainClient): - diff_state_before = compare_clients(from_client, to_client) - for obj_diff_batch in diff_state_before.batches: - widget = resolve_single(obj_diff_batch) - widget.click_share_all_private_data() - res = widget.click_sync() - assert isinstance(res, SyftSuccess) - from_client.refresh() - to_client.refresh() - diff_state_after = compare_clients(from_client, to_client) - return diff_state_before, diff_state_after - - -def run_and_accept_result(client): - job_high = 
client.code.compute(blocking=True) - client.requests[0].accept_by_depositing_result(job_high) - return job_high - - -@syft.syft_function_single_use() -def compute() -> int: - return 42 - - -def get_ds_client(client: DomainClient) -> DomainClient: - client.register( - name="a", - email="a@a.com", - password="asdf", - password_verify="asdf", - ) - return client.login(email="a@a.com", password="asdf") - - -@pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") -# @pytest.mark.flaky(reruns=3, reruns_delay=3) -def test_sync_flow(): - # somehow skipif does not work - if sys.platform == "win32": - return - low_worker = sy.Worker( - name="low-test", - local_db=True, - n_consumers=1, - create_producer=True, - node_side_type=NodeSideType.LOW_SIDE, - queue_port=None, - in_memory_workers=True, - ) - high_worker = sy.Worker( - name="high-test", - local_db=True, - n_consumers=1, - create_producer=True, - node_side_type=NodeSideType.HIGH_SIDE, - queue_port=None, - in_memory_workers=True, - ) - - low_client = low_worker.root_client - high_client = high_worker.root_client - - low_client.register( - email="newuser@openmined.org", - name="John Doe", - password="pw", - password_verify="pw", - ) - client_low_ds = low_worker.guest_client - - mock_high = np.array([10, 11, 12, 13, 14]) - private_high = np.array([15, 16, 17, 18, 19]) - - dataset_high = sy.Dataset( - name="my-dataset", - description="abc", - asset_list=[ - sy.Asset( - name="numpy-data", - mock=mock_high, - data=private_high, - shape=private_high.shape, - mock_is_real=True, - ) - ], - ) - - high_client.upload_dataset(dataset_high) - mock_low = np.array([0, 1, 2, 3, 4]) # do_high.mock - - dataset_low = sy.Dataset( - id=dataset_high.id, - name="my-dataset", - description="abc", - asset_list=[ - sy.Asset( - name="numpy-data", - mock=mock_low, - data=ActionObject.empty(data_node_id=high_client.id), - shape=mock_low.shape, - mock_is_real=True, - ) - ], - ) - - res = low_client.upload_dataset(dataset_low) - - data_low = client_low_ds.datasets[0].assets[0] - - @sy.syft_function_single_use(data=data_low) - def compute_mean(data) -> float: - return data.mean() - - res = client_low_ds.code.request_code_execution(compute_mean) - res = client_low_ds.code.request_code_execution(compute_mean) - print(res) - print("LOW CODE:", low_client.code.get_all()) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - print(low_state.objects, high_state.objects) - - diff_state = compare_states(low_state, high_state) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, decision="low", share_private_objects=True - ) - - print(low_items_to_sync, high_items_to_sync) - - low_client.apply_state(low_items_to_sync) - - high_client.apply_state(high_items_to_sync) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - diff_state = compare_states(low_state, high_state) - - high_client._fetch_api(high_client.credentials) - - data_high = high_client.datasets[0].assets[0] - - print(high_client.code.get_all()) - job_high = high_client.code.compute_mean(data=data_high, blocking=False) - print("Waiting for job...") - job_high.wait(timeout=60) - job_high.result.get() - - # syft absolute - from syft.service.request.request import Request - - request: Request = high_client.requests[0] - job_info = job_high.info(public_metadata=True, result=True) - - print(request.syft_client_verify_key, request.syft_node_location) - print(request.code.syft_client_verify_key, 
request.code.syft_node_location) - request.accept_by_depositing_result(job_info) - - request = high_client.requests[0] - code = request.code - job_high._get_log_objs() - - action_store_high = high_worker.get_service("actionservice").store - blob_store_high = high_worker.get_service("blobstorageservice").stash.partition - assert ( - f"{client_low_ds.verify_key}_READ" - in action_store_high.permissions[job_high.result.id.id] - ) - assert ( - f"{client_low_ds.verify_key}_READ" - in blob_store_high.permissions[job_high.result.syft_blob_storage_entry_id] - ) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - diff_state_2 = compare_states(low_state, high_state) - - low_items_to_sync, high_items_to_sync = resolve( - diff_state_2, decision="high", share_private_objects=True - ) - for diff in diff_state_2.diffs: - print(diff.status, diff.object_type) - low_client.apply_state(low_items_to_sync) - - action_store_low = low_worker.get_service("actionservice").store - blob_store_low = low_worker.get_service("blobstorageservice").stash.partition - assert ( - f"{client_low_ds.verify_key}_READ" - in action_store_low.permissions[job_high.result.id.id] - ) - assert ( - f"{client_low_ds.verify_key}_READ" - in blob_store_low.permissions[job_high.result.syft_blob_storage_entry_id] - ) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - res_low = client_low_ds.code.compute_mean(data=data_low) - print("Res Low", res_low) - - assert res_low.get() == private_high.mean() - - assert ( - res_low.id.id - == job_high.result.id.id - == code.output_history[-1].outputs[0].id.id - ) - assert ( - job_high.result.syft_blob_storage_entry_id == res_low.syft_blob_storage_entry_id - ) - - job_low = client_low_ds.code.compute_mean(data=data_low, blocking=False) - - assert job_low.id == job_high.id - assert job_low.result.id == job_high.result.id - assert ( - job_low.result.syft_blob_storage_entry_id - == job_high.result.syft_blob_storage_entry_id - ) - low_worker.cleanup() - high_worker.cleanup() - - -def test_forget_usercode(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - print("computing...") - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, decision="low", share_private_objects=True - ) - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - high_client.code.get_all() - job_high = high_client.code.compute().get() - # job_info = job_high.info(public_metadata=True, result=True) - - request = high_client.requests[0] - request.accept_by_depositing_result(job_high) - - # job_high._get_log_objs() - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - diff_state_2 = compare_states(low_state, high_state) - - def skip_if_user_code(diff): - if diff.root.object_type == "UserCode": - return SyncDecision.IGNORE - raise Exception(f"Should not reach here, but got {diff.root.object_type}") - - low_items_to_sync, high_items_to_sync = resolve( - diff_state_2, - share_private_objects=True, - decision_callback=skip_if_user_code, - ) - - -@sy.api_endpoint_method() -def mock_function(context) -> str: - return -42 - - -@sy.api_endpoint_method() -def private_function(context) -> str: - return 42 - - 
-def test_skip_user_code(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - def skip_if_user_code(diff): - if diff.root.object_type == "UserCode": - return SyncDecision.SKIP - raise Exception(f"Should not reach here, but got {diff.root.object_type}") - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - share_private_objects=True, - decision_callback=skip_if_user_code, - ) - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - assert low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - -def test_unignore(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - share_private_objects=True, - decision="ignore", - ) - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - assert low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - diff_state = compare_clients(low_client, high_client) - - for ignored in diff_state.ignored_changes: - deps = ignored.batch.get_dependencies() - if "Request" in [dep.object_type for dep in deps]: - ignored.stage_change() - - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - share_private_objects=True, - decision="low", - ) - - assert not low_items_to_sync.is_empty - assert not high_items_to_sync.is_empty - - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - share_private_objects=True, - decision="low", - ) - - assert diff_state.is_same - - -def test_request_code_execution_multiple(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - @sy.syft_function_single_use() - def compute_twice() -> int: - return 42 * 2 - - @sy.syft_function_single_use() - def compute_thrice() -> int: - return 42 * 3 - - _ = client_low_ds.code.request_code_execution(compute) - _ = client_low_ds.code.request_code_execution(compute_twice) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, decision="low", share_private_objects=True - ) - - assert not diff_state.is_same - assert len(diff_state.diffs) % 2 == 0 - assert not low_items_to_sync.is_empty - assert not high_items_to_sync.is_empty - - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - _ = client_low_ds.code.request_code_execution(compute_thrice) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, decision="low", share_private_objects=True - ) - - assert not diff_state.is_same - assert len(diff_state.diffs) % 3 == 0 - assert not low_items_to_sync.is_empty - assert not 
high_items_to_sync.is_empty - - -def test_sync_high(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision="high", - ) - - assert not diff_state.is_same - assert not low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - -@pytest.mark.parametrize( - "decision", - ["skip", "ignore"], -) -def test_sync_skip_ignore(low_worker, high_worker, decision): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision=decision, - ) - - assert not diff_state.is_same - assert low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - def should_not_be_called(diff): - # should not be called when decision is ignore before - if decision == "ignore": - raise Exception("Should not reach here") - return SyncDecision.SKIP - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision_callback=should_not_be_called, - ) - - -def test_update_after_ignore(low_worker, high_worker): - low_client = low_worker.root_client - client_low_ds = low_worker.guest_client - high_client = high_worker.root_client - - @sy.syft_function_single_use() - def compute() -> int: - return 42 - - _ = client_low_ds.code.request_code_execution(compute) - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision="ignore", - ) - - assert not diff_state.is_same - assert low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - low_client.apply_state(low_items_to_sync) - high_client.apply_state(high_items_to_sync) - - @sy.syft_function_single_use() - def compute() -> int: - return 43 - - # _ = client_low_ds.code.request_code_execution(compute) - low_client.requests[-1].approve() - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision="low", - ) - - assert not high_items_to_sync.is_empty - - -@pytest.mark.parametrize( - "decision", - ["skip", "ignore", "low", "high"], -) -def test_sync_empty(low_worker, high_worker, decision): - low_client = low_worker.root_client - high_client = high_worker.root_client - - diff_state = compare_clients(low_client, high_client) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, - decision=decision, - ) - - assert diff_state.is_same - assert low_items_to_sync.is_empty - assert high_items_to_sync.is_empty - - -@pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") -@pytest.mark.flaky(reruns=3, reruns_delay=3) -def test_sync_flow_no_sharing(): - # somehow skipif does not work - if sys.platform == "win32": - return - low_worker = sy.Worker( - name="low-test-2", - local_db=True, - n_consumers=1, - create_producer=True, - node_side_type=NodeSideType.LOW_SIDE, - 
queue_port=None, - in_memory_workers=True, - ) - high_worker = sy.Worker( - name="high-test-2", - local_db=True, - n_consumers=1, - create_producer=True, - node_side_type=NodeSideType.HIGH_SIDE, - queue_port=None, - in_memory_workers=True, - ) - - low_client = low_worker.root_client - high_client = high_worker.root_client - - low_client.register( - email="newuser@openmined.org", - name="John Doe", - password="pw", - password_verify="pw", - ) - client_low_ds = low_worker.guest_client - - mock_high = np.array([10, 11, 12, 13, 14]) - private_high = np.array([15, 16, 17, 18, 19]) - - dataset_high = sy.Dataset( - name="my-dataset", - description="abc", - asset_list=[ - sy.Asset( - name="numpy-data", - mock=mock_high, - data=private_high, - shape=private_high.shape, - mock_is_real=True, - ) - ], - ) - - high_client.upload_dataset(dataset_high) - mock_low = np.array([0, 1, 2, 3, 4]) # do_high.mock - - dataset_low = sy.Dataset( - id=dataset_high.id, - name="my-dataset", - description="abc", - asset_list=[ - sy.Asset( - name="numpy-data", - mock=mock_low, - data=ActionObject.empty(data_node_id=high_client.id), - shape=mock_low.shape, - mock_is_real=True, - ) - ], - ) - - res = low_client.upload_dataset(dataset_low) - - data_low = client_low_ds.datasets[0].assets[0] - - @sy.syft_function_single_use(data=data_low) - def compute_mean(data) -> float: - return data.mean() - - res = client_low_ds.code.request_code_execution(compute_mean) - print(res) - print("LOW CODE:", low_client.code.get_all()) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - print(low_state.objects, high_state.objects) - - diff_state = compare_states(low_state, high_state) - low_items_to_sync, high_items_to_sync = resolve( - diff_state, decision="low", share_private_objects=True - ) - - print(low_items_to_sync, high_items_to_sync) - - low_client.apply_state(low_items_to_sync) - - high_client.apply_state(high_items_to_sync) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - diff_state = compare_states(low_state, high_state) - - high_client._fetch_api(high_client.credentials) - - data_high = high_client.datasets[0].assets[0] - - print(high_client.code.get_all()) - job_high = high_client.code.compute_mean(data=data_high, blocking=False) - print("Waiting for job...") - job_high.wait(timeout=60) - job_high.result.get() - - # syft absolute - from syft.service.request.request import Request - - request: Request = high_client.requests[0] - job_info = job_high.info(public_metadata=True, result=True) - - print(request.syft_client_verify_key, request.syft_node_location) - print(request.code.syft_client_verify_key, request.code.syft_node_location) - request.accept_by_depositing_result(job_info) - - request = high_client.requests[0] - job_high._get_log_objs() - - action_store_high = high_worker.get_service("actionservice").store - blob_store_high = high_worker.get_service("blobstorageservice").stash.partition - assert ( - f"{client_low_ds.verify_key}_READ" - in action_store_high.permissions[job_high.result.id.id] - ) - assert ( - f"{client_low_ds.verify_key}_READ" - in blob_store_high.permissions[job_high.result.syft_blob_storage_entry_id] - ) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - - diff_state_2 = compare_states(low_state, high_state) - - low_items_to_sync, high_items_to_sync = resolve( - diff_state_2, decision="high", share_private_objects=False, ask_for_input=False - ) - for diff in diff_state_2.diffs: - 
print(diff.status, diff.object_type) - low_client.apply_state(low_items_to_sync) - - low_state = low_client.get_sync_state() - high_state = high_client.get_sync_state() - res_low = client_low_ds.code.compute_mean(data=data_low) - assert isinstance(res_low, SyftError) - assert ( - res_low.message - == f"Permission: [READ: {job_high.result.id.id} as {client_low_ds.verify_key}] denied" - ) - - job_low = client_low_ds.code.compute_mean(data=data_low, blocking=False) - - assert job_low.id == job_high.id - assert job_low.result.id == job_high.result.id - result = job_low.result.get() - assert isinstance(result, SyftError) - assert ( - result.message - == f"Permission: [READ: {job_high.result.id.id} as {client_low_ds.verify_key}] denied" - ) - - low_worker.cleanup() - high_worker.cleanup() diff --git a/packages/syft/tests/syft/service/sync/sync_resolve_single_test.py b/packages/syft/tests/syft/service/sync/sync_resolve_single_test.py index bf6cb8aca2d..b3972532521 100644 --- a/packages/syft/tests/syft/service/sync/sync_resolve_single_test.py +++ b/packages/syft/tests/syft/service/sync/sync_resolve_single_test.py @@ -5,17 +5,43 @@ import syft import syft as sy from syft.client.domain_client import DomainClient +from syft.client.sync_decision import SyncDecision from syft.client.syncing import compare_clients -from syft.client.syncing import resolve_single +from syft.client.syncing import resolve +from syft.service.code.user_code import UserCode +from syft.service.response import SyftError from syft.service.response import SyftSuccess - - -def compare_and_resolve(*, from_client: DomainClient, to_client: DomainClient): +from syft.service.sync.resolve_widget import ResolveWidget + + +def handle_decision( + widget: ResolveWidget, decision: SyncDecision +) -> SyftSuccess | SyftError: + if decision == SyncDecision.IGNORE: + # ignore not yet implemented on the widget + return widget.obj_diff_batch.ignore() + elif decision in [SyncDecision.LOW, SyncDecision.HIGH]: + return widget.click_sync() + else: + raise ValueError(f"Unknown decision {decision}") + + +def compare_and_resolve( + *, + from_client: DomainClient, + to_client: DomainClient, + decision: SyncDecision = SyncDecision.LOW, + decision_callback: callable = None, +): diff_state_before = compare_clients(from_client, to_client) - for obj_diff_batch in diff_state_before.batches: - widget = resolve_single(obj_diff_batch) + for obj_diff_batch in diff_state_before.active_batches: + widget = resolve( + obj_diff_batch=obj_diff_batch, + ) + if decision_callback: + decision = decision_callback(obj_diff_batch) widget.click_share_all_private_data() - res = widget.click_sync() + res = handle_decision(widget, decision) assert isinstance(res, SyftSuccess) from_client.refresh() to_client.refresh() @@ -76,7 +102,7 @@ def compute() -> int: client_low_ds.refresh() res = client_low_ds.code.compute(blocking=True) - assert res == compute(blocking=True).get() + assert res == compute(syft_no_node=True) def test_sync_with_error(low_worker, high_worker): @@ -146,3 +172,75 @@ def compute() -> int: assert len(diff.batches) == 1 assert len(diff.ignored_batches) == 1 assert len(diff.all_batches) == 2 + + +def test_forget_usercode(low_worker, high_worker): + low_client = low_worker.root_client + client_low_ds = low_worker.guest_client + high_client = high_worker.root_client + + @sy.syft_function_single_use() + def compute() -> int: + print("computing...") + return 42 + + _ = client_low_ds.code.request_code_execution(compute) + + diff_before, diff_after = compare_and_resolve( + 
from_client=low_client, to_client=high_client + ) + + run_and_accept_result(high_client) + + def skip_if_user_code(diff): + if diff.root_type is UserCode: + return SyncDecision.IGNORE + + raise ValueError( + f"Should not reach here after ignoring user code, got {diff.root_type}" + ) + + diff_before, diff_after = compare_and_resolve( + from_client=low_client, + to_client=high_client, + decision_callback=skip_if_user_code, + ) + assert not diff_after.is_same + assert not diff_after.is_same + + +def test_request_code_execution_multiple(low_worker, high_worker): + low_client = low_worker.root_client + client_low_ds = low_worker.guest_client + high_client = high_worker.root_client + + @sy.syft_function_single_use() + def compute() -> int: + return 42 + + @sy.syft_function_single_use() + def compute_twice() -> int: + return 42 * 2 + + @sy.syft_function_single_use() + def compute_thrice() -> int: + return 42 * 3 + + _ = client_low_ds.code.request_code_execution(compute) + _ = client_low_ds.code.request_code_execution(compute_twice) + + diff_before, diff_after = compare_and_resolve( + from_client=low_client, to_client=high_client + ) + + assert not diff_before.is_same + assert diff_after.is_same + + _ = client_low_ds.code.request_code_execution(compute_thrice) + + diff_before, diff_after = compare_and_resolve( + from_client=low_client, to_client=high_client + ) + + assert not diff_before.is_same + assert diff_after.is_same diff --git a/packages/syft/tests/syft/users/local_execution_test.py b/packages/syft/tests/syft/users/local_execution_test.py index e4da87d075b..bdb706ae945 100644 --- a/packages/syft/tests/syft/users/local_execution_test.py +++ b/packages/syft/tests/syft/users/local_execution_test.py @@ -1,14 +1,17 @@ # stdlib from collections import OrderedDict +import sys # third party import numpy as np +import pytest # syft absolute import syft as sy from syft.client.api import APIRegistry +@pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") def test_local_execution(worker): root_domain_client = worker.root_client dataset = sy.Dataset( @@ -40,5 +43,8 @@ def my_func(x): return x + 1 # time.sleep(10) - local_res = my_func(x=asset, time_alive=1) + local_res = my_func( + x=asset, + time_alive=1, + ) assert (local_res == np.array([2, 2, 2])).all() diff --git a/packages/syft/tests/syft/users/user_code_test.py b/packages/syft/tests/syft/users/user_code_test.py index 4132c829e2f..53758e3c451 100644 --- a/packages/syft/tests/syft/users/user_code_test.py +++ b/packages/syft/tests/syft/users/user_code_test.py @@ -87,7 +87,7 @@ def test_duplicated_user_code(worker, guest_client: User) -> None: # request the a different function name but same content will also succeed # flaky if not blocking - mock_syft_func_2(blocking=True) + mock_syft_func_2(syft_no_node=True) result = guest_client.api.services.code.request_code_execution(mock_syft_func_2) assert isinstance(result, Request) assert len(guest_client.code.get_all()) == 2 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 1c8a4fc8b27..9152038b1f7 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,8 +1,16 @@ +# stdlib +from secrets import token_hex + # third party import _pytest from faker import Faker import pytest +# syft absolute +import syft as sy +from syft.abstract_node import NodeSideType +from syft.node.worker import Worker + def pytest_configure(config: _pytest.config.Config) -> None: config.addinivalue_line("markers", "frontend: frontend integration tests") 
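The hunk below moves the full_low_worker and full_high_worker orchestra fixtures into tests/integration/conftest.py (they are removed from the unit-test conftest and from twin_api_sync_test.py elsewhere in this diff, and no longer pass in_memory_workers=True). A minimal sketch, assuming only what this conftest provides, of how an integration test could consume one of them; the test name and assertion are illustrative and not part of the patch:

# Illustrative usage only; not an added line of this patch.
import pytest


@pytest.mark.local_node
def test_low_worker_login(full_low_worker) -> None:
    # the fixture yields the handle returned by sy.orchestra.launch,
    # so tests log in through it like the other local tests in this diff
    client = full_low_worker.login(email="info@openmined.org", password="changethis")
    assert client is not None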
@@ -31,3 +39,43 @@ def domain_2_port() -> int: @pytest.fixture def faker(): return Faker() + + +@pytest.fixture(scope="function") +def full_low_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: + _node = sy.orchestra.launch( + node_side_type=NodeSideType.LOW_SIDE, + name=token_hex(8), + # dev_mode=True, + reset=True, + n_consumers=n_consumers, + create_producer=create_producer, + queue_port=None, + local_db=False, + thread_workers=False, + ) + # startup code here + yield _node + # # Cleanup code + _node.python_node.cleanup() + _node.land() + + +@pytest.fixture(scope="function") +def full_high_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: + _node = sy.orchestra.launch( + node_side_type=NodeSideType.HIGH_SIDE, + name=token_hex(8), + # dev_mode=True, + reset=True, + n_consumers=n_consumers, + create_producer=create_producer, + queue_port=None, + local_db=False, + thread_workers=False, + ) + # startup code here + yield _node + # Cleanup code + _node.python_node.cleanup() + _node.land() diff --git a/tests/integration/container_workload/pool_image_test.py b/tests/integration/container_workload/pool_image_test.py index d8cb59f8f1f..96613240660 100644 --- a/tests/integration/container_workload/pool_image_test.py +++ b/tests/integration/container_workload/pool_image_test.py @@ -1,38 +1,70 @@ # stdlib import os -from time import sleep # third party from faker import Faker import numpy as np import pytest +import requests # syft absolute import syft as sy from syft.client.domain_client import DomainClient from syft.custom_worker.config import DockerWorkerConfig -from syft.node.node import get_default_worker_tag_by_env from syft.service.request.request import Request from syft.service.response import SyftSuccess from syft.service.worker.worker_image import SyftWorkerImage from syft.service.worker.worker_pool import SyftWorker from syft.service.worker.worker_pool import WorkerPool -SYFT_BASE_TAG = get_default_worker_tag_by_env() -hagrid_flags = os.getenv("HAGRID_FLAGS") -if hagrid_flags: - SYFT_BASE_TAG = get_default_worker_tag_by_env(dev_mode=True) +registry = os.getenv("SYFT_BASE_IMAGE_REGISTRY", "docker.io") +repo = "openmined/grid-backend" + +if "k3d" in registry: + res = requests.get(url=f"http://{registry}/v2/{repo}/tags/list") + tag = res.json()["tags"][0] +else: + tag = sy.__version__ + +external_registry = os.getenv("EXTERNAL_REGISTRY", registry) +external_registry_username = os.getenv("EXTERNAL_REGISTRY_USERNAME", None) +external_registry_password = os.getenv("EXTERNAL_REGISTRY_PASSWORD", None) + + +@pytest.fixture +def external_registry_uid(domain_1_port): + domain_client: DomainClient = sy.login( + port=domain_1_port, email="info@openmined.org", password="changethis" + ) + image_registry_list = domain_client.api.services.image_registry.get_all() + if len(image_registry_list) > 1: + raise Exception("Only one registry should be present for testing") + + elif len(image_registry_list) == 1: + assert ( + image_registry_list[0].url == external_registry + ), "External registry different from the one set in the environment variable" + return image_registry_list[0].id + else: + registry_add_result = domain_client.api.services.image_registry.add( + external_registry + ) + + assert isinstance(registry_add_result, sy.SyftSuccess), str(registry_add_result) + + image_registry_list = domain_client.api.services.image_registry.get_all() + return image_registry_list[0].id @pytest.mark.container_workload -def test_image_build(domain_1_port) -> None: +def 
test_image_build(domain_1_port, external_registry_uid) -> None: domain_client: DomainClient = sy.login( port=domain_1_port, email="info@openmined.org", password="changethis" ) - # Submit Docker Worker Config + # Submit Docker Worker Config. docker_config_rl = f""" - FROM openmined/grid-backend:{SYFT_BASE_TAG} + FROM {registry}/{repo}:{tag} RUN pip install recordlinkage """ docker_config = DockerWorkerConfig(dockerfile=docker_config_rl) @@ -49,12 +81,11 @@ def test_image_build(domain_1_port) -> None: assert not isinstance(workerimage, sy.SyftError) # Build docker image - tag_version = sy.UID().short() - docker_tag = f"openmined/custom-worker-rl:{tag_version}" + docker_tag = "openmined/custom-worker-rl:latest" docker_build_result = domain_client.api.services.worker_image.build( image_uid=workerimage.id, tag=docker_tag, - pull=False, + registry_uid=external_registry_uid, ) assert isinstance(docker_build_result, SyftSuccess) @@ -67,18 +98,9 @@ def test_image_build(domain_1_port) -> None: assert workerimage.image_identifier.repo_with_tag == docker_tag assert workerimage.image_hash is not None - # Delete image - delete_result = domain_client.api.services.worker_image.remove(uid=workerimage.id) - assert isinstance(delete_result, sy.SyftSuccess) - - # Validate the image is successfully deleted - assert len(domain_client.images.get_all()) == 1 - workerimage = domain_client.images.get_all()[0] - assert workerimage.config != docker_config - @pytest.mark.container_workload -def test_pool_launch(domain_1_port) -> None: +def test_pool_launch(domain_1_port, external_registry_uid) -> None: domain_client: DomainClient = sy.login( port=domain_1_port, email="info@openmined.org", password="changethis" ) @@ -86,7 +108,7 @@ def test_pool_launch(domain_1_port) -> None: # Submit Docker Worker Config docker_config_opendp = f""" - FROM openmined/grid-backend:{SYFT_BASE_TAG} + FROM {registry}/{repo}:{tag} RUN pip install opendp """ docker_config = DockerWorkerConfig(dockerfile=docker_config_opendp) @@ -103,18 +125,24 @@ def test_pool_launch(domain_1_port) -> None: assert not worker_image.is_built # Build docker image - tag_version = sy.UID().short() - docker_tag = f"openmined/custom-worker-opendp:{tag_version}" + docker_tag = "openmined/custom-worker-opendp:latest" docker_build_result = domain_client.api.services.worker_image.build( image_uid=worker_image.id, tag=docker_tag, - pull=False, + registry_uid=external_registry_uid, ) assert isinstance(docker_build_result, SyftSuccess) + # Push Image to External registry + push_result = domain_client.api.services.worker_image.push( + worker_image.id, + username=external_registry_username, + password=external_registry_password, + ) + assert isinstance(push_result, sy.SyftSuccess), str(push_result) + # Launch a worker pool - pool_version = sy.UID().short() - worker_pool_name = f"custom_worker_pool_ver{pool_version}" + worker_pool_name = "custom-worker-pool-opendp" worker_pool_res = domain_client.api.services.worker_pool.launch( name=worker_pool_name, image_uid=worker_image.id, @@ -156,14 +184,9 @@ def test_pool_launch(domain_1_port) -> None: # TODO: delete the launched pool - # Clean the build images - sleep(10) - delete_result = domain_client.api.services.worker_image.remove(uid=worker_image.id) - assert isinstance(delete_result, sy.SyftSuccess) - @pytest.mark.container_workload -def test_pool_image_creation_job_requests(domain_1_port) -> None: +def test_pool_image_creation_job_requests(domain_1_port, external_registry_uid) -> None: """ Test register ds client, ds requests 
to create an image and pool creation, do approves, then ds creates a function attached to the worker pool, then creates another @@ -187,21 +210,19 @@ def test_pool_image_creation_job_requests(domain_1_port) -> None: # the DS makes a request to create an image and a pool based on the image docker_config_np = f""" - FROM openmined/grid-backend:{SYFT_BASE_TAG} + FROM {registry}/{repo}:{tag} RUN pip install numpy """ docker_config = DockerWorkerConfig(dockerfile=docker_config_np) - tag_version = sy.UID().short() - docker_tag = f"openmined/custom-worker-np:{tag_version}" - pool_version = sy.UID().short() - worker_pool_name = f"custom_worker_pool_ver{pool_version}" + docker_tag = "openmined/custom-worker-np:latest" + worker_pool_name = "custom-worker-pool-numpy" request = ds_client.api.services.worker_pool.create_image_and_pool_request( pool_name=worker_pool_name, num_workers=1, tag=docker_tag, config=docker_config, reason="I want to do some more cool data science with PySyft and Recordlinkage", - pull_image=False, + registry_uid=external_registry_uid, ) assert isinstance(request, Request) assert len(request.changes) == 2 @@ -224,7 +245,7 @@ def test_pool_image_creation_job_requests(domain_1_port) -> None: worker: SyftWorker = launched_pool.workers[0] assert launched_pool.name in worker.name - assert worker.status.value == "Pending" + assert worker.status.value == "Running" assert worker.healthcheck.value == "✅" # assert worker.consumer_state.value == "Idle" assert isinstance(worker.logs, str) @@ -279,8 +300,3 @@ def custom_worker_func(x): assert isinstance(res, sy.SyftSuccess) # TODO: delete the launched pool - - # Clean the build images - sleep(10) - delete_result = domain_client.api.services.worker_image.remove(uid=built_image.id) - assert isinstance(delete_result, sy.SyftSuccess) diff --git a/tests/integration/local/gateway_local_test.py b/tests/integration/local/gateway_local_test.py index 344e64e187c..a26e6ad35bb 100644 --- a/tests/integration/local/gateway_local_test.py +++ b/tests/integration/local/gateway_local_test.py @@ -1,5 +1,7 @@ # stdlib +import os from secrets import token_hex +import time # third party from faker import Faker @@ -11,13 +13,20 @@ from syft.client.domain_client import DomainClient from syft.client.enclave_client import EnclaveClient from syft.client.gateway_client import GatewayClient +from syft.service.network.network_service import NodePeerAssociationStatus from syft.service.network.node_peer import NodePeer +from syft.service.network.node_peer import NodePeerConnectionStatus +from syft.service.network.utils import PeerHealthCheckTask from syft.service.request.request import Request from syft.service.response import SyftSuccess from syft.service.user.user_roles import ServiceRole -def launch(node_type: NodeType, association_request_auto_approval: bool = True): +def _launch( + node_type: NodeType, + association_request_auto_approval: bool = True, + port: int | str | None = None, +): return sy.orchestra.launch( name=token_hex(8), node_type=node_type, @@ -25,12 +34,14 @@ def launch(node_type: NodeType, association_request_auto_approval: bool = True): reset=True, local_db=True, association_request_auto_approval=association_request_auto_approval, + port=port, + background_tasks=True, ) @pytest.fixture def gateway(): - node = launch(NodeType.GATEWAY) + node = _launch(NodeType.GATEWAY) yield node node.python_node.cleanup() node.land() @@ -38,7 +49,7 @@ def gateway(): @pytest.fixture(params=[True, False]) def gateway_association_request_auto_approval(request: 
pytest.FixtureRequest): - node = launch(NodeType.GATEWAY, association_request_auto_approval=request.param) + node = _launch(NodeType.GATEWAY, association_request_auto_approval=request.param) yield (request.param, node) node.python_node.cleanup() node.land() @@ -46,7 +57,7 @@ def gateway_association_request_auto_approval(request: pytest.FixtureRequest): @pytest.fixture def domain(): - node = launch(NodeType.DOMAIN) + node = _launch(NodeType.DOMAIN) yield node node.python_node.cleanup() node.land() @@ -54,7 +65,7 @@ def domain(): @pytest.fixture def domain_2(): - node = launch(NodeType.DOMAIN) + node = _launch(NodeType.DOMAIN) yield node node.python_node.cleanup() node.land() @@ -62,12 +73,95 @@ def domain_2(): @pytest.fixture def enclave(): - node = launch(NodeType.ENCLAVE) + node = _launch(NodeType.ENCLAVE) yield node node.python_node.cleanup() node.land() +@pytest.fixture +def gateway_webserver(): + node = _launch(node_type=NodeType.GATEWAY, port="auto") + yield node + node.land() + + +@pytest.fixture +def domain_webserver(): + node = _launch(NodeType.DOMAIN, port="auto") + yield node + node.land() + + +@pytest.fixture +def domain_2_webserver(): + node = _launch(NodeType.DOMAIN, port="auto") + yield node + node.land() + + +@pytest.fixture(scope="function") +def set_network_json_env_var(gateway_webserver): + """Set the environment variable for the network registry JSON string.""" + json_string = f""" + {{ + "2.0.0": {{ + "gateways": [ + {{ + "name": "{gateway_webserver.name}", + "host_or_ip": "localhost", + "protocol": "http", + "port": "{gateway_webserver.port}", + "admin_email": "support@openmined.org", + "website": "https://www.openmined.org/", + "slack": "https://slack.openmined.org/", + "slack_channel": "#support" + }} + ] + }} + }} + """ + os.environ["NETWORK_REGISTRY_JSON"] = json_string + yield + # Clean up the environment variable after all tests in the module have run + del os.environ["NETWORK_REGISTRY_JSON"] + + +@pytest.mark.local_node +def test_create_gateway( + set_network_json_env_var, gateway_webserver, domain_webserver, domain_2_webserver +): + assert isinstance(sy.gateways, sy.NetworkRegistry) + assert len(sy.gateways.all_networks) == 1 + assert sy.gateways.all_networks[0]["name"] == gateway_webserver.name + assert len(sy.gateways.online_networks) == 1 + assert sy.gateways.online_networks[0]["name"] == gateway_webserver.name + + gateway_client: GatewayClient = gateway_webserver.login( + email="info@openmined.org", + password="changethis", + ) + res = gateway_client.settings.allow_association_request_auto_approval(enable=True) + assert isinstance(res, SyftSuccess) + + domain_client: DomainClient = domain_webserver.login( + email="info@openmined.org", + password="changethis", + ) + domain_client_2: DomainClient = domain_2_webserver.login( + email="info@openmined.org", + password="changethis", + ) + result = domain_client.connect_to_gateway(handle=gateway_webserver) + assert isinstance(result, SyftSuccess) + result = domain_client_2.connect_to_gateway(handle=gateway_webserver) + assert isinstance(result, SyftSuccess) + + time.sleep(PeerHealthCheckTask.repeat_time + 1) + assert len(sy.domains.all_domains) == 2 + assert len(sy.domains.online_domains) == 2 + + @pytest.mark.local_node def test_create_gateway_client(gateway): client = gateway.client @@ -102,15 +196,9 @@ def test_domain_connect_to_gateway(gateway_association_request_auto_approval, do all_peers = gateway_client.api.services.network.get_all_peers() assert all_peers[0].node_routes[0].priority == 1 - # Try via client 
approach + # Try again (via client approach) result_2 = domain_client.connect_to_gateway(via_client=gateway_client) - - if association_request_auto_approval: - assert isinstance(result_2, SyftSuccess) - else: - assert isinstance(result_2, Request) - r = gateway_client.api.services.request.get_all()[-1].approve() - assert isinstance(r, SyftSuccess) + assert isinstance(result_2, SyftSuccess) assert len(domain_client.peers) == 1 assert len(gateway_client.peers) == 1 @@ -149,7 +237,7 @@ def test_domain_connect_to_gateway(gateway_association_request_auto_approval, do # check priority all_peers = gateway_client.api.services.network.get_all_peers() - assert all_peers[0].node_routes[0].priority == 2 + assert all_peers[0].node_routes[0].priority == 1 @pytest.mark.local_node @@ -175,13 +263,13 @@ def test_domain_connect_to_gateway_routes_priority(gateway, domain, domain_2) -> domain_1_routes = all_peers[0].node_routes assert domain_1_routes[0].priority == 1 - # reconnect to the gateway. The route's priority should be increased by 1 + # reconnect to the gateway result = domain_client.connect_to_gateway(via_client=gateway_client) assert isinstance(result, SyftSuccess) all_peers = gateway_client.api.services.network.get_all_peers() assert len(all_peers) == 1 domain_1_routes = all_peers[0].node_routes - assert domain_1_routes[0].priority == 2 + assert domain_1_routes[0].priority == 1 # another domain client connects to the gateway domain_client_2: DomainClient = domain_2.login( @@ -194,10 +282,7 @@ def test_domain_connect_to_gateway_routes_priority(gateway, domain, domain_2) -> all_peers = gateway_client.api.services.network.get_all_peers() assert len(all_peers) == 2 for peer in all_peers: - if peer.name == domain_client.metadata.name: - assert peer.node_routes[0].priority == 2 - if peer.name == domain_client_2.metadata.name: - assert peer.node_routes[0].priority == 1 + assert peer.node_routes[0].priority == 1 @pytest.mark.local_node @@ -253,3 +338,45 @@ def test_enclave_connect_to_gateway(faker: Faker, gateway, enclave): assert ( proxy_enclave_client.api.endpoints.keys() == enclave_client.api.endpoints.keys() ) + + +@pytest.mark.local_node +@pytest.mark.parametrize( + "gateway_association_request_auto_approval", [False], indirect=True +) +def test_repeated_association_requests_peers_health_check( + gateway_association_request_auto_approval, domain +): + _, gateway = gateway_association_request_auto_approval + gateway_client: GatewayClient = gateway.login( + email="info@openmined.org", + password="changethis", + ) + domain_client: DomainClient = domain.login( + email="info@openmined.org", + password="changethis", + ) + + result = domain_client.connect_to_gateway(handle=gateway) + assert isinstance(result, Request) + + result = domain_client.connect_to_gateway(handle=gateway) + assert isinstance(result, Request) + + r = gateway_client.api.services.request.get_all()[-1].approve() + assert isinstance(r, SyftSuccess) + + result = domain_client.connect_to_gateway(handle=gateway) + assert isinstance(result, SyftSuccess) + + # the gateway client checks that the peer is associated + res = gateway_client.api.services.network.check_peer_association( + peer_id=domain_client.id + ) + assert isinstance(res, NodePeerAssociationStatus) + assert res.value == "PEER_ASSOCIATED" + + # check for peer connection status + time.sleep(PeerHealthCheckTask.repeat_time + 1) + domain_peer = gateway_client.api.services.network.get_all_peers()[0] + assert domain_peer.ping_status == NodePeerConnectionStatus.ACTIVE diff --git 
a/tests/integration/local/job_test.py b/tests/integration/local/job_test.py new file mode 100644 index 00000000000..e713da731df --- /dev/null +++ b/tests/integration/local/job_test.py @@ -0,0 +1,133 @@ +# stdlib + +# stdlib +from secrets import token_hex +import time + +# third party +import pytest + +# syft absolute +import syft as sy +from syft import syft_function +from syft import syft_function_single_use +from syft.service.job.job_service import wait_until +from syft.service.job.job_stash import JobStatus +from syft.service.response import SyftError +from syft.service.response import SyftSuccess + + +@pytest.mark.local_node +def test_job_restart(job) -> None: + job.wait(timeout=2) + + assert wait_until( + lambda: job.fetched_status == JobStatus.PROCESSING + ), "Job not started" + assert wait_until( + lambda: all( + subjob.fetched_status == JobStatus.PROCESSING for subjob in job.subjobs + ) + ), "Subjobs not started" + + result = job.subjobs[0].restart() + assert isinstance(result, SyftError), "Should not restart subjob" + + result = job.restart() + assert isinstance(result, SyftError), "Should not restart running job" + + result = job.kill() + assert isinstance(result, SyftSuccess), "Should kill job" + assert job.fetched_status == JobStatus.INTERRUPTED + + result = job.restart() + assert isinstance(result, SyftSuccess), "Should restart idle job" + + job.wait(timeout=10) + + assert wait_until( + lambda: job.fetched_status == JobStatus.PROCESSING + ), "Job not restarted" + assert wait_until( + lambda: len( + [ + subjob.fetched_status == JobStatus.PROCESSING + for subjob in job.subjobs + if subjob.fetched_status != JobStatus.INTERRUPTED + ] + ) + == 2 + ), "Subjobs not restarted" + + +@pytest.fixture +def node(): + node = sy.orchestra.launch( + name=token_hex(8), + dev_mode=False, + thread_workers=False, + reset=True, + n_consumers=4, + create_producer=True, + node_side_type=sy.NodeSideType.LOW_SIDE, + ) + try: + yield node + finally: + node.python_node.cleanup() + node.land() + + +@pytest.fixture +def job(node): + client = node.login(email="info@openmined.org", password="changethis") + _ = client.register(name="a", email="aa@b.org", password="c", password_verify="c") + ds_client = node.login(email="aa@b.org", password="c") + + @syft_function() + def process_batch(): + # stdlib + + while time.sleep(1) is None: + ... + + ds_client.code.submit(process_batch) + + @syft_function_single_use() + def process_all(domain): + # stdlib + + _ = domain.launch_job(process_batch) + _ = domain.launch_job(process_batch) + + while time.sleep(1) is None: + ... 
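# (Annotation, not an added line of this patch.) process_batch and process_all above
# intentionally block forever, so the parent job and its two launched subjobs stay in
# JobStatus.PROCESSING until the tests kill or restart them. The tests poll that state
# with wait_until (imported above from syft.service.job.job_service), which is assumed
# to re-evaluate a zero-argument predicate until it returns True or a timeout expires,
# e.g.:
#
#     assert wait_until(lambda: job.fetched_status == JobStatus.PROCESSING)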
+ + _ = ds_client.code.request_code_execution(process_all) + client.requests[-1].approve(approve_nested=True) + client = node.login(email="info@openmined.org", password="changethis") + job = client.code.process_all(blocking=False) + try: + yield job + finally: + job.kill() + + +@pytest.mark.local_node +def test_job_kill(job) -> None: + job.wait(timeout=2) + assert wait_until( + lambda: job.fetched_status == JobStatus.PROCESSING + ), "Job not started" + assert wait_until( + lambda: all( + subjob.fetched_status == JobStatus.PROCESSING for subjob in job.subjobs + ) + ), "Subjobs not started" + + result = job.subjobs[0].kill() + assert isinstance(result, SyftError), "Should not kill subjob" + + result = job.kill() + assert isinstance(result, SyftSuccess), "Should kill job" + assert job.fetched_status == JobStatus.INTERRUPTED diff --git a/tests/integration/local/request_multiple_nodes_test.py b/tests/integration/local/request_multiple_nodes_test.py index 601988673dc..e81f75b57d6 100644 --- a/tests/integration/local/request_multiple_nodes_test.py +++ b/tests/integration/local/request_multiple_nodes_test.py @@ -21,7 +21,6 @@ def node_1(): local_db=True, create_producer=True, n_consumers=1, - in_memory_workers=True, queue_port=None, ) yield node @@ -39,7 +38,6 @@ def node_2(): local_db=True, create_producer=True, n_consumers=1, - in_memory_workers=True, queue_port=None, ) yield node diff --git a/tests/integration/local/syft_function_test.py b/tests/integration/local/syft_function_test.py index 6ca60f3b90d..8cc85cce4e2 100644 --- a/tests/integration/local/syft_function_test.py +++ b/tests/integration/local/syft_function_test.py @@ -23,7 +23,6 @@ def node(): n_consumers=3, create_producer=True, queue_port=None, - in_memory_workers=True, local_db=False, ) # startup code here diff --git a/tests/integration/local/twin_api_sync_test.py b/tests/integration/local/twin_api_sync_test.py index 27bf3ab4ced..d39066ade9a 100644 --- a/tests/integration/local/twin_api_sync_test.py +++ b/tests/integration/local/twin_api_sync_test.py @@ -1,5 +1,4 @@ # stdlib -from secrets import token_hex import sys # third party @@ -9,11 +8,9 @@ # syft absolute import syft import syft as sy -from syft.abstract_node import NodeSideType from syft.client.domain_client import DomainClient from syft.client.syncing import compare_clients -from syft.client.syncing import resolve_single -from syft.node.worker import Worker +from syft.client.syncing import resolve from syft.service.job.job_stash import JobStatus from syft.service.response import SyftError from syft.service.response import SyftSuccess @@ -22,7 +19,7 @@ def compare_and_resolve(*, from_client: DomainClient, to_client: DomainClient): diff_state_before = compare_clients(from_client, to_client) for obj_diff_batch in diff_state_before.batches: - widget = resolve_single(obj_diff_batch) + widget = resolve(obj_diff_batch) widget.click_share_all_private_data() res = widget.click_sync() assert isinstance(res, SyftSuccess) @@ -48,48 +45,6 @@ def get_ds_client(client: DomainClient) -> DomainClient: return client.login(email="a@a.com", password="asdf") -@pytest.fixture(scope="function") -def full_high_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: - _node = sy.orchestra.launch( - node_side_type=NodeSideType.HIGH_SIDE, - name=token_hex(8), - # dev_mode=True, - reset=True, - n_consumers=n_consumers, - create_producer=create_producer, - queue_port=None, - in_memory_workers=True, - local_db=False, - thread_workers=False, - ) - # startup code here - yield _node - # 
Cleanup code - _node.python_node.cleanup() - _node.land() - - -@pytest.fixture(scope="function") -def full_low_worker(n_consumers: int = 3, create_producer: bool = True) -> Worker: - _node = sy.orchestra.launch( - node_side_type=NodeSideType.LOW_SIDE, - name=token_hex(8), - # dev_mode=True, - reset=True, - n_consumers=n_consumers, - create_producer=create_producer, - queue_port=None, - in_memory_workers=True, - local_db=False, - thread_workers=False, - ) - # startup code here - yield _node - # Cleanup code - _node.python_node.cleanup() - _node.land() - - @sy.api_endpoint_method() def mock_function(context) -> str: return -42 diff --git a/tests/integration/network/gateway_test.py b/tests/integration/network/gateway_test.py index 25a8282096e..be72aae81e6 100644 --- a/tests/integration/network/gateway_test.py +++ b/tests/integration/network/gateway_test.py @@ -1,6 +1,7 @@ # stdlib import itertools import os +import time import uuid # third party @@ -18,9 +19,12 @@ from syft.client.search import SearchResults from syft.service.dataset.dataset import Dataset from syft.service.network.association_request import AssociationRequestChange +from syft.service.network.network_service import NodePeerAssociationStatus from syft.service.network.node_peer import NodePeer +from syft.service.network.node_peer import NodePeerConnectionStatus from syft.service.network.routes import HTTPNodeRoute from syft.service.network.routes import NodeRouteType +from syft.service.network.utils import PeerHealthCheckTask from syft.service.request.request import Request from syft.service.response import SyftError from syft.service.response import SyftSuccess @@ -343,13 +347,15 @@ def test_deleting_peers(set_env_var, domain_1_port: int, gateway_port: int) -> N assert len(gateway_client.peers) == 0 -def test_add_route(set_env_var, gateway_port: int, domain_1_port: int) -> None: +def test_add_update_route_priority( + set_env_var, gateway_port: int, domain_1_port: int +) -> None: """ Test the network service's `add_route` functionalities to add routes directly for a self domain. Scenario: Connect a domain to a gateway. The gateway adds 2 new routes to the domain and check their priorities. - Then add an existed route and check if its priority gets updated. + Then update an existing route's priority and check if its priority gets updated. Check for the gateway if the proxy client to connect to the domain uses the route with the highest priority. """ @@ -360,9 +366,10 @@ def test_add_route(set_env_var, gateway_port: int, domain_1_port: int) -> None: domain_client: DomainClient = sy.login( port=domain_1_port, email="info@openmined.org", password="changethis" ) - # Remove existing peers - assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) - assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + + # Try removing existing peers just to make sure + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) # Enable automatic acceptance of association requests res = gateway_client.settings.allow_association_request_auto_approval(enable=True) @@ -396,7 +403,7 @@ def test_add_route(set_env_var, gateway_port: int, domain_1_port: int) -> None: assert domain_peer.node_routes[-1].port == new_route2.port assert domain_peer.node_routes[-1].priority == 3 - # add an existed route to the domain and check its priority gets updated + # add an existing route to the domain.
Its priority should not be updated res = gateway_client.api.services.network.add_route( peer_verify_key=domain_peer.verify_key, route=domain_peer.node_routes[0] ) @@ -404,27 +411,26 @@ def test_add_route(set_env_var, gateway_port: int, domain_1_port: int) -> None: assert isinstance(res, SyftSuccess) domain_peer = gateway_client.api.services.network.get_all_peers()[0] assert len(domain_peer.node_routes) == 3 - assert domain_peer.node_routes[0].priority == 4 + assert domain_peer.node_routes[0].priority == 1 - # the gateway gets the proxy client to the domain - # the proxy client should use the route with the highest priority + # getting the proxy client using the current highest priority route should + # give back an error since it is a route with a random port (10001) proxy_domain_client = gateway_client.peers[0] - assert isinstance(proxy_domain_client, DomainClient) + assert isinstance(proxy_domain_client, SyftError) + assert "Failed to establish a connection with" in proxy_domain_client.message - # add another existed route (port 10000) - res = gateway_client.api.services.network.add_route( - peer_verify_key=domain_peer.verify_key, route=domain_peer.node_routes[1] + # update the valid route to have the highest priority + res = gateway_client.api.services.network.update_route_priority( + peer_verify_key=domain_peer.verify_key, route=domain_peer.node_routes[0] ) - assert "route already exists" in res.message assert isinstance(res, SyftSuccess) domain_peer = gateway_client.api.services.network.get_all_peers()[0] assert len(domain_peer.node_routes) == 3 - assert domain_peer.node_routes[1].priority == 5 - # getting the proxy client using the current highest priority route should - # give back an error since it is a route with a random port (10000) + assert domain_peer.node_routes[0].priority == 4 + + # proxying should succeed now proxy_domain_client = gateway_client.peers[0] - assert isinstance(proxy_domain_client, SyftError) - assert "Failed to establish a connection with" in proxy_domain_client.message + assert isinstance(proxy_domain_client, DomainClient) # the routes the domain client uses to connect to the gateway should stay the same gateway_peer: NodePeer = domain_client.peers[0] @@ -436,6 +442,11 @@ def test_delete_route(set_env_var, gateway_port: int, domain_1_port: int) -> None: + """ + Scenario: + Connect a domain to a gateway. The gateway adds a new route to the domain + and then deletes it. + """ # login to the domain and gateway gateway_client: GatewayClient = sy.login( port=gateway_port, email="info@openmined.org", password="changethis" ) @@ -479,12 +490,14 @@ def test_delete_route(set_env_var, gateway_port: int, domain_1_port: int) -> Non assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) -def test_add_route_on_peer(set_env_var, gateway_port: int, domain_1_port: int) -> None: +def test_add_update_route_priority_on_peer( + set_env_var, gateway_port: int, domain_1_port: int +) -> None: """ Test the `add_route_on_peer` of network service. Connect a domain to a gateway. The gateway adds 2 new routes for the domain and check their priorities. - Then add an existed route and check if its priority gets updated. + The gateway updates the route priority for the domain remotely. Then the domain adds a route to itself for the gateway.
""" # login to the domain and gateway @@ -496,8 +509,8 @@ def test_add_route_on_peer(set_env_var, gateway_port: int, domain_1_port: int) - ) # Remove existing peers - assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) - assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) # Enable automatic acceptance of association requests res = gateway_client.settings.allow_association_request_auto_approval(enable=True) @@ -535,21 +548,17 @@ def test_add_route_on_peer(set_env_var, gateway_port: int, domain_1_port: int) - assert gateway_peer.node_routes[-1].port == new_route2.port assert gateway_peer.node_routes[-1].priority == 3 - # add an existed route for the domain and check its priority gets updated - existed_route = gateway_peer.node_routes[0] - res = gateway_client.api.services.network.add_route_on_peer( - peer=domain_peer, route=existed_route + # update the route priority remotely on the domain + first_route = gateway_peer.node_routes[0] + res = gateway_client.api.services.network.update_route_priority_on_peer( + peer=domain_peer, route=first_route ) - assert "route already exists" in res.message assert isinstance(res, SyftSuccess) - gateway_peer = domain_client.peers[0] - assert len(gateway_peer.node_routes) == 3 - assert gateway_peer.node_routes[0].priority == 4 # the domain calls `add_route_on_peer` to to add a route to itself for the gateway assert len(domain_peer.node_routes) == 1 res = domain_client.api.services.network.add_route_on_peer( - peer=gateway_peer, route=new_route + peer=domain_client.peers[0], route=new_route ) assert isinstance(res, SyftSuccess) domain_peer = gateway_client.api.services.network.get_all_peers()[0] @@ -639,9 +648,9 @@ def test_update_route_priority( port=domain_1_port, email="info@openmined.org", password="changethis" ) - # Remove existing peers - assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) - assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + # Try remove existing peers + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) # Enable automatic acceptance of association requests res = gateway_client.settings.allow_association_request_auto_approval(enable=True) @@ -711,8 +720,8 @@ def test_update_route_priority_on_peer( ) # Remove existing peers - assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) - assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) # Enable automatic acceptance of association requests res = gateway_client.settings.allow_association_request_auto_approval(enable=True) @@ -781,6 +790,10 @@ def test_dataset_stream(set_env_var, gateway_port: int, domain_1_port: int) -> N port=domain_1_port, email="info@openmined.org", password="changethis" ) + # Remove existing peers just to make sure + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) + res = gateway_client.settings.allow_association_request_auto_approval(enable=True) assert isinstance(res, SyftSuccess) @@ -814,3 +827,85 @@ def test_dataset_stream(set_env_var, gateway_port: int, domain_1_port: int) -> N # the domain client delete the dataset domain_client.api.services.dataset.delete_by_uid(uid=retrieved_dataset.id) + + # Remove existing peers + assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) + assert 
isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + + +def test_peer_health_check(set_env_var, gateway_port: int, domain_1_port: int) -> None: + """ + Scenario: Connecting a domain node to a gateway node. + The gateway client approves the association request. + The gateway client checks that the domain peer is associated + TODO: check for peer connection status through NodePeer.pingstatus + TODO: check that the domain is online with `DomainRegistry.online_domains` + Then make the domain go offline, which should be reflected when calling + `DomainRegistry.online_domains` + """ + # login to the domain and gateway + gateway_client: GatewayClient = sy.login( + port=gateway_port, email="info@openmined.org", password="changethis" + ) + domain_client: DomainClient = sy.login( + port=domain_1_port, email="info@openmined.org", password="changethis" + ) + + res = gateway_client.settings.allow_association_request_auto_approval(enable=False) + assert isinstance(res, SyftSuccess) + + # Try removing existing peers just to make sure + _remove_existing_peers(domain_client) + _remove_existing_peers(gateway_client) + + # gateway checks that the domain is not yet associated + res = gateway_client.api.services.network.check_peer_association( + peer_id=domain_client.id + ) + assert isinstance(res, NodePeerAssociationStatus) + assert res.value == "PEER_NOT_FOUND" + + # the domain tries to connect to the gateway + result = domain_client.connect_to_gateway(gateway_client) + assert isinstance(result, Request) + assert isinstance(result.changes[0], AssociationRequestChange) + + # check that the peer's association request is pending + res = gateway_client.api.services.network.check_peer_association( + peer_id=domain_client.id + ) + assert isinstance(res, NodePeerAssociationStatus) + assert res.value == "PEER_ASSOCIATION_PENDING" + + # the domain tries to connect to the gateway (again) + result = domain_client.connect_to_gateway(gateway_client) + assert isinstance(result, Request) # the pending request is returned + # there should be only 1 association requests from the domain + assert len(gateway_client.api.services.request.get_all()) == 1 + + # check again that the peer's association request is still pending + res = gateway_client.api.services.network.check_peer_association( + peer_id=domain_client.id + ) + assert isinstance(res, NodePeerAssociationStatus) + assert res.value == "PEER_ASSOCIATION_PENDING" + + # the gateway client approves one of the association requests + res = gateway_client.api.services.request.get_all()[-1].approve() + assert not isinstance(res, SyftError) + assert len(gateway_client.peers) == 1 + + # the gateway client checks that the peer is associated + res = gateway_client.api.services.network.check_peer_association( + peer_id=domain_client.id + ) + assert isinstance(res, NodePeerAssociationStatus) + assert res.value == "PEER_ASSOCIATED" + + time.sleep(PeerHealthCheckTask.repeat_time + 1) + domain_peer = gateway_client.api.services.network.get_all_peers()[0] + assert domain_peer.ping_status == NodePeerConnectionStatus.ACTIVE + + # Remove existing peers + assert isinstance(_remove_existing_peers(domain_client), SyftSuccess) + assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) diff --git a/tox.ini b/tox.ini index 7811433c6e6..697354e11db 100644 --- a/tox.ini +++ b/tox.ini @@ -465,16 +465,46 @@ commands = python_version = 3.12 disable_error_code = attr-defined, valid-type, no-untyped-call, arg-type +[testenv:syft.test.integration] +description = Integration 
Tests for Syft Stack +basepython = python3 +deps = + {[testenv:syft]deps} + {[testenv:hagrid]deps} +changedir = {toxinidir} +passenv=HOME, USER +allowlist_externals = + bash +setenv = + PYTEST_MODULES = {env:PYTEST_MODULES:local_node} + ASSOCIATION_REQUEST_AUTO_APPROVAL = {env:ASSOCIATION_REQUEST_AUTO_APPROVAL:true} + PYTEST_FLAGS = {env:PYTEST_FLAGS:--ignore=tests/integration/local/gateway_local_test.py --ignore=tests/integration/local/job_test.py} +commands = + python -c 'import syft as sy; sy.stage_protocol_changes()' + + # Run Integration Tests + bash -c '\ + PYTEST_MODULES=($PYTEST_MODULES); \ + for i in "${PYTEST_MODULES[@]}"; do \ + echo "Starting test for $i"; date; \ + pytest tests/integration -m $i -vvvv -p no:randomly -p no:benchmark -o log_cli=True --capture=no $PYTEST_FLAGS; \ + return=$?; \ + echo "Finished $i"; \ + date; \ + if [[ $return -ne 0 ]]; then \ + exit $return; \ + fi; \ + done' [testenv:stack.test.integration.k8s] -description = Integration Tests for Core Stack +description = Integration Tests for Core Stack using K8s basepython = python3 deps = {[testenv:syft]deps} {[testenv:hagrid]deps} nbmake changedir = {toxinidir} -passenv=HOME, USER +passenv=HOME, USER, AZURE_BLOB_STORAGE_KEY allowlist_externals = devspace kubectl @@ -486,41 +516,39 @@ allowlist_externals = echo tox setenv = - ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:remote} NODE_PORT = {env:NODE_PORT:9082} GITHUB_CI = {env:GITHUB_CI:false} - PYTEST_MODULES = {env:PYTEST_MODULES:frontend container_workload local} - SYFT_BASE_IMAGE_REGISTRY = {env:SYFT_BASE_IMAGE_REGISTRY:k3d-registry.localhost:5800} + PYTEST_MODULES = {env:PYTEST_MODULES:frontend network container_workload} + DOMAIN_CLUSTER_NAME = {env:DOMAIN_CLUSTER_NAME:test-domain-1} + GATEWAY_CLUSTER_NAME = {env:GATEWAY_CLUSTER_NAME:test-gateway-1} ASSOCIATION_REQUEST_AUTO_APPROVAL = {env:ASSOCIATION_REQUEST_AUTO_APPROVAL:true} + SYFT_BASE_IMAGE_REGISTRY = {env:SYFT_BASE_IMAGE_REGISTRY:k3d-registry.localhost:5800} commands = bash -c "echo Running with GITHUB_CI=$GITHUB_CI; date" python -c 'import syft as sy; sy.stage_protocol_changes()' k3d version - # Since cluster name cannot have underscore and environment variable cannot have hyphen - # we are passing a grouped name for node names - # bash -c "docker rm $(docker ps -aq) --force || true" - # Deleting current cluster - bash -c "k3d cluster delete testgateway1 || true" - bash -c "k3d cluster delete testdomain1 || true" + # Deleting Old Cluster + bash -c "k3d cluster delete ${DOMAIN_CLUSTER_NAME} || true" + bash -c "k3d cluster delete ${GATEWAY_CLUSTER_NAME} || true" # Deleting registry & volumes bash -c "k3d registry delete k3d-registry.localhost || true" - bash -c "docker volume rm k3d-testgateway1-images --force || true" - bash -c "docker volume rm k3d-testdomain1-images --force || true" + bash -c "docker volume rm k3d-${DOMAIN_CLUSTER_NAME}-images --force || true" + bash -c "docker volume rm k3d-${GATEWAY_CLUSTER_NAME}-images --force || true" # Create registry tox -e dev.k8s.registry - # Creating testgateway1 cluster on port 9081 + # Creating test-gateway-1 cluster on port 9081 bash -c '\ - export CLUSTER_NAME=testgateway1 CLUSTER_HTTP_PORT=9081 DEVSPACE_PROFILE=gateway && \ + export CLUSTER_NAME=${GATEWAY_CLUSTER_NAME} CLUSTER_HTTP_PORT=9081 DEVSPACE_PROFILE=gateway && \ tox -e dev.k8s.start && \ tox -e dev.k8s.deploy' - # Creating testdomain1 cluster on port 9082 + # Creating test-domain-1 cluster on port 9082 bash -c '\ - export CLUSTER_NAME=testdomain1 CLUSTER_HTTP_PORT=9082 && 
\ + export CLUSTER_NAME=${DOMAIN_CLUSTER_NAME} CLUSTER_HTTP_PORT=9082 && \ tox -e dev.k8s.start && \ tox -e dev.k8s.deploy' @@ -532,65 +560,115 @@ commands = sleep 30 - # wait for front end - bash packages/grid/scripts/wait_for.sh service frontend --context k3d-testdomain1 --namespace syft - bash -c '(kubectl logs service/frontend --context k3d-testdomain1 --namespace syft -f &) | grep -q -E "Network:\s+https?://[a-zA-Z0-9.-]+:[0-9]+/" || true' - # wait for test gateway 1 - bash packages/grid/scripts/wait_for.sh service mongo --context k3d-testgateway1 --namespace syft - bash packages/grid/scripts/wait_for.sh service backend --context k3d-testgateway1 --namespace syft - bash packages/grid/scripts/wait_for.sh service proxy --context k3d-testgateway1 --namespace syft + bash packages/grid/scripts/wait_for.sh service mongo --context k3d-{env:GATEWAY_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service backend --context k3d-{env:GATEWAY_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service proxy --context k3d-{env:GATEWAY_CLUSTER_NAME} --namespace syft # wait for test domain 1 - bash packages/grid/scripts/wait_for.sh service mongo --context k3d-testdomain1 --namespace syft - bash packages/grid/scripts/wait_for.sh service backend --context k3d-testdomain1 --namespace syft - bash packages/grid/scripts/wait_for.sh service proxy --context k3d-testdomain1 --namespace syft - bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-testdomain1 --namespace syft + bash packages/grid/scripts/wait_for.sh service mongo --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service backend --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service proxy --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service frontend --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash -c '(kubectl logs service/frontend --context k3d-${DOMAIN_CLUSTER_NAME} --namespace syft -f &) | grep -q -E "Network:\s+https?://[a-zA-Z0-9.-]+:[0-9]+/" || true' # Checking logs generated & startup of test-domain 1 - bash -c '(kubectl logs service/backend --context k3d-testdomain1 --namespace syft -f &) | grep -q "Application startup complete" || true' + bash -c '(kubectl logs service/backend --context k3d-${DOMAIN_CLUSTER_NAME} --namespace syft -f &) | grep -q "Application startup complete" || true' # Checking logs generated & startup of testgateway1 - bash -c '(kubectl logs service/backend --context k3d-testgateway1 --namespace syft -f &) | grep -q "Application startup complete" || true' - - # frontend - bash -c 'if [[ "$PYTEST_MODULES" == *"frontend"* ]]; then \ - echo "Starting frontend"; date; \ - pytest tests/integration -m frontend -p no:randomly -k "test_serves_domain_frontend" --co; \ - pytest tests/integration -m frontend -vvvv -p no:randomly -p no:benchmark -o log_cli=True --capture=no -k "test_serves_domain_frontend"; \ - return=$?; \ - echo "Finished frontend"; date; \ - exit $return; \ + bash -c '(kubectl logs service/backend --context k3d-${GATEWAY_CLUSTER_NAME} --namespace syft -f &) | grep -q "Application startup complete" || true' + + # Run Integration Tests + bash -c '\ + PYTEST_MODULES=($PYTEST_MODULES); \ + for i in "${PYTEST_MODULES[@]}"; do \ + echo "Starting test for $i"; date; \ + pytest 
tests/integration -m $i -vvvv -p no:randomly -p no:benchmark -o log_cli=True --capture=no; \ + return=$?; \ + echo "Finished $i"; \ + date; \ + if [[ $return -ne 0 ]]; then \ + exit $return; \ + fi; \ + done' + + # deleting clusters created + bash -c "CLUSTER_NAME=${DOMAIN_CLUSTER_NAME} tox -e dev.k8s.destroy || true" + bash -c "CLUSTER_NAME=${GATEWAY_CLUSTER_NAME} tox -e dev.k8s.destroy || true" + bash -c "k3d registry delete k3d-registry.localhost || true" + bash -c "docker volume rm k3d-${DOMAIN_CLUSTER_NAME}-images --force || true" + bash -c "docker volume rm k3d-${GATEWAY_CLUSTER_NAME}-images --force || true" + +[testenv:stack.test.notebook.k8s] +description = Notebook Tests for Core Stack using K8s +basepython = python3 +deps = + {[testenv:syft]deps} + nbmake +changedir = {toxinidir} +passenv=HOME, USER +allowlist_externals = + devspace + kubectl + grep + sleep + bash + k3d + echo + tox +setenv = + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:remote} + GITHUB_CI = {env:GITHUB_CI:false} + SYFT_BASE_IMAGE_REGISTRY = {env:SYFT_BASE_IMAGE_REGISTRY:k3d-registry.localhost:5800} + DOMAIN_CLUSTER_NAME = {env:DOMAIN_CLUSTER_NAME:test-domain-1} + NODE_PORT = {env:NODE_PORT:8080} +commands = + bash -c "echo Running with GITHUB_CI=$GITHUB_CI; date" + python -c 'import syft as sy; sy.stage_protocol_changes()' + k3d version + + # Deleting Old Cluster + bash -c "k3d cluster delete ${DOMAIN_CLUSTER_NAME} || true" + + # Deleting registry & volumes + bash -c "k3d registry delete k3d-registry.localhost || true" + bash -c "docker volume rm k3d-${DOMAIN_CLUSTER_NAME}-images --force || true" + + # Create registry + tox -e dev.k8s.registry + + + # Creating test-domain-1 cluster on port NODE_PORT + bash -c '\ + export CLUSTER_NAME=${DOMAIN_CLUSTER_NAME} CLUSTER_HTTP_PORT=${NODE_PORT} && \ + tox -e dev.k8s.start && \ + tox -e dev.k8s.deploy' + + # free up build cache after build of images + bash -c 'if [[ "$GITHUB_CI" != "false" ]]; then \ + docker image prune --all --force; \ + docker builder prune --all --force; \ fi' - # Integration + Gateway Connection Tests - # Gateway tests are not run in kuberetes, as currently,it does not have a way to configure - # high/low side warning flag. 
- bash -c "source ./scripts/get_k8s_secret_ci.sh; \ - pytest tests/integration/network -k 'not test_domain_gateway_user_code' -p no:randomly -vvvv" - - # Shutting down the gateway cluster to free up space, as the - # below code does not require gateway cluster - bash -c "CLUSTER_NAME=testgateway1 tox -e dev.k8s.destroy || true" - bash -c "docker volume rm k3d-testgateway1-images --force || true" - - ; container workload - ; bash -c 'if [[ "$PYTEST_MODULES" == *"container_workload"* ]]; then \ - ; echo "Starting Container Workload test"; date; \ - ; pytest tests/integration -m container_workload -p no:randomly --co; \ - ; pytest tests/integration -m container_workload -vvvv -p no:randomly -p no:benchmark -o log_cli=True --capture=no; \ - ; return=$?; \ - ; echo "Finished container workload"; date; \ - ; exit $return; \ - ; fi' - - bash -c "source ./scripts/get_k8s_secret_ci.sh; \ - pytest -x --nbmake --nbmake-timeout=1000 notebooks/api/0.8 -p no:randomly -k 'not 10-container-images.ipynb' -vvvv" + sleep 30 + + # wait for test-domain-1 + bash packages/grid/scripts/wait_for.sh service mongo --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service backend --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service proxy --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash packages/grid/scripts/wait_for.sh service frontend --context k3d-{env:DOMAIN_CLUSTER_NAME} --namespace syft + bash -c '(kubectl logs service/frontend --context k3d-${DOMAIN_CLUSTER_NAME} --namespace syft -f &) | grep -q -E "Network:\s+https?://[a-zA-Z0-9.-]+:[0-9]+/" || true' + + # Checking logs generated & startup of test-domain 1 + bash -c '(kubectl logs service/backend --context k3d-${DOMAIN_CLUSTER_NAME} --namespace syft -f &) | grep -q "Application startup complete" || true' + + bash -c "pytest -x --nbmake notebooks/api/0.8 -p no:randomly -k 'not 10-container-images.ipynb' -vvvv --nbmake-timeout=1000" # deleting clusters created - bash -c "CLUSTER_NAME=testdomain1 tox -e dev.k8s.destroy || true" + bash -c "CLUSTER_NAME=${DOMAIN_CLUSTER_NAME} tox -e dev.k8s.destroy || true" bash -c "k3d registry delete k3d-registry.localhost || true" - bash -c "docker rm $(docker ps -aq) --force || true" - bash -c "docker volume rm k3d-testdomain1-images --force || true" + bash -c "docker volume rm k3d-${DOMAIN_CLUSTER_NAME}-images --force || true" [testenv:syft.build.helm] @@ -719,7 +797,8 @@ commands = bash -c 'k3d --version' ; create registry - bash -c 'k3d registry create registry.localhost --port 5800 -v $HOME/.k3d-registry:/var/lib/registry || true' + bash -c 'docker volume create k3d-registry-vol || true' + bash -c 'k3d registry create registry.localhost --port 5800 -v k3d-registry-vol:/var/lib/registry --no-help || true' ; add patches to host bash -c 'if ! 
grep -q k3d-registry.localhost /etc/hosts; then sudo {envpython} scripts/patch_hosts.py --add-k3d-registry --fix-docker-hosts; fi' @@ -847,7 +926,7 @@ commands = tox -e dev.k8s.{posargs:deploy} [testenv:dev.k8s.launch.enclave] -description = Launch a single Enclave on K8s +description = Launch a single Enclave on K8s passenv = HOME, USER setenv= CLUSTER_NAME = {env:CLUSTER_NAME:test-enclave-1} @@ -869,9 +948,6 @@ allowlist_externals = tox bash commands = - ; purge deployment and dangling resources - tox -e dev.k8s.cleanup - ; destroy cluster bash -c '\ rm -rf .devspace; echo ""; \ @@ -891,7 +967,7 @@ commands = ; destroy registry bash -c 'k3d registry delete registry.localhost || true' - bash -c 'sudo rm -rf ~/.k3d-registry' + bash -c 'docker volume rm k3d-registry-vol --force || true' [testenv:backend.test.basecpu] description = Base CPU Docker Image Test
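The new local job tests in tests/integration/local/job_test.py poll job state with `wait_until` from `syft.service.job.job_service` instead of sleeping for a fixed period. The diff only shows the call sites, so the helper below is a minimal sketch of that polling pattern under assumed `timeout` and `interval` parameters; it is illustrative only and not Syft's actual implementation.

# A minimal wait_until-style polling sketch (illustrative; the timeout and
# interval parameters are assumptions, not taken from the Syft source).
# stdlib
import time
from typing import Callable


def wait_until(
    predicate: Callable[[], bool], timeout: float = 30.0, interval: float = 1.0
) -> bool:
    # Re-evaluate the predicate until it returns True or the timeout expires.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False


# Usage mirroring the tests above (requires a live job object):
# assert wait_until(lambda: job.fetched_status == JobStatus.PROCESSING), "Job not started"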
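`test_peer_health_check` relies on the background `PeerHealthCheckTask` to refresh each peer's `ping_status`, which is why it sleeps for `PeerHealthCheckTask.repeat_time + 1` seconds before asserting `NodePeerConnectionStatus.ACTIVE`. The snippet below is a hypothetical helper, not part of the diff, that expresses the same check as a polling predicate; it uses only names that already appear in the test and assumes a logged-in gateway client plus a wait_until-style poller.

# Hypothetical helper; assumes a gateway_client logged in as in
# test_peer_health_check.
# syft absolute
from syft.service.network.node_peer import NodePeerConnectionStatus


def domain_peer_is_active(gateway_client) -> bool:
    # PeerHealthCheckTask updates ping_status in the background.
    peers = gateway_client.api.services.network.get_all_peers()
    return bool(peers) and peers[0].ping_status == NodePeerConnectionStatus.ACTIVE


# Possible usage, polling instead of a fixed sleep:
# assert wait_until(lambda: domain_peer_is_active(gateway_client),
#                   timeout=PeerHealthCheckTask.repeat_time + 5)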