diff --git a/.buildkite/.env b/.buildkite/.env deleted file mode 100644 index 85b102d07fff..000000000000 --- a/.buildkite/.env +++ /dev/null @@ -1,13 +0,0 @@ -CI -BUILDKITE -BUILDKITE_BUILD_NUMBER -BUILDKITE_BRANCH -BUILDKITE_BUILD_NUMBER -BUILDKITE_JOB_ID -BUILDKITE_BUILD_URL -BUILDKITE_PROJECT_SLUG -BUILDKITE_COMMIT -BUILDKITE_PULL_REQUEST -BUILDKITE_TAG -CODECOV_TOKEN -TRIAL_FLAGS diff --git a/.buildkite/merge_base_branch.sh b/.buildkite/merge_base_branch.sh deleted file mode 100755 index 361440fd1a1c..000000000000 --- a/.buildkite/merge_base_branch.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -set -e - -if [[ "$BUILDKITE_BRANCH" =~ ^(develop|master|dinsic|shhs|release-.*)$ ]]; then - echo "Not merging forward, as this is a release branch" - exit 0 -fi - -if [[ -z $BUILDKITE_PULL_REQUEST_BASE_BRANCH ]]; then - echo "Not a pull request, or hasn't had a PR opened yet..." - - # It probably hasn't had a PR opened yet. Since all PRs land on develop, we - # can probably assume it's based on it and will be merged into it. - GITBASE="develop" -else - # Get the reference, using the GitHub API - GITBASE=$BUILDKITE_PULL_REQUEST_BASE_BRANCH -fi - -echo "--- merge_base_branch $GITBASE" - -# Show what we are before -git --no-pager show -s - -# Set up username so it can do a merge -git config --global user.email bot@matrix.org -git config --global user.name "A robot" - -# Fetch and merge. If it doesn't work, it will raise due to set -e. -git fetch -u origin $GITBASE -git merge --no-edit --no-commit origin/$GITBASE - -# Show what we are after. -git --no-pager show -s diff --git a/.buildkite/worker-blacklist b/.buildkite/worker-blacklist deleted file mode 100644 index 5975cb98cfda..000000000000 --- a/.buildkite/worker-blacklist +++ /dev/null @@ -1,10 +0,0 @@ -# This file serves as a blacklist for SyTest tests that we expect will fail in -# Synapse when run under worker mode. For more details, see sytest-blacklist. - -Can re-join room if re-invited - -# new failures as of https://github.com/matrix-org/sytest/pull/732 -Device list doesn't change if remote server is down - -# https://buildkite.com/matrix-dot-org/synapse/builds/6134#6f67bf47-e234-474d-80e8-c6e1868b15c5 -Server correctly handles incoming m.device_list_update diff --git a/.ci/patch_for_twisted_trunk.sh b/.ci/patch_for_twisted_trunk.sh new file mode 100755 index 000000000000..f524581986a9 --- /dev/null +++ b/.ci/patch_for_twisted_trunk.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# replaces the dependency on Twisted in `python_dependencies` with trunk. + +set -e +cd "$(dirname "$0")"/.. + +sed -i -e 's#"Twisted.*"#"Twisted @ git+https://github.com/twisted/twisted"#' synapse/python_dependencies.py diff --git a/.buildkite/postgres-config.yaml b/.ci/postgres-config.yaml similarity index 86% rename from .buildkite/postgres-config.yaml rename to .ci/postgres-config.yaml index 67e17fa9d1de..f5a4aecd51ca 100644 --- a/.buildkite/postgres-config.yaml +++ b/.ci/postgres-config.yaml @@ -3,7 +3,7 @@ # CI's Docker setup at the point where this file is considered. 
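# Note: the CI workflow publishes the postgres service container on localhost
# (ports 5432:5432), which is why the database `host` below is `localhost`
# rather than `postgres`.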
server_name: "localhost:8800" -signing_key_path: ".buildkite/test.signing.key" +signing_key_path: ".ci/test.signing.key" report_stats: false @@ -11,7 +11,7 @@ database: name: "psycopg2" args: user: postgres - host: postgres + host: localhost password: postgres database: synapse diff --git a/.buildkite/scripts/postgres_exec.py b/.ci/scripts/postgres_exec.py similarity index 92% rename from .buildkite/scripts/postgres_exec.py rename to .ci/scripts/postgres_exec.py index 086b391724ec..0f39a336d52d 100755 --- a/.buildkite/scripts/postgres_exec.py +++ b/.ci/scripts/postgres_exec.py @@ -23,7 +23,7 @@ # We use "postgres" as a database because it's bound to exist and the "synapse" one # doesn't exist yet. db_conn = psycopg2.connect( - user="postgres", host="postgres", password="postgres", dbname="postgres" + user="postgres", host="localhost", password="postgres", dbname="postgres" ) db_conn.autocommit = True cur = db_conn.cursor() diff --git a/.ci/scripts/test_export_data_command.sh b/.ci/scripts/test_export_data_command.sh new file mode 100755 index 000000000000..ab96387a0aef --- /dev/null +++ b/.ci/scripts/test_export_data_command.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +# Test for the export-data admin command against sqlite and postgres + +set -xe +cd "$(dirname "$0")/../.." + +echo "--- Install dependencies" + +# Install dependencies for this test. +pip install psycopg2 + +# Install Synapse itself. This won't update any libraries. +pip install -e . + +echo "--- Generate the signing key" + +# Generate the server's signing key. +python -m synapse.app.homeserver --generate-keys -c .ci/sqlite-config.yaml + +echo "--- Prepare test database" + +# Make sure the SQLite3 database is using the latest schema and has no pending background update. +scripts/update_synapse_database --database-config .ci/sqlite-config.yaml --run-background-updates + +# Run the export-data command on the sqlite test database +python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml export-data @anon-20191002_181700-832:localhost:8800 \ +--output-directory /tmp/export_data + +# Test that the output directory exists and contains the rooms directory +dir="/tmp/export_data/rooms" +if [ -d "$dir" ]; then + echo "Command successful, this test passes" +else + echo "No output directories found, the command fails against a sqlite database." + exit 1 +fi + +# Create the PostgreSQL database. +.ci/scripts/postgres_exec.py "CREATE DATABASE synapse" + +# Port the SQLite databse to postgres so we can check command works against postgres +echo "+++ Port SQLite3 databse to postgres" +scripts/synapse_port_db --sqlite-database .ci/test_db.db --postgres-config .ci/postgres-config.yaml + +# Run the export-data command on postgres database +python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml export-data @anon-20191002_181700-832:localhost:8800 \ +--output-directory /tmp/export_data2 + +# Test that the output directory exists and contains the rooms directory +dir2="/tmp/export_data2/rooms" +if [ -d "$dir2" ]; then + echo "Command successful, this test passes" +else + echo "No output directories found, the command fails against a postgres database." 
+ exit 1 +fi diff --git a/.buildkite/scripts/test_old_deps.sh b/.ci/scripts/test_old_deps.sh similarity index 81% rename from .buildkite/scripts/test_old_deps.sh rename to .ci/scripts/test_old_deps.sh index 9270d55f0461..8b473936f8c3 100755 --- a/.buildkite/scripts/test_old_deps.sh +++ b/.ci/scripts/test_old_deps.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# this script is run by buildkite in a plain `bionic` container; it installs the +# this script is run by GitHub Actions in a plain `bionic` container; it installs the # minimal requirements for tox and hands over to the py3-old tox environment. set -ex diff --git a/.buildkite/scripts/test_synapse_port_db.sh b/.ci/scripts/test_synapse_port_db.sh similarity index 59% rename from .buildkite/scripts/test_synapse_port_db.sh rename to .ci/scripts/test_synapse_port_db.sh index 82d7d56d4e9f..797904e64ca5 100755 --- a/.buildkite/scripts/test_synapse_port_db.sh +++ b/.ci/scripts/test_synapse_port_db.sh @@ -7,7 +7,7 @@ set -xe -cd `dirname $0`/../.. +cd "$(dirname "$0")/../.." echo "--- Install dependencies" @@ -20,22 +20,22 @@ pip install -e . echo "--- Generate the signing key" # Generate the server's signing key. -python -m synapse.app.homeserver --generate-keys -c .buildkite/sqlite-config.yaml +python -m synapse.app.homeserver --generate-keys -c .ci/sqlite-config.yaml echo "--- Prepare test database" # Make sure the SQLite3 database is using the latest schema and has no pending background update. -scripts-dev/update_database --database-config .buildkite/sqlite-config.yaml +scripts/update_synapse_database --database-config .ci/sqlite-config.yaml --run-background-updates # Create the PostgreSQL database. -./.buildkite/scripts/postgres_exec.py "CREATE DATABASE synapse" +.ci/scripts/postgres_exec.py "CREATE DATABASE synapse" echo "+++ Run synapse_port_db against test database" -coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --postgres-config .buildkite/postgres-config.yaml +coverage run scripts/synapse_port_db --sqlite-database .ci/test_db.db --postgres-config .ci/postgres-config.yaml # We should be able to run twice against the same database. echo "+++ Run synapse_port_db a second time" -coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --postgres-config .buildkite/postgres-config.yaml +coverage run scripts/synapse_port_db --sqlite-database .ci/test_db.db --postgres-config .ci/postgres-config.yaml ##### @@ -44,14 +44,14 @@ coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --p echo "--- Prepare empty SQLite database" # we do this by deleting the sqlite db, and then doing the same again. -rm .buildkite/test_db.db +rm .ci/test_db.db -scripts-dev/update_database --database-config .buildkite/sqlite-config.yaml +scripts/update_synapse_database --database-config .ci/sqlite-config.yaml --run-background-updates # re-create the PostgreSQL database. 
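# Note: postgres_exec.py executes each of its command-line arguments as a
# separate SQL statement, with autocommit enabled.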
-./.buildkite/scripts/postgres_exec.py \ +.ci/scripts/postgres_exec.py \ "DROP DATABASE synapse" \ "CREATE DATABASE synapse" echo "+++ Run synapse_port_db against empty database" -coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --postgres-config .buildkite/postgres-config.yaml +coverage run scripts/synapse_port_db --sqlite-database .ci/test_db.db --postgres-config .ci/postgres-config.yaml diff --git a/.buildkite/sqlite-config.yaml b/.ci/sqlite-config.yaml similarity index 80% rename from .buildkite/sqlite-config.yaml rename to .ci/sqlite-config.yaml index d16459cfd947..3373743da3cd 100644 --- a/.buildkite/sqlite-config.yaml +++ b/.ci/sqlite-config.yaml @@ -3,14 +3,14 @@ # schema and run background updates on it. server_name: "localhost:8800" -signing_key_path: ".buildkite/test.signing.key" +signing_key_path: ".ci/test.signing.key" report_stats: false database: name: "sqlite3" args: - database: ".buildkite/test_db.db" + database: ".ci/test_db.db" # Suppress the key server warning. trusted_key_servers: [] diff --git a/.buildkite/test_db.db b/.ci/test_db.db similarity index 100% rename from .buildkite/test_db.db rename to .ci/test_db.db diff --git a/.ci/twisted_trunk_build_failed_issue_template.md b/.ci/twisted_trunk_build_failed_issue_template.md new file mode 100644 index 000000000000..2ead1dc39477 --- /dev/null +++ b/.ci/twisted_trunk_build_failed_issue_template.md @@ -0,0 +1,4 @@ +--- +title: CI run against Twisted trunk is failing +--- +See https://github.com/{{env.GITHUB_REPOSITORY}}/actions/runs/{{env.GITHUB_RUN_ID}} diff --git a/.ci/worker-blacklist b/.ci/worker-blacklist new file mode 100644 index 000000000000..cb8eae5d2a34 --- /dev/null +++ b/.ci/worker-blacklist @@ -0,0 +1,2 @@ +# This file serves as a blacklist for SyTest tests that we expect will fail in +# Synapse when run under worker mode. For more details, see sytest-blacklist. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000000..d6cd75f1d076 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# Automatically request reviews from the synapse-core team when a pull request comes in. +* @matrix-org/synapse-core \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index fc22d894269d..6c3a99849925 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,12 +1,13 @@ ### Pull Request Checklist - + * [ ] Pull request is based on the develop branch -* [ ] Pull request includes a [changelog file](https://github.com/matrix-org/synapse/blob/master/CONTRIBUTING.md#changelog). The entry should: +* [ ] Pull request includes a [changelog file](https://matrix-org.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. 
-* [ ] Pull request includes a [sign off](https://github.com/matrix-org/synapse/blob/master/CONTRIBUTING.md#sign-off) -* [ ] Code style is correct (run the [linters](https://github.com/matrix-org/synapse/blob/master/CONTRIBUTING.md#code-style)) +* [ ] Pull request includes a [sign off](https://matrix-org.github.io/synapse/latest/development/contributing_guide.html#sign-off) +* [ ] [Code style](https://matrix-org.github.io/synapse/latest/code_style.html) is correct + (run the [linters](https://matrix-org.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index af7ed21fce7f..3276d1e122e2 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -5,7 +5,7 @@ name: Build docker images on: push: tags: ["v*"] - branches: [ master, main ] + branches: [ master, main, develop ] workflow_dispatch: permissions: @@ -38,6 +38,9 @@ jobs: id: set-tag run: | case "${GITHUB_REF}" in + refs/heads/develop) + tag=develop + ;; refs/heads/master|refs/heads/main) tag=latest ;; diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 808f82533128..2bf32e376b06 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -61,6 +61,5 @@ jobs: uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - keep_files: true publish_dir: ./book destination_dir: ./${{ steps.vars.outputs.branch-version }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 239553ae138e..21c9ee7823c7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true - + jobs: lint: runs-on: ubuntu-latest @@ -38,20 +38,15 @@ jobs: if: ${{ github.base_ref == 'develop' || contains(github.base_ref, 'release-') }} runs-on: ubuntu-latest steps: - # Note: This and the script can be simplified once we drop Buildkite. 
See: - # https://github.com/actions/checkout/issues/266#issuecomment-638346893 - # https://github.com/actions/checkout/issues/416 - uses: actions/checkout@v2 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - uses: actions/setup-python@v2 - run: pip install tox - - name: Patch Buildkite-specific test script - run: | - sed -i -e 's/\$BUILDKITE_PULL_REQUEST/${{ github.event.number }}/' \ - scripts-dev/check-newsfragment - run: scripts-dev/check-newsfragment + env: + PULL_REQUEST_NUMBER: ${{ github.event.number }} lint-sdist: runs-on: ubuntu-latest @@ -81,22 +76,25 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] database: ["sqlite"] + toxenv: ["py"] include: # Newest Python without optional deps - - python-version: "3.9" - toxenv: "py-noextras,combine" + - python-version: "3.10" + toxenv: "py-noextras" # Oldest Python with PostgreSQL - python-version: "3.6" database: "postgres" postgres-version: "9.6" + toxenv: "py" - # Newest Python with PostgreSQL - - python-version: "3.9" + # Newest Python with newest PostgreSQL + - python-version: "3.10" database: "postgres" - postgres-version: "13" + postgres-version: "14" + toxenv: "py" steps: - uses: actions/checkout@v2 @@ -116,7 +114,7 @@ jobs: if: ${{ matrix.postgres-version }} timeout-minutes: 2 run: until pg_isready -h localhost; do sleep 1; done - - run: tox -e py,combine + - run: tox -e ${{ matrix.toxenv }} env: TRIAL_FLAGS: "--jobs=2" SYNAPSE_POSTGRES: ${{ matrix.database == 'postgres' || '' }} @@ -124,6 +122,8 @@ jobs: SYNAPSE_POSTGRES_USER: postgres SYNAPSE_POSTGRES_PASSWORD: postgres - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -144,10 +144,12 @@ jobs: uses: docker://ubuntu:bionic # For old python and sqlite with: workdir: /github/workspace - entrypoint: .buildkite/scripts/test_old_deps.sh + entrypoint: .ci/scripts/test_old_deps.sh env: TRIAL_FLAGS: "--jobs=2" - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -174,10 +176,12 @@ jobs: with: python-version: ${{ matrix.python-version }} - run: pip install tox - - run: tox -e py,combine + - run: tox -e py env: TRIAL_FLAGS: "--jobs=2" - name: Dump logs + # Logs are most useful when the command fails, always include them. 
+ if: ${{ always() }} # Note: Dumps to workflow logs instead of using actions/upload-artifact # This keeps logs colocated with failing jobs # It also ignores find's exit code; this is a best effort affair @@ -197,12 +201,13 @@ jobs: volumes: - ${{ github.workspace }}:/src env: - BUILDKITE_BRANCH: ${{ github.head_ref }} + SYTEST_BRANCH: ${{ github.head_ref }} POSTGRES: ${{ matrix.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.workers && 1 }} REDIS: ${{ matrix.redis && 1 }} BLACKLIST: ${{ matrix.workers && 'synapse-blacklist-with-workers' }} + TOP: ${{ github.workspace }} strategy: fail-fast: false @@ -232,7 +237,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Prepare test blacklist - run: cat sytest-blacklist .buildkite/worker-blacklist > synapse-blacklist-with-workers + run: cat sytest-blacklist .ci/worker-blacklist > synapse-blacklist-with-workers - name: Run SyTest run: /bootstrap.sh synapse working-directory: /src @@ -248,18 +253,49 @@ jobs: /logs/results.tap /logs/**/*.log* + export-data: + if: ${{ !failure() && !cancelled() }} # Allow previous steps to be skipped, but not fail + needs: [linting-done, portdb] + runs-on: ubuntu-latest + env: + TOP: ${{ github.workspace }} + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_PASSWORD: "postgres" + POSTGRES_INITDB_ARGS: "--lc-collate C --lc-ctype C --encoding UTF8" + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - run: sudo apt-get -qq install xmlsec1 + - uses: actions/setup-python@v2 + with: + python-version: "3.9" + - run: .ci/scripts/test_export_data_command.sh + portdb: if: ${{ !failure() && !cancelled() }} # Allow previous steps to be skipped, but not fail needs: linting-done runs-on: ubuntu-latest + env: + TOP: ${{ github.workspace }} strategy: matrix: include: - python-version: "3.6" postgres-version: "9.6" - - python-version: "3.9" - postgres-version: "13" + - python-version: "3.10" + postgres-version: "14" services: postgres: @@ -281,13 +317,7 @@ jobs: - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - name: Patch Buildkite-specific test scripts - run: | - sed -i -e 's/host="postgres"/host="localhost"/' .buildkite/scripts/postgres_exec.py - sed -i -e 's/host: postgres/host: localhost/' .buildkite/postgres-config.yaml - sed -i -e 's|/src/||' .buildkite/{sqlite,postgres}-config.yaml - sed -i -e 's/\$TOP/\$GITHUB_WORKSPACE/' .coveragerc - - run: .buildkite/scripts/test_synapse_port_db.sh + - run: .ci/scripts/test_synapse_port_db.sh complement: if: ${{ !failure() && !cancelled() }} @@ -344,7 +374,7 @@ jobs: working-directory: complement/dockerfiles # Run Complement - - run: go test -v -tags synapse_blacklist,msc2403,msc2946,msc3083 ./tests/... + - run: go test -v -tags synapse_blacklist,msc2403 ./tests/... 
env: COMPLEMENT_BASE_IMAGE: complement-synapse:latest working-directory: complement @@ -374,6 +404,11 @@ jobs: rc=0 results=$(jq -r 'to_entries[] | [.key,.value.result] | join(" ")' <<< $NEEDS_CONTEXT) while read job result ; do + # The newsfile lint may be skipped on non PR builds + if [ $result == "skipped" ] && [ $job == "lint-newsfile" ]; then + continue + fi + if [ "$result" != "success" ]; then echo "::set-failed ::Job $job returned $result" rc=1 diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml new file mode 100644 index 000000000000..e974ac7aba37 --- /dev/null +++ b/.github/workflows/twisted_trunk.yml @@ -0,0 +1,92 @@ +name: Twisted Trunk + +on: + schedule: + - cron: 0 8 * * * + + workflow_dispatch: + +jobs: + mypy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - run: .ci/patch_for_twisted_trunk.sh + - run: pip install tox + - run: tox -e mypy + + trial: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - run: sudo apt-get -qq install xmlsec1 + - uses: actions/setup-python@v2 + with: + python-version: 3.6 + - run: .ci/patch_for_twisted_trunk.sh + - run: pip install tox + - run: tox -e py + env: + TRIAL_FLAGS: "--jobs=2" + + - name: Dump logs + # Logs are most useful when the command fails, always include them. + if: ${{ always() }} + # Note: Dumps to workflow logs instead of using actions/upload-artifact + # This keeps logs colocated with failing jobs + # It also ignores find's exit code; this is a best effort affair + run: >- + find _trial_temp -name '*.log' + -exec echo "::group::{}" \; + -exec cat {} \; + -exec echo "::endgroup::" \; + || true + + sytest: + runs-on: ubuntu-latest + container: + image: matrixdotorg/sytest-synapse:buster + volumes: + - ${{ github.workspace }}:/src + + steps: + - uses: actions/checkout@v2 + - name: Patch dependencies + run: .ci/patch_for_twisted_trunk.sh + working-directory: /src + - name: Run SyTest + run: /bootstrap.sh synapse + working-directory: /src + - name: Summarise results.tap + if: ${{ always() }} + run: /sytest/scripts/tap_to_gha.pl /logs/results.tap + - name: Upload SyTest logs + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: Sytest Logs - ${{ job.status }} - (${{ join(matrix.*, ', ') }}) + path: | + /logs/results.tap + /logs/**/*.log* + + # open an issue if the build fails, so we know about it. + open-issue: + if: failure() + needs: + - mypy + - trial + - sytest + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: JasonEtco/create-an-issue@5d9504915f79f9cc6d791934b8ef34f2353dd74d # v2.5.0, 2020-12-06 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + update_existing: true + filename: .ci/twisted_trunk_build_failed_issue_template.md diff --git a/.gitignore b/.gitignore index 6b9257b5c95b..fe137f337019 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ __pycache__/ /.coverage* /.mypy_cache/ /.tox +/.tox-pg-container /build/ /coverage.* /dist/ diff --git a/CHANGES.md b/CHANGES.md index 0e5e052951a9..9f6e29631df6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,982 @@ +Synapse 1.49.2 (2021-12-21) +=========================== + +This release fixes a regression introduced in Synapse 1.49.0 which could cause `/sync` requests to take significantly longer. This would particularly affect "initial" syncs for users participating in a large number of rooms, and in extreme cases, could make it impossible for such users to log in on a new client. 
+ +**Note:** in line with our [deprecation policy](https://matrix-org.github.io/synapse/latest/deprecation_policy.html) for platform dependencies, this will be the last release to support Python 3.6 and PostgreSQL 9.6, both of which have now reached upstream end-of-life. Synapse will require Python 3.7+ and PostgreSQL 10+. + +**Note:** We will also stop producing packages for Ubuntu 18.04 (Bionic Beaver) after this release, as it uses Python 3.6. + +Bugfixes +-------- + +- Fix a performance regression in `/sync` handling, introduced in 1.49.0. ([\#11583](https://github.com/matrix-org/synapse/issues/11583)) + +Internal Changes +---------------- + +- Work around a build problem on Debian Buster. ([\#11625](https://github.com/matrix-org/synapse/issues/11625)) + + +Synapse 1.49.1 (2021-12-21) +=========================== + +Not released due to problems building the debian packages. + + +Synapse 1.49.0 (2021-12-14) +=========================== + +No significant changes since version 1.49.0rc1. + + +Support for Ubuntu 21.04 ends next month on the 20th of January +--------------------------------------------------------------- + +For users of Ubuntu 21.04 (Hirsute Hippo), please be aware that [upstream support for this version of Ubuntu will end next month][Ubuntu2104EOL]. +We will stop producing packages for Ubuntu 21.04 after upstream support ends. + +[Ubuntu2104EOL]: https://lists.ubuntu.com/archives/ubuntu-announce/2021-December/000275.html + + +The wiki has been migrated to the documentation website +------------------------------------------------------- + +We've decided to move the existing, somewhat stagnant pages from the GitHub wiki +to the [documentation website](https://matrix-org.github.io/synapse/latest/). + +This was done for two reasons. The first was to ensure that changes are checked by +multiple authors before being committed (everyone makes mistakes!) and the second +was visibility of the documentation. Not everyone knows that Synapse has some very +useful information hidden away in its GitHub wiki pages. Bringing them to the +documentation website should help with visibility, as well as keep all Synapse documentation +in one, easily-searchable location. + +Note that contributions to the documentation website happen through [GitHub pull +requests](https://github.com/matrix-org/synapse/pulls). Please visit [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) +if you need help with the process! + + +Synapse 1.49.0rc1 (2021-12-07) +============================== + +Features +-------- + +- Add [MSC3030](https://github.com/matrix-org/matrix-doc/pull/3030) experimental client and federation API endpoints to get the closest event to a given timestamp. ([\#9445](https://github.com/matrix-org/synapse/issues/9445)) +- Include bundled relation aggregations during a limited `/sync` request and `/relations` request, per [MSC2675](https://github.com/matrix-org/matrix-doc/pull/2675). ([\#11284](https://github.com/matrix-org/synapse/issues/11284), [\#11478](https://github.com/matrix-org/synapse/issues/11478)) +- Add plugin support for controlling database background updates. ([\#11306](https://github.com/matrix-org/synapse/issues/11306), [\#11475](https://github.com/matrix-org/synapse/issues/11475), [\#11479](https://github.com/matrix-org/synapse/issues/11479)) +- Support the stable API endpoints for [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946): the room `/hierarchy` endpoint. 
([\#11329](https://github.com/matrix-org/synapse/issues/11329))
+- Add admin API to get some information about federation status with remote servers. ([\#11407](https://github.com/matrix-org/synapse/issues/11407))
+- Support expiry of refresh tokens and expiry of the overall session when refresh tokens are in use. ([\#11425](https://github.com/matrix-org/synapse/issues/11425))
+- Stabilise support for [MSC2918](https://github.com/matrix-org/matrix-doc/blob/main/proposals/2918-refreshtokens.md#msc2918-refresh-tokens) refresh tokens as they have now been merged into the Matrix specification. ([\#11435](https://github.com/matrix-org/synapse/issues/11435), [\#11522](https://github.com/matrix-org/synapse/issues/11522))
+- Update [MSC2918 refresh token](https://github.com/matrix-org/matrix-doc/blob/main/proposals/2918-refreshtokens.md#msc2918-refresh-tokens) support to conform to the latest revision: accept the `refresh_tokens` parameter in the request body rather than in the URL parameters. ([\#11430](https://github.com/matrix-org/synapse/issues/11430))
+- Support configuring the lifetime of non-refreshable access tokens separately to refreshable access tokens. ([\#11445](https://github.com/matrix-org/synapse/issues/11445))
+- Expose `synapse_homeserver` and `synapse_worker` commands as entry points to run Synapse's main process and worker processes, respectively. Contributed by @Ma27. ([\#11449](https://github.com/matrix-org/synapse/issues/11449))
+- `synctl stop` will now wait for Synapse to exit before returning. ([\#11459](https://github.com/matrix-org/synapse/issues/11459), [\#11490](https://github.com/matrix-org/synapse/issues/11490))
+- Extend the "delete room" admin API to work correctly on rooms which have previously been partially deleted. ([\#11523](https://github.com/matrix-org/synapse/issues/11523))
+- Add support for the `/_matrix/client/v3/login/sso/redirect/{idpId}` API from Matrix v1.1. This endpoint was overlooked when support for v3 endpoints was added in Synapse 1.48.0rc1. ([\#11451](https://github.com/matrix-org/synapse/issues/11451))
+
+
+Bugfixes
+--------
+
+- Fix using [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) batch sending in combination with event persistence workers. Contributed by @tulir at Beeper. ([\#11220](https://github.com/matrix-org/synapse/issues/11220))
+- Fix a long-standing bug where all requests that read events from the database could get stuck as a result of losing the database connection, properly this time. Also fix a race condition introduced in the previous insufficient fix in Synapse 1.47.0. ([\#11376](https://github.com/matrix-org/synapse/issues/11376))
+- The `/send_join` response now includes the stable `event` field instead of the unstable field from [MSC3083](https://github.com/matrix-org/matrix-doc/pull/3083). ([\#11413](https://github.com/matrix-org/synapse/issues/11413))
+- Fix a bug introduced in Synapse 1.47.0 where `send_join` could fail due to an outdated `ijson` version. ([\#11439](https://github.com/matrix-org/synapse/issues/11439), [\#11441](https://github.com/matrix-org/synapse/issues/11441), [\#11460](https://github.com/matrix-org/synapse/issues/11460))
+- Fix a bug introduced in Synapse 1.36.0 which could cause problems fetching event-signing keys from trusted key servers. ([\#11440](https://github.com/matrix-org/synapse/issues/11440))
+- Fix a bug introduced in Synapse 1.47.1 where the media repository would fail to work if the media store path contained any symbolic links. ([\#11446](https://github.com/matrix-org/synapse/issues/11446))
+- Fix an `LruCache` corruption bug, introduced in Synapse 1.38.0, that would cause certain requests to fail until the next Synapse restart. ([\#11454](https://github.com/matrix-org/synapse/issues/11454))
+- Fix a long-standing bug where invites from ignored users were included in incremental syncs. ([\#11511](https://github.com/matrix-org/synapse/issues/11511))
+- Fix a regression in Synapse 1.48.0 where presence workers would not clear their presence updates over replication on shutdown. ([\#11518](https://github.com/matrix-org/synapse/issues/11518))
+- Fix a regression in Synapse 1.48.0 where the module API's `looping_background_call` method would spam errors to the logs when given a non-async function. ([\#11524](https://github.com/matrix-org/synapse/issues/11524))
+
+
+Updates to the Docker image
+---------------------------
+
+- Update `Dockerfile-workers` to healthcheck all workers in the container. ([\#11429](https://github.com/matrix-org/synapse/issues/11429))
+
+
+Improved Documentation
+----------------------
+
+- Update the media repository documentation. ([\#11415](https://github.com/matrix-org/synapse/issues/11415))
+- Update section about backward extremities in the room DAG concepts doc to correct the misconception about backward extremities indicating whether we have fetched an event's `prev_events`. ([\#11469](https://github.com/matrix-org/synapse/issues/11469))
+
+
+Internal Changes
+----------------
+
+- Add `Final` annotation to string constants in `synapse.api.constants` so that they get typed as `Literal`s. ([\#11356](https://github.com/matrix-org/synapse/issues/11356))
+- Add a check to ensure that users cannot start the Synapse master process when `worker_app` is set. ([\#11416](https://github.com/matrix-org/synapse/issues/11416))
+- Add a note about postgres memory management and hugepages to postgres doc. ([\#11467](https://github.com/matrix-org/synapse/issues/11467))
+- Add missing type hints to `synapse.config` module. ([\#11465](https://github.com/matrix-org/synapse/issues/11465))
+- Add missing type hints to `synapse.federation`. ([\#11483](https://github.com/matrix-org/synapse/issues/11483))
+- Add type annotations to `tests.storage.test_appservice`. ([\#11488](https://github.com/matrix-org/synapse/issues/11488), [\#11492](https://github.com/matrix-org/synapse/issues/11492))
+- Add type annotations to some of the configuration surrounding refresh tokens. ([\#11428](https://github.com/matrix-org/synapse/issues/11428))
+- Add type hints to `synapse/tests/rest/admin`. ([\#11501](https://github.com/matrix-org/synapse/issues/11501))
+- Add type hints to storage classes. ([\#11411](https://github.com/matrix-org/synapse/issues/11411))
+- Add wiki pages to documentation website. ([\#11402](https://github.com/matrix-org/synapse/issues/11402))
+- Clean up `tests.storage.test_main` to remove use of legacy code. ([\#11493](https://github.com/matrix-org/synapse/issues/11493))
+- Clean up `tests.test_visibility` to remove legacy code. ([\#11495](https://github.com/matrix-org/synapse/issues/11495))
+- Convert status codes to `HTTPStatus` in `synapse.rest.admin`. ([\#11452](https://github.com/matrix-org/synapse/issues/11452), [\#11455](https://github.com/matrix-org/synapse/issues/11455))
+- Extend the `scripts-dev/sign_json` script to support signing events. ([\#11486](https://github.com/matrix-org/synapse/issues/11486))
+- Improve internal types in push code. ([\#11409](https://github.com/matrix-org/synapse/issues/11409))
+- Improve type annotations in `synapse.module_api`. ([\#11029](https://github.com/matrix-org/synapse/issues/11029))
+- Improve type hints for `LruCache`. ([\#11453](https://github.com/matrix-org/synapse/issues/11453))
+- Preparation for database schema simplifications: disambiguate queries on `state_key`. ([\#11497](https://github.com/matrix-org/synapse/issues/11497))
+- Refactor `backfilled` into specific behavior function arguments (`_persist_events_and_state_updates` and downstream calls). ([\#11417](https://github.com/matrix-org/synapse/issues/11417))
+- Refactor `get_version_string` to fix up types and duplicated code. ([\#11468](https://github.com/matrix-org/synapse/issues/11468))
+- Refactor various parts of the `/sync` handler. ([\#11494](https://github.com/matrix-org/synapse/issues/11494), [\#11515](https://github.com/matrix-org/synapse/issues/11515))
+- Remove unnecessary `json.dumps` from `tests.rest.admin`. ([\#11461](https://github.com/matrix-org/synapse/issues/11461))
+- Save the OpenID Connect session ID on login. ([\#11482](https://github.com/matrix-org/synapse/issues/11482))
+- Update and clean up recently ported documentation pages. ([\#11466](https://github.com/matrix-org/synapse/issues/11466))
+
+
+Synapse 1.48.0 (2021-11-30)
+===========================
+
+This release removes support for the long-deprecated `trust_identity_server_for_password_resets` configuration flag.
+
+This release also fixes some performance issues with some background database updates introduced in Synapse 1.47.0.
+
+No significant changes since 1.48.0rc1.
+
+Synapse 1.48.0rc1 (2021-11-25)
+==============================
+
+Features
+--------
+
+- Experimental support for the thread relation defined in [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). ([\#11161](https://github.com/matrix-org/synapse/issues/11161))
+- Support filtering by relation senders & types per [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). ([\#11236](https://github.com/matrix-org/synapse/issues/11236))
+- Add support for the `/_matrix/client/v3` and `/_matrix/media/v3` APIs from Matrix v1.1. ([\#11318](https://github.com/matrix-org/synapse/issues/11318), [\#11371](https://github.com/matrix-org/synapse/issues/11371))
+- Support the stable version of [MSC2778](https://github.com/matrix-org/matrix-doc/pull/2778): the `m.login.application_service` login type. Contributed by @tulir. ([\#11335](https://github.com/matrix-org/synapse/issues/11335))
+- Add a new version of delete room admin API `DELETE /_synapse/admin/v2/rooms/<room_id>` to run it in the background. Contributed by @dklimpel. ([\#11223](https://github.com/matrix-org/synapse/issues/11223))
+- Allow the admin [Delete Room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api) to block a room without the need to join it. ([\#11228](https://github.com/matrix-org/synapse/issues/11228))
+- Add an admin API to un-shadow-ban a user. ([\#11347](https://github.com/matrix-org/synapse/issues/11347))
+- Add an admin API to run background database schema updates. ([\#11352](https://github.com/matrix-org/synapse/issues/11352))
+- Add an admin API for blocking a room. ([\#11324](https://github.com/matrix-org/synapse/issues/11324))
+- Update the JWT login type to support a custom `sub` claim. ([\#11361](https://github.com/matrix-org/synapse/issues/11361))
+- Store and allow querying of arbitrary event relations. ([\#11391](https://github.com/matrix-org/synapse/issues/11391))
+
+
+Bugfixes
+--------
+
+- Fix a long-standing bug wherein display names or avatar URLs containing null bytes cause an internal server error when stored in the DB. ([\#11230](https://github.com/matrix-org/synapse/issues/11230))
+- Prevent [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical state events from being pushed to an application service via `/transactions`. ([\#11265](https://github.com/matrix-org/synapse/issues/11265))
+- Fix a long-standing bug where uploading extremely thin images (e.g. 1000x1) would fail. Contributed by @Neeeflix. ([\#11288](https://github.com/matrix-org/synapse/issues/11288))
+- Fix a bug, introduced in Synapse 1.46.0, which caused the `check_3pid_auth` and `on_logged_out` callbacks in legacy password authentication provider modules to not be registered. Modules using the generic module interface were not affected. ([\#11340](https://github.com/matrix-org/synapse/issues/11340))
+- Fix a bug introduced in 1.41.0 where space hierarchy responses would be incorrectly reused if multiple users were to make the same request at the same time. ([\#11355](https://github.com/matrix-org/synapse/issues/11355))
+- Fix a bug introduced in 1.45.0 where the `read_templates` method of the module API would error. ([\#11377](https://github.com/matrix-org/synapse/issues/11377))
+- Fix an issue introduced in 1.47.0 which prevented servers re-joining rooms they had previously left, if their signing keys were replaced. ([\#11379](https://github.com/matrix-org/synapse/issues/11379))
+- Fix a bug introduced in 1.13.0 where creating and publishing a room could cause errors if `room_list_publication_rules` is configured. ([\#11392](https://github.com/matrix-org/synapse/issues/11392))
+- Improve performance of various background database updates. ([\#11421](https://github.com/matrix-org/synapse/issues/11421), [\#11422](https://github.com/matrix-org/synapse/issues/11422))
+
+
+Improved Documentation
+----------------------
+
+- Suggest users of the Debian packages add configuration to `/etc/matrix-synapse/conf.d/` to prevent, upon upgrade, being asked to choose between their configuration and the maintainer's. ([\#11281](https://github.com/matrix-org/synapse/issues/11281))
+- Fix typos in the documentation for the `username_available` admin API. Contributed by Stanislav Motylkov. ([\#11286](https://github.com/matrix-org/synapse/issues/11286))
+- Add Single Sign-On, SAML and CAS pages to the documentation. ([\#11298](https://github.com/matrix-org/synapse/issues/11298))
+- Change the two words 'Home server' to the single word 'homeserver' in documentation. ([\#11320](https://github.com/matrix-org/synapse/issues/11320))
+- Fix missing quotes for wildcard domains in `federation_certificate_verification_whitelist`. ([\#11381](https://github.com/matrix-org/synapse/issues/11381))
+
+
+Deprecations and Removals
+-------------------------
+
+- Remove deprecated `trust_identity_server_for_password_resets` configuration flag. ([\#11333](https://github.com/matrix-org/synapse/issues/11333), [\#11395](https://github.com/matrix-org/synapse/issues/11395))
+
+
+Internal Changes
+----------------
+
+- Add type annotations to `synapse.metrics`. ([\#10847](https://github.com/matrix-org/synapse/issues/10847))
+- Split out federated PDU retrieval function into a non-cached version.
([\#11242](https://github.com/matrix-org/synapse/issues/11242)) +- Clean up code relating to to-device messages and sending ephemeral events to application services. ([\#11247](https://github.com/matrix-org/synapse/issues/11247)) +- Fix a small typo in the error response when a relation type other than 'm.annotation' is passed to `GET /rooms/{room_id}/aggregations/{event_id}`. ([\#11278](https://github.com/matrix-org/synapse/issues/11278)) +- Drop unused database tables `room_stats_historical` and `user_stats_historical`. ([\#11280](https://github.com/matrix-org/synapse/issues/11280)) +- Require all files in synapse/ and tests/ to pass mypy unless specifically excluded. ([\#11282](https://github.com/matrix-org/synapse/issues/11282), [\#11285](https://github.com/matrix-org/synapse/issues/11285), [\#11359](https://github.com/matrix-org/synapse/issues/11359)) +- Add missing type hints to `synapse.app`. ([\#11287](https://github.com/matrix-org/synapse/issues/11287)) +- Remove unused parameters on `FederationEventHandler._check_event_auth`. ([\#11292](https://github.com/matrix-org/synapse/issues/11292)) +- Add type hints to `synapse._scripts`. ([\#11297](https://github.com/matrix-org/synapse/issues/11297)) +- Fix an issue which prevented the `remove_deleted_devices_from_device_inbox` background database schema update from running when updating from a recent Synapse version. ([\#11303](https://github.com/matrix-org/synapse/issues/11303)) +- Add type hints to storage classes. ([\#11307](https://github.com/matrix-org/synapse/issues/11307), [\#11310](https://github.com/matrix-org/synapse/issues/11310), [\#11311](https://github.com/matrix-org/synapse/issues/11311), [\#11312](https://github.com/matrix-org/synapse/issues/11312), [\#11313](https://github.com/matrix-org/synapse/issues/11313), [\#11314](https://github.com/matrix-org/synapse/issues/11314), [\#11316](https://github.com/matrix-org/synapse/issues/11316), [\#11322](https://github.com/matrix-org/synapse/issues/11322), [\#11332](https://github.com/matrix-org/synapse/issues/11332), [\#11339](https://github.com/matrix-org/synapse/issues/11339), [\#11342](https://github.com/matrix-org/synapse/issues/11342)) +- Add type hints to `synapse.util`. ([\#11321](https://github.com/matrix-org/synapse/issues/11321), [\#11328](https://github.com/matrix-org/synapse/issues/11328)) +- Improve type annotations in Synapse's test suite. ([\#11323](https://github.com/matrix-org/synapse/issues/11323), [\#11330](https://github.com/matrix-org/synapse/issues/11330)) +- Test that room alias deletion works as intended. ([\#11327](https://github.com/matrix-org/synapse/issues/11327)) +- Add type annotations for some methods and properties in the module API. ([\#11341](https://github.com/matrix-org/synapse/issues/11341)) +- Fix running `scripts-dev/complement.sh`, which was broken in v1.47.0rc1. ([\#11368](https://github.com/matrix-org/synapse/issues/11368)) +- Rename internal functions for token generation to better reflect what they do. ([\#11369](https://github.com/matrix-org/synapse/issues/11369), [\#11370](https://github.com/matrix-org/synapse/issues/11370)) +- Add type hints to configuration classes. ([\#11377](https://github.com/matrix-org/synapse/issues/11377)) +- Publish a `develop` image to Docker Hub. ([\#11380](https://github.com/matrix-org/synapse/issues/11380)) +- Keep fallback key marked as used if it's re-uploaded. ([\#11382](https://github.com/matrix-org/synapse/issues/11382)) +- Use `auto_attribs` on the `attrs` class `RefreshTokenLookupResult`. 
([\#11386](https://github.com/matrix-org/synapse/issues/11386))
+- Rename unstable `access_token_lifetime` configuration option to `refreshable_access_token_lifetime` to make it clear it only concerns refreshable access tokens. ([\#11388](https://github.com/matrix-org/synapse/issues/11388))
+- Do not run the broken MSC2716 tests when running `scripts-dev/complement.sh`. ([\#11389](https://github.com/matrix-org/synapse/issues/11389))
+- Remove dead code from supporting ACME. ([\#11393](https://github.com/matrix-org/synapse/issues/11393))
+- Refactor including the bundled relations when serializing an event. ([\#11408](https://github.com/matrix-org/synapse/issues/11408))
+
+
+Synapse 1.47.1 (2021-11-23)
+===========================
+
+This release fixes a security issue in the media store, affecting all prior releases of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild.
+
+Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below.
+
+Security advisory
+-----------------
+
+The following issue is fixed in 1.47.1.
+
+- **[GHSA-3hfw-x7gx-437c](https://github.com/matrix-org/synapse/security/advisories/GHSA-3hfw-x7gx-437c) / [CVE-2021-41281](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41281): Path traversal when downloading remote media.**
+
+  Synapse instances with the media repository enabled can be tricked into downloading a file from a remote server into an arbitrary directory, potentially outside the media store directory.
+
+  The last two directories and file name of the path are chosen randomly by Synapse and cannot be controlled by an attacker, which limits the impact.
+
+  Homeservers with the media repository disabled are unaffected. Homeservers configured with a federation whitelist are also unaffected.
+
+  Fixed by [91f2bd090](https://github.com/matrix-org/synapse/commit/91f2bd090).
+
+
+Synapse 1.47.0 (2021-11-17)
+===========================
+
+No significant changes since 1.47.0rc3.
+
+
+Synapse 1.47.0rc3 (2021-11-16)
+==============================
+
+Bugfixes
+--------
+
+- Fix a bug introduced in 1.47.0rc1 which caused worker processes to not halt startup in the presence of outstanding database migrations. ([\#11346](https://github.com/matrix-org/synapse/issues/11346))
+- Fix a bug introduced in 1.47.0rc1 which prevented the 'remove deleted devices from `device_inbox` column' background process from running when updating from a recent Synapse version. ([\#11303](https://github.com/matrix-org/synapse/issues/11303), [\#11353](https://github.com/matrix-org/synapse/issues/11353))
+
+
+Synapse 1.47.0rc2 (2021-11-10)
+==============================
+
+This fixes an issue with publishing the Debian packages for 1.47.0rc1.
+It is otherwise identical to 1.47.0rc1.
+
+
+Synapse 1.47.0rc1 (2021-11-09)
+==============================
+
+Deprecations and Removals
+-------------------------
+
+- The `user_may_create_room_with_invites` module callback is now deprecated. Please refer to the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade#upgrading-to-v1470) for more information. ([\#11206](https://github.com/matrix-org/synapse/issues/11206))
+- Remove deprecated admin API to delete rooms (`POST /_synapse/admin/v1/rooms/<room_id>/delete`). ([\#11213](https://github.com/matrix-org/synapse/issues/11213))
+
+
+Features
+--------
+
+- Advertise support for Client-Server API r0.6.1.
([\#11097](https://github.com/matrix-org/synapse/issues/11097)) +- Add search by room ID and room alias to the List Room admin API. ([\#11099](https://github.com/matrix-org/synapse/issues/11099)) +- Add an `on_new_event` third-party rules callback to allow Synapse modules to act after an event has been sent into a room. ([\#11126](https://github.com/matrix-org/synapse/issues/11126)) +- Add a module API method to update a user's membership in a room. ([\#11147](https://github.com/matrix-org/synapse/issues/11147)) +- Add metrics for thread pool usage. ([\#11178](https://github.com/matrix-org/synapse/issues/11178)) +- Support the stable room type field for [MSC3288](https://github.com/matrix-org/matrix-doc/pull/3288). ([\#11187](https://github.com/matrix-org/synapse/issues/11187)) +- Add a module API method to retrieve the current state of a room. ([\#11204](https://github.com/matrix-org/synapse/issues/11204)) +- Calculate a default value for `public_baseurl` based on `server_name`. ([\#11210](https://github.com/matrix-org/synapse/issues/11210)) +- Add support for serving `/.well-known/matrix/server` files, to redirect federation traffic to port 443. ([\#11211](https://github.com/matrix-org/synapse/issues/11211)) +- Add admin APIs to pause, start and check the status of background updates. ([\#11263](https://github.com/matrix-org/synapse/issues/11263)) + + +Bugfixes +-------- + +- Fix a long-standing bug which allowed hidden devices to receive to-device messages, resulting in unnecessary database bloat. ([\#10097](https://github.com/matrix-org/synapse/issues/10097)) +- Fix a long-standing bug where messages in the `device_inbox` table for deleted devices would persist indefinitely. Contributed by @dklimpel and @JohannesKleine. ([\#10969](https://github.com/matrix-org/synapse/issues/10969), [\#11212](https://github.com/matrix-org/synapse/issues/11212)) +- Do not accept events if a third-party rule `check_event_allowed` callback raises an exception. ([\#11033](https://github.com/matrix-org/synapse/issues/11033)) +- Fix long-standing bug where verification requests could fail in certain cases if a federation whitelist was in place but did not include your own homeserver. ([\#11129](https://github.com/matrix-org/synapse/issues/11129)) +- Allow an empty list of `state_events_at_start` to be sent when using the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint and the author of the historical messages is already part of the current room state at the given `?prev_event_id`. ([\#11188](https://github.com/matrix-org/synapse/issues/11188)) +- Fix a bug introduced in Synapse 1.45.0 which prevented the `synapse_review_recent_signups` script from running. Contributed by @samuel-p. ([\#11191](https://github.com/matrix-org/synapse/issues/11191)) +- Delete `to_device` messages for hidden devices that will never be read, reducing database size. ([\#11199](https://github.com/matrix-org/synapse/issues/11199)) +- Fix a long-standing bug wherein a missing `Content-Type` header when downloading remote media would cause Synapse to throw an error. ([\#11200](https://github.com/matrix-org/synapse/issues/11200)) +- Fix a long-standing bug which could result in serialization errors and potentially duplicate transaction data when sending ephemeral events to application services. Contributed by @Fizzadar at Beeper. 
([\#11207](https://github.com/matrix-org/synapse/issues/11207)) +- Fix a bug introduced in Synapse 1.35.0 which made it impossible to join rooms that return a `send_join` response containing floats. ([\#11217](https://github.com/matrix-org/synapse/issues/11217)) +- Fix long-standing bug where cross signing keys were not included in the response to `/r0/keys/query` the first time a remote user was queried. ([\#11234](https://github.com/matrix-org/synapse/issues/11234)) +- Fix a long-standing bug where all requests that read events from the database could get stuck as a result of losing the database connection. ([\#11240](https://github.com/matrix-org/synapse/issues/11240)) +- Fix a bug preventing Synapse from being rolled back to an earlier version when using workers. ([\#11255](https://github.com/matrix-org/synapse/issues/11255), [\#11276](https://github.com/matrix-org/synapse/issues/11276)) +- Fix a bug introduced in Synapse 1.37.1 which caused a remote event being processed by a worker to not get processed on restart if the worker was killed. ([\#11262](https://github.com/matrix-org/synapse/issues/11262)) +- Only allow old Element/Riot Android clients to send read receipts without a request body. All other clients must include a request body as required by the specification. Contributed by @rogersheu. ([\#11157](https://github.com/matrix-org/synapse/issues/11157)) + + +Updates to the Docker image +--------------------------- + +- Avoid changing user ID when started as a non-root user, and no explicit `UID` is set. ([\#11209](https://github.com/matrix-org/synapse/issues/11209)) + + +Improved Documentation +---------------------- + +- Improve example HAProxy config in the docs to properly handle HTTP `Host` headers with port information. This is required for federation over port 443 to work correctly. ([\#11128](https://github.com/matrix-org/synapse/issues/11128)) +- Add documentation for using Authentik as an OpenID Connect Identity Provider. Contributed by @samip5. ([\#11151](https://github.com/matrix-org/synapse/issues/11151)) +- Clarify lack of support for Windows. ([\#11198](https://github.com/matrix-org/synapse/issues/11198)) +- Improve code formatting and fix a few typos in docs. Contributed by @sumnerevans at Beeper. ([\#11221](https://github.com/matrix-org/synapse/issues/11221)) +- Add documentation for using LemonLDAP as an OpenID Connect Identity Provider. Contributed by @l00ptr. ([\#11257](https://github.com/matrix-org/synapse/issues/11257)) + + +Internal Changes +---------------- + +- Add type annotations for the `log_function` decorator. ([\#10943](https://github.com/matrix-org/synapse/issues/10943)) +- Add type hints to `synapse.events`. ([\#11098](https://github.com/matrix-org/synapse/issues/11098)) +- Remove and document unnecessary `RoomStreamToken` checks in application service ephemeral event code. ([\#11137](https://github.com/matrix-org/synapse/issues/11137)) +- Add type hints so that `synapse.http` passes `mypy` checks. ([\#11164](https://github.com/matrix-org/synapse/issues/11164)) +- Update scripts to pass Shellcheck lints. ([\#11166](https://github.com/matrix-org/synapse/issues/11166)) +- Add knock information in admin export. Contributed by Rafael Gonçalves. ([\#11171](https://github.com/matrix-org/synapse/issues/11171)) +- Add tests to check that `ClientIpStore.get_last_client_ip_by_device` and `get_user_ip_and_agents` combine database and in-memory data correctly. 
([\#11179](https://github.com/matrix-org/synapse/issues/11179)) +- Refactor `Filter` to check different fields depending on the data type. ([\#11194](https://github.com/matrix-org/synapse/issues/11194)) +- Improve type hints for the relations datastore. ([\#11205](https://github.com/matrix-org/synapse/issues/11205)) +- Replace outdated links in the pull request checklist with links to the rendered documentation. ([\#11225](https://github.com/matrix-org/synapse/issues/11225)) +- Fix a bug in unit test `test_block_room_and_not_purge`. ([\#11226](https://github.com/matrix-org/synapse/issues/11226)) +- In `ObservableDeferred`, run observers in the order they were registered. ([\#11229](https://github.com/matrix-org/synapse/issues/11229)) +- Minor speed up to start up times and getting updates for groups by adding missing index to `local_group_updates.stream_id`. ([\#11231](https://github.com/matrix-org/synapse/issues/11231)) +- Add `twine` and `towncrier` as dev dependencies, as they're used by the release script. ([\#11233](https://github.com/matrix-org/synapse/issues/11233)) +- Allow `stream_writers.typing` config to be a list of one worker. ([\#11237](https://github.com/matrix-org/synapse/issues/11237)) +- Remove debugging statement in tests. ([\#11239](https://github.com/matrix-org/synapse/issues/11239)) +- Fix [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical messages backfilling in random order on remote homeservers. ([\#11244](https://github.com/matrix-org/synapse/issues/11244)) +- Add an additional test for the `cachedList` method decorator. ([\#11246](https://github.com/matrix-org/synapse/issues/11246)) +- Make minor correction to the type of `auth_checkers` callbacks. ([\#11253](https://github.com/matrix-org/synapse/issues/11253)) +- Clean up trivial aspects of the Debian package build tooling. ([\#11269](https://github.com/matrix-org/synapse/issues/11269), [\#11273](https://github.com/matrix-org/synapse/issues/11273)) +- Blacklist new SyTest that checks that key uploads are valid pending the validation being implemented in Synapse. ([\#11270](https://github.com/matrix-org/synapse/issues/11270)) + + +Synapse 1.46.0 (2021-11-02) +=========================== + +The cause of the [performance regression affecting Synapse 1.44](https://github.com/matrix-org/synapse/issues/11049) has been identified and fixed. ([\#11177](https://github.com/matrix-org/synapse/issues/11177)) + +Bugfixes +-------- + +- Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. ([\#11196](https://github.com/matrix-org/synapse/issues/11196)) + + +Synapse 1.46.0rc1 (2021-10-27) +============================== + +Features +-------- + +- Add support for Ubuntu 21.10 "Impish Indri". ([\#11024](https://github.com/matrix-org/synapse/issues/11024)) +- Port the Password Auth Providers module interface to the new generic interface. ([\#10548](https://github.com/matrix-org/synapse/issues/10548), [\#11180](https://github.com/matrix-org/synapse/issues/11180)) +- Experimental support for the thread relation defined in [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). ([\#11088](https://github.com/matrix-org/synapse/issues/11088), [\#11181](https://github.com/matrix-org/synapse/issues/11181), [\#11192](https://github.com/matrix-org/synapse/issues/11192)) +- Users admin API can now also modify user type in addition to allowing it to be set on user creation. 
+ + +Bugfixes +-------- + +- Newly-created public rooms are now only assigned an alias if the room's creation has not been blocked by permission settings. Contributed by @AndrewFerr. ([\#10930](https://github.com/matrix-org/synapse/issues/10930)) +- Fix a long-standing bug which meant that events received over federation were sometimes incorrectly accepted into the room state. ([\#11001](https://github.com/matrix-org/synapse/issues/11001), [\#11009](https://github.com/matrix-org/synapse/issues/11009), [\#11012](https://github.com/matrix-org/synapse/issues/11012)) +- Fix a 500 error on `/messages` when the server accumulates more than 5 backwards extremities at a given depth for a room. ([\#11027](https://github.com/matrix-org/synapse/issues/11027)) +- Fix a bug where setting a user's `external_id` via the admin API returns a 500 error and deletes the user's existing external mappings if that external ID is already mapped. ([\#11051](https://github.com/matrix-org/synapse/issues/11051)) +- Fix a long-standing bug where users excluded from the user directory were added into the directory if they belonged to a room which became public or private. ([\#11075](https://github.com/matrix-org/synapse/issues/11075)) +- Fix a long-standing bug when attempting to preview URLs which are in the `windows-1252` character encoding. ([\#11077](https://github.com/matrix-org/synapse/issues/11077), [\#11089](https://github.com/matrix-org/synapse/issues/11089)) +- Fix the broken export-data admin command, and add a test script exercising the command to CI. ([\#11078](https://github.com/matrix-org/synapse/issues/11078)) +- Show an error when a timestamp in seconds is provided to the `/purge_media_cache` Admin API. ([\#11101](https://github.com/matrix-org/synapse/issues/11101)) +- Fix local users who left all their rooms being removed from the user directory, even if the `search_all_users` config option was enabled. ([\#11103](https://github.com/matrix-org/synapse/issues/11103)) +- Fix a bug which caused the module API's `get_user_ip_and_agents` function to always fail on workers. `get_user_ip_and_agents` was introduced in 1.44.0 and did not function correctly on worker processes at the time. ([\#11112](https://github.com/matrix-org/synapse/issues/11112)) +- Connections to identity servers no longer ignore `ip_range_whitelist`. ([\#11120](https://github.com/matrix-org/synapse/issues/11120)) +- Fix a bug introduced in Synapse 1.45.0 breaking the configuration file parsing script. ([\#11145](https://github.com/matrix-org/synapse/issues/11145)) +- Fix a performance regression introduced in 1.44.0 which could cause client requests to time out when making large numbers of outbound requests. ([\#11177](https://github.com/matrix-org/synapse/issues/11177), [\#11190](https://github.com/matrix-org/synapse/issues/11190)) +- Resolve and share `state_groups` for all [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical events in batch. ([\#10975](https://github.com/matrix-org/synapse/issues/10975)) + + +Improved Documentation +---------------------- + +- Fix broken links relating to module API deprecation in the upgrade notes. ([\#11069](https://github.com/matrix-org/synapse/issues/11069)) +- Add more information about what happens when a user is deactivated. ([\#11083](https://github.com/matrix-org/synapse/issues/11083)) +- Clarify that the sample log config can be copied from the documentation without issue.
([\#11092](https://github.com/matrix-org/synapse/issues/11092)) +- Update the admin API documentation with the updated list of characters allowed in registration tokens. ([\#11093](https://github.com/matrix-org/synapse/issues/11093)) +- Document Synapse's behaviour when dealing with multiple modules registering the same callbacks and/or handlers for the same HTTP endpoints. ([\#11096](https://github.com/matrix-org/synapse/issues/11096)) +- Fix instances of `[example]{.title-ref}` left over in the upgrade documentation from the prior RST to Markdown conversion. ([\#11118](https://github.com/matrix-org/synapse/issues/11118)) +- Document the version of Synapse each module callback was introduced in. ([\#11132](https://github.com/matrix-org/synapse/issues/11132)) +- Document the version of Synapse that introduced each module API method. ([\#11183](https://github.com/matrix-org/synapse/issues/11183)) + + +Internal Changes +---------------- + +- Fix spurious warnings about losing the logging context on the `ReplicationCommandHandler` when losing the replication connection. ([\#10984](https://github.com/matrix-org/synapse/issues/10984)) +- Include the rejected status when we log events. ([\#11008](https://github.com/matrix-org/synapse/issues/11008)) +- Add some extra logging to the event persistence code. ([\#11014](https://github.com/matrix-org/synapse/issues/11014)) +- Rearrange the internal workings of the incremental user directory updates. ([\#11035](https://github.com/matrix-org/synapse/issues/11035)) +- Fix a long-standing bug where users excluded from the directory could still be added to the `users_who_share_private_rooms` table after a regular user joins a private room. ([\#11143](https://github.com/matrix-org/synapse/issues/11143)) +- Add and improve type hints. ([\#10972](https://github.com/matrix-org/synapse/issues/10972), [\#11055](https://github.com/matrix-org/synapse/issues/11055), [\#11066](https://github.com/matrix-org/synapse/issues/11066), [\#11076](https://github.com/matrix-org/synapse/issues/11076), [\#11095](https://github.com/matrix-org/synapse/issues/11095), [\#11109](https://github.com/matrix-org/synapse/issues/11109), [\#11121](https://github.com/matrix-org/synapse/issues/11121), [\#11146](https://github.com/matrix-org/synapse/issues/11146)) +- Mark the Synapse package as containing type annotations and fix export declarations so that Synapse pluggable modules may be type checked against Synapse. ([\#11054](https://github.com/matrix-org/synapse/issues/11054)) +- Remove dead code from `MediaFilePaths`. ([\#11056](https://github.com/matrix-org/synapse/issues/11056)) +- Be more lenient when parsing oEmbed response versions. ([\#11065](https://github.com/matrix-org/synapse/issues/11065)) +- Create a separate module for the retention configuration. ([\#11070](https://github.com/matrix-org/synapse/issues/11070)) +- Clean up some of the federation event authentication code for clarity. ([\#11115](https://github.com/matrix-org/synapse/issues/11115), [\#11116](https://github.com/matrix-org/synapse/issues/11116), [\#11122](https://github.com/matrix-org/synapse/issues/11122)) +- Add docstrings and comments to the application service ephemeral event sending code. ([\#11138](https://github.com/matrix-org/synapse/issues/11138)) +- Update the `sign_json` script to support inline configuration of the signing key. ([\#11139](https://github.com/matrix-org/synapse/issues/11139)) +- Fix a broken link in the docker image README.
([\#11144](https://github.com/matrix-org/synapse/issues/11144)) +- Always dump logs from unit tests during CI runs. ([\#11068](https://github.com/matrix-org/synapse/issues/11068)) +- Add tests for `MediaFilePaths` class. ([\#11057](https://github.com/matrix-org/synapse/issues/11057)) +- Simplify the user admin API tests. ([\#11048](https://github.com/matrix-org/synapse/issues/11048)) +- Add a test for the workaround introduced in [\#11042](https://github.com/matrix-org/synapse/pull/11042) concerning the behaviour of third-party rule modules and `SynapseError`s. ([\#11071](https://github.com/matrix-org/synapse/issues/11071)) + + +Synapse 1.45.1 (2021-10-20) +=========================== + +Bugfixes +-------- + +- Revert change to counting of deactivated users towards the monthly active users limit, introduced in 1.45.0rc1. ([\#11127](https://github.com/matrix-org/synapse/issues/11127)) + + +Synapse 1.45.0 (2021-10-19) +=========================== + +No functional changes since Synapse 1.45.0rc2. + +Known Issues +------------ + +- A suspected [performance regression](https://github.com/matrix-org/synapse/issues/11049) which was first reported after the release of 1.44.0 remains unresolved. + + We have not been able to identify a probable cause. Affected users report that setting up a federation sender worker appears to alleviate symptoms of the regression. + +Improved Documentation +---------------------- + +- Reword changelog to clarify concerns about a suspected performance regression in 1.44.0. ([\#11117](https://github.com/matrix-org/synapse/issues/11117)) + + +Synapse 1.45.0rc2 (2021-10-14) +============================== + +This release candidate [fixes](https://github.com/matrix-org/synapse/issues/11053) a user directory [bug](https://github.com/matrix-org/synapse/issues/11025) present in 1.45.0rc1. + +Known Issues +------------ + +- A suspected [performance regression](https://github.com/matrix-org/synapse/issues/11049) which was first reported after the release of 1.44.0 remains unresolved. + + We have not been able to identify a probable cause. Affected users report that setting up a federation sender worker appears to alleviate symptoms of the regression. + +Bugfixes +-------- + +- Fix a long-standing bug when using multiple event persister workers where events were not correctly sent down `/sync` due to a race. ([\#11045](https://github.com/matrix-org/synapse/issues/11045)) +- Fix a bug introduced in Synapse 1.45.0rc1 where the user directory would stop updating if it processed an event from a + user not in the `users` table. ([\#11053](https://github.com/matrix-org/synapse/issues/11053)) +- Fix a bug introduced in Synapse 1.44.0 when logging errors during oEmbed processing. ([\#11061](https://github.com/matrix-org/synapse/issues/11061)) + + +Internal Changes +---------------- + +- Add an 'approximate difference' method to `StateFilter`. ([\#10825](https://github.com/matrix-org/synapse/issues/10825)) +- Fix inconsistent behavior of `get_last_client_by_ip` when reporting data that has not been stored in the database yet. ([\#10970](https://github.com/matrix-org/synapse/issues/10970)) +- Fix a bug introduced in Synapse 1.21.0 that causes opentracing and Prometheus metrics for replication requests to be measured incorrectly. ([\#10996](https://github.com/matrix-org/synapse/issues/10996)) +- Ensure that cache config tests do not share state. 
([\#11036](https://github.com/matrix-org/synapse/issues/11036)) + + +Synapse 1.45.0rc1 (2021-10-12) +============================== + +**Note:** Media storage provider modules that read from Synapse's configuration need changes as of this version; see the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade#upgrading-to-v1450) for more information. + +Known Issues +------------ + +- We are investigating [a performance issue](https://github.com/matrix-org/synapse/issues/11049) which was reported after the release of 1.44.0. +- We are aware of [a bug](https://github.com/matrix-org/synapse/issues/11025) with the user directory when using application services. A second release candidate is expected which will resolve this. + +Features +-------- + +- Add [MSC3069](https://github.com/matrix-org/matrix-doc/pull/3069) support to `/account/whoami`. ([\#9655](https://github.com/matrix-org/synapse/issues/9655)) +- Support autodiscovery of oEmbed previews. ([\#10822](https://github.com/matrix-org/synapse/issues/10822)) +- Add a `user_may_send_3pid_invite` spam checker callback for modules to allow or deny 3PID invites. ([\#10894](https://github.com/matrix-org/synapse/issues/10894)) +- Add a spam checker callback to allow or deny room joins. ([\#10910](https://github.com/matrix-org/synapse/issues/10910)) +- Include an `update_synapse_database` script in the distribution; see the sketch after this list. Contributed by @Fizzadar at Beeper. ([\#10954](https://github.com/matrix-org/synapse/issues/10954)) +- Include exception information in JSON logging output. Contributed by @Fizzadar at Beeper. ([\#11028](https://github.com/matrix-org/synapse/issues/11028))
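+
+As a minimal sketch of how the newly distributed script can be invoked (assuming it is installed on the `PATH`; the flags mirror the in-tree usage, and the config path is a placeholder):
+
+```sh
+# Sketch: bring the database schema up to date and run any pending
+# background updates against the given homeserver config.
+update_synapse_database --database-config /etc/synapse/homeserver.yaml --run-background-updates
+```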
+ + +Bugfixes +-------- + +- Fix a minor bug in the response to `/_matrix/client/r0/voip/turnServer`. Contributed by @lukaslihotzki. ([\#10922](https://github.com/matrix-org/synapse/issues/10922)) +- Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory. ([\#10924](https://github.com/matrix-org/synapse/issues/10924)) +- Fix a bug introduced in Synapse v1.40.0 where the signature checks for room versions 8 and 9 could be applied to earlier room versions in some situations. ([\#10927](https://github.com/matrix-org/synapse/issues/10927)) +- Fix a long-standing bug wherein deactivated users still counted towards the monthly active users limit. ([\#10947](https://github.com/matrix-org/synapse/issues/10947)) +- Fix a long-standing bug which meant that events received over federation were sometimes incorrectly accepted into the room state. ([\#10956](https://github.com/matrix-org/synapse/issues/10956)) +- Fix a long-standing bug where rebuilding the user directory wouldn't exclude support and deactivated users. ([\#10960](https://github.com/matrix-org/synapse/issues/10960)) +- Fix the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint rejecting subsequent batches from the room creator with an unknown batch ID error in existing room versions. ([\#10962](https://github.com/matrix-org/synapse/issues/10962)) +- Fix a bug that could leak local users' per-room nicknames and avatars when the user directory is rebuilt. ([\#10981](https://github.com/matrix-org/synapse/issues/10981)) +- Fix a long-standing bug where the remainder of a batch of user directory changes would be silently dropped if the server left a room early in the batch. ([\#10982](https://github.com/matrix-org/synapse/issues/10982)) +- Correct a bugfix introduced in Synapse v1.44.0 that would catch the wrong error if a connection was lost before a response could be written to it. ([\#10995](https://github.com/matrix-org/synapse/issues/10995)) +- Fix a long-standing bug where local users' per-room nicknames/avatars were visible to anyone who could see them in the user directory. ([\#11002](https://github.com/matrix-org/synapse/issues/11002)) +- Fix a long-standing bug where a user's per-room nickname/avatar would overwrite their profile in the user directory when a room was made public. ([\#11003](https://github.com/matrix-org/synapse/issues/11003)) +- Work around a regression, introduced in Synapse v1.39.0, that caused `SynapseError`s raised by the experimental third-party rules module callback `check_event_allowed` to be ignored. ([\#11042](https://github.com/matrix-org/synapse/issues/11042)) +- Fix a bug in [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) insertion events in rooms that could cause cross-talk/conflicts between batches. ([\#10877](https://github.com/matrix-org/synapse/issues/10877)) + + +Improved Documentation +---------------------- + +- Change wording ("reference homeserver") in Synapse repository documentation. Contributed by @maxkratz. ([\#10971](https://github.com/matrix-org/synapse/issues/10971)) +- Fix a dead URL in development documentation (SAML) and change wording from "Riot" to "Element". Contributed by @maxkratz. ([\#10973](https://github.com/matrix-org/synapse/issues/10973)) +- Add additional content to the Welcome and Overview page of the documentation. ([\#10990](https://github.com/matrix-org/synapse/issues/10990)) +- Update links to MSCs in documentation. Contributed by @dklimpel. ([\#10991](https://github.com/matrix-org/synapse/issues/10991)) + + +Internal Changes +---------------- + +- Improve type hinting in `synapse.util`. ([\#10888](https://github.com/matrix-org/synapse/issues/10888)) +- Add further type hints to `synapse.storage.util`. ([\#10892](https://github.com/matrix-org/synapse/issues/10892)) +- Fix type hints to be compatible with an upcoming change to Twisted. ([\#10895](https://github.com/matrix-org/synapse/issues/10895)) +- Update utility code to handle C implementations of frozendict. ([\#10902](https://github.com/matrix-org/synapse/issues/10902)) +- Drop old functionality which maintained database compatibility with Synapse versions before v1.31. ([\#10903](https://github.com/matrix-org/synapse/issues/10903)) +- Clean up configuration helper classes for the `ServerConfig` class. ([\#10915](https://github.com/matrix-org/synapse/issues/10915)) +- Use direct references to config flags. ([\#10916](https://github.com/matrix-org/synapse/issues/10916), [\#10959](https://github.com/matrix-org/synapse/issues/10959), [\#10985](https://github.com/matrix-org/synapse/issues/10985)) +- Clean up some of the federation event authentication code for clarity. ([\#10926](https://github.com/matrix-org/synapse/issues/10926), [\#10940](https://github.com/matrix-org/synapse/issues/10940), [\#10986](https://github.com/matrix-org/synapse/issues/10986), [\#10987](https://github.com/matrix-org/synapse/issues/10987), [\#10988](https://github.com/matrix-org/synapse/issues/10988), [\#11010](https://github.com/matrix-org/synapse/issues/11010), [\#11011](https://github.com/matrix-org/synapse/issues/11011)) +- Refactor various parts of the codebase to use `RoomVersion` objects instead of room version identifier strings.
([\#10934](https://github.com/matrix-org/synapse/issues/10934)) +- Refactor user directory tests in preparation for upcoming changes. ([\#10935](https://github.com/matrix-org/synapse/issues/10935)) +- Include the event ID in the logcontext when handling PDUs received over federation. ([\#10936](https://github.com/matrix-org/synapse/issues/10936)) +- Fix logged errors in unit tests. ([\#10939](https://github.com/matrix-org/synapse/issues/10939)) +- Fix a broken test to ensure that consent configuration works during registration. ([\#10945](https://github.com/matrix-org/synapse/issues/10945)) +- Add type hints to filtering classes. ([\#10958](https://github.com/matrix-org/synapse/issues/10958)) +- Add a type hint to `HomeserverTestcase.setup_test_homeserver`. ([\#10961](https://github.com/matrix-org/synapse/issues/10961)) +- Fix the test utility function `create_room_as` so that `is_public=True` will explicitly set the `visibility` parameter of room creation requests to `public`. Contributed by @AndrewFerr. ([\#10963](https://github.com/matrix-org/synapse/issues/10963)) +- Make the release script more robust and transparent. ([\#10966](https://github.com/matrix-org/synapse/issues/10966)) +- Refactor the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` mega-function into smaller handler functions. ([\#10974](https://github.com/matrix-org/synapse/issues/10974)) +- Log stack traces when a missing opentracing span is detected. ([\#10983](https://github.com/matrix-org/synapse/issues/10983)) +- Update GHA config to run tests against Python 3.10 and PostgreSQL 14. ([\#10992](https://github.com/matrix-org/synapse/issues/10992)) +- Fix a long-standing bug where `ReadWriteLock`s could drop logging contexts on exit. ([\#10993](https://github.com/matrix-org/synapse/issues/10993)) +- Add a `CODEOWNERS` file to automatically request reviews from the `@matrix-org/synapse-core` team on new pull requests. ([\#10994](https://github.com/matrix-org/synapse/issues/10994)) +- Add further type hints to `synapse.state`. ([\#11004](https://github.com/matrix-org/synapse/issues/11004)) +- Remove the deprecated `BaseHandler` object. ([\#11005](https://github.com/matrix-org/synapse/issues/11005)) +- Bump mypy version for CI to 0.910, and pull in new type stubs for dependencies. ([\#11006](https://github.com/matrix-org/synapse/issues/11006)) +- Fix CI to run the unit tests without optional deps. ([\#11017](https://github.com/matrix-org/synapse/issues/11017)) +- Ensure that cache config tests do not share state. ([\#11019](https://github.com/matrix-org/synapse/issues/11019)) +- Add additional type hints to `synapse.server_notices`. ([\#11021](https://github.com/matrix-org/synapse/issues/11021)) +- Add additional type hints for `synapse.push`. ([\#11023](https://github.com/matrix-org/synapse/issues/11023)) +- When installing the optional developer dependencies, also include the dependencies needed for type-checking and unit testing. ([\#11034](https://github.com/matrix-org/synapse/issues/11034)) +- Remove unnecessary list comprehension from `synapse_port_db` to satisfy code style requirements. ([\#11043](https://github.com/matrix-org/synapse/issues/11043)) + + +Synapse 1.44.0 (2021-10-05) +=========================== + +No significant changes since 1.44.0rc3. + + +Synapse 1.44.0rc3 (2021-10-04) +============================== + +Bugfixes +-------- + +- Fix a bug introduced in Synapse v1.40.0 where changing a user's display name or avatar in a restricted room would cause an authentication error.
([\#10933](https://github.com/matrix-org/synapse/issues/10933)) +- Fix the `/admin/whois/{user_id}` endpoint, which was broken in v1.44.0rc1. ([\#10968](https://github.com/matrix-org/synapse/issues/10968)) + + +Synapse 1.44.0rc2 (2021-09-30) +============================== + +Bugfixes +-------- + +- Fix a bug introduced in v1.44.0rc1 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. ([\#10938](https://github.com/matrix-org/synapse/issues/10938)) +- Fix a bug introduced in v1.44.0rc1 which prevented sending presence events to application services. ([\#10944](https://github.com/matrix-org/synapse/issues/10944)) + + +Improved Documentation +---------------------- + +- Minor updates to the installation instructions. ([\#10919](https://github.com/matrix-org/synapse/issues/10919)) + + +Synapse 1.44.0rc1 (2021-09-29) +============================== + +Features +-------- + +- Only allow the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send?chunk_id=xxx` endpoint to connect to an already existing insertion event. ([\#10776](https://github.com/matrix-org/synapse/issues/10776)) +- Improve oEmbed URL previews by processing the author name, photo, and video information. ([\#10814](https://github.com/matrix-org/synapse/issues/10814), [\#10819](https://github.com/matrix-org/synapse/issues/10819)) +- Speed up responding with large JSON objects to requests. ([\#10868](https://github.com/matrix-org/synapse/issues/10868), [\#10905](https://github.com/matrix-org/synapse/issues/10905)) +- Add a `user_may_create_room_with_invites` spam checker callback to allow modules to allow or deny a room creation request based on the invites and/or 3PID invites it includes. ([\#10898](https://github.com/matrix-org/synapse/issues/10898)) + + +Bugfixes +-------- + +- Fix a long-standing bug that caused an `AssertionError` when purging history in certain rooms. Contributed by @Kokokokoka. ([\#10690](https://github.com/matrix-org/synapse/issues/10690)) +- Fix a long-standing bug which caused deactivated users that were later reactivated to be missing from the user directory. ([\#10782](https://github.com/matrix-org/synapse/issues/10782)) +- Fix a long-standing bug that caused unbanning a user by sending a membership event to fail. Contributed by @aaronraimist. ([\#10807](https://github.com/matrix-org/synapse/issues/10807)) +- Fix a long-standing bug where logging contexts would go missing when federation requests time out. ([\#10810](https://github.com/matrix-org/synapse/issues/10810)) +- Fix a long-standing bug causing an error in the deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. ([\#10827](https://github.com/matrix-org/synapse/issues/10827)) +- Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. ([\#10843](https://github.com/matrix-org/synapse/issues/10843)) +- Fix a long-standing bug in the Unicode support of the room search admin API which broke search for rooms with non-ASCII characters. ([\#10859](https://github.com/matrix-org/synapse/issues/10859)) +- Fix a bug introduced in Synapse 1.37.0 which caused `knock` membership events sent to remote servers to be incorrectly stored in the local database.
([\#10873](https://github.com/matrix-org/synapse/issues/10873)) +- Fix the one-time key count cache not being invalidated after claiming keys. The bug was introduced in Synapse v1.41.0. Contributed by Tulir at Beeper. ([\#10875](https://github.com/matrix-org/synapse/issues/10875)) +- Fix a long-standing bug causing application service users to be subject to MAU blocking if the MAU limit had been reached, even if configured not to be blocked. ([\#10881](https://github.com/matrix-org/synapse/issues/10881)) +- Fix a long-standing bug which could cause events pulled over federation to be incorrectly rejected. ([\#10907](https://github.com/matrix-org/synapse/issues/10907)) +- Fix a long-standing bug causing URL cache files to be stored in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. ([\#10911](https://github.com/matrix-org/synapse/issues/10911)) +- Fix a long-standing bug leading to race conditions when creating media store and config directories. ([\#10913](https://github.com/matrix-org/synapse/issues/10913)) + + +Improved Documentation +---------------------- + +- Fix some crashes in the Module API example code, by adding JSON encoding/decoding. ([\#10845](https://github.com/matrix-org/synapse/issues/10845)) +- Add developer documentation about experimental configuration flags. ([\#10865](https://github.com/matrix-org/synapse/issues/10865)) +- Properly remove deleted files from GitHub pages when generating the documentation. ([\#10869](https://github.com/matrix-org/synapse/issues/10869)) + + +Internal Changes +---------------- + +- Fix GitHub Actions config so we can run SyTest on Synapse from parallel branches. ([\#10659](https://github.com/matrix-org/synapse/issues/10659)) +- Split out [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) meta events to their own fields in the `/batch_send` response. ([\#10777](https://github.com/matrix-org/synapse/issues/10777)) +- Add missing type hints to REST servlets. ([\#10785](https://github.com/matrix-org/synapse/issues/10785), [\#10817](https://github.com/matrix-org/synapse/issues/10817)) +- Simplify the internal logic which maintains the user directory database tables. ([\#10796](https://github.com/matrix-org/synapse/issues/10796)) +- Use direct references to config flags. ([\#10812](https://github.com/matrix-org/synapse/issues/10812), [\#10885](https://github.com/matrix-org/synapse/issues/10885), [\#10893](https://github.com/matrix-org/synapse/issues/10893), [\#10897](https://github.com/matrix-org/synapse/issues/10897)) +- Specify the type of token in generic "Invalid token" error messages. ([\#10815](https://github.com/matrix-org/synapse/issues/10815)) +- Make `StateFilter` frozen so it is hashable. ([\#10816](https://github.com/matrix-org/synapse/issues/10816)) +- Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error. ([\#10820](https://github.com/matrix-org/synapse/issues/10820)) +- Add type hints to the state database. ([\#10823](https://github.com/matrix-org/synapse/issues/10823)) +- Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you + haven't synced recently. ([\#10826](https://github.com/matrix-org/synapse/issues/10826)) +- Track cache eviction rates more finely in Prometheus's monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) +- Add missing type hints to `synapse.handlers`.
([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) +- Extend the Module API to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) +- Factor out PNG image data to a constant to be used in several tests. ([\#10834](https://github.com/matrix-org/synapse/issues/10834)) +- Add a test to ensure state events sent by modules get persisted correctly. ([\#10835](https://github.com/matrix-org/synapse/issues/10835)) +- Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. ([\#10838](https://github.com/matrix-org/synapse/issues/10838)) +- Rename the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to the more obvious `?prev_event_id`. ([\#10839](https://github.com/matrix-org/synapse/issues/10839)) +- Add type hints to `synapse.http.site`. ([\#10867](https://github.com/matrix-org/synapse/issues/10867)) +- Include outlier status when we log V2 or V3 events. ([\#10879](https://github.com/matrix-org/synapse/issues/10879)) +- Break down Grafana's cache expiry time series based on reason for eviction, cf. [\#10829](https://github.com/matrix-org/synapse/issues/10829). ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) +- Clean up some of the federation event authentication code for clarity. ([\#10883](https://github.com/matrix-org/synapse/issues/10883), [\#10884](https://github.com/matrix-org/synapse/issues/10884), [\#10896](https://github.com/matrix-org/synapse/issues/10896), [\#10901](https://github.com/matrix-org/synapse/issues/10901)) +- Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). ([\#10887](https://github.com/matrix-org/synapse/issues/10887)) +- Clean up some unnecessary parentheses in places around the codebase. ([\#10889](https://github.com/matrix-org/synapse/issues/10889)) +- Improve type hinting in the user directory code. ([\#10891](https://github.com/matrix-org/synapse/issues/10891)) +- Update the development testing script `test_postgresql.sh` to use a supported Python version and make re-runs quicker. ([\#10906](https://github.com/matrix-org/synapse/issues/10906)) +- Document and summarize changes in schema versions `61` to `64`. ([\#10917](https://github.com/matrix-org/synapse/issues/10917)) +- Update the release script to sign the newly created git tags. ([\#10925](https://github.com/matrix-org/synapse/issues/10925)) +- Fix Debian builds due to `dh-virtualenv` no longer being able to build its docs. ([\#10931](https://github.com/matrix-org/synapse/issues/10931)) + + +Synapse 1.43.0 (2021-09-21) +=========================== + +This release drops support for the deprecated, unstable API for [MSC2858 (Multiple SSO Identity Providers)](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), as well as the undocumented `experimental.msc2858_enabled` config option. Client authors should update their clients to use the stable API, available since Synapse 1.30. + +The documentation has been updated with configuration for routing `/spaces`, `/hierarchy` and `/summary` to workers.
See [the upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.43/docs/upgrade.md#upgrading-to-v1430) for more details. + +No significant changes since 1.43.0rc2. + +Synapse 1.43.0rc2 (2021-09-17) +============================== + +Bugfixes +-------- + +- Add opentracing logging to help debug [\#9424](https://github.com/matrix-org/synapse/issues/9424). ([\#10828](https://github.com/matrix-org/synapse/issues/10828)) + + +Synapse 1.43.0rc1 (2021-09-14) +============================== + +Features +-------- + +- Allow room creators to send historical events specified by [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) in existing room versions. ([\#10566](https://github.com/matrix-org/synapse/issues/10566)) +- Add a config option to use non-default manhole password and keys. ([\#10643](https://github.com/matrix-org/synapse/issues/10643)) +- Skip final GC at shutdown to improve restart performance. ([\#10712](https://github.com/matrix-org/synapse/issues/10712)) +- Allow configuration of the oEmbed URLs used for URL previews. ([\#10714](https://github.com/matrix-org/synapse/issues/10714), [\#10759](https://github.com/matrix-org/synapse/issues/10759)) +- Prefer [room version 9](https://github.com/matrix-org/matrix-doc/pull/3375) for restricted rooms per the [room version capabilities](https://github.com/matrix-org/matrix-doc/pull/3244) API. ([\#10772](https://github.com/matrix-org/synapse/issues/10772)) + + +Bugfixes +-------- + +- Fix a long-standing bug where room avatars were not included in email notifications. ([\#10658](https://github.com/matrix-org/synapse/issues/10658)) +- Fix a bug where the ordering algorithm was skipping the `origin_server_ts` step in the spaces summary, resulting in unstable room orderings. ([\#10730](https://github.com/matrix-org/synapse/issues/10730)) +- Fix an edge case when persisting events into a room where there are multiple events we previously hadn't calculated auth chains for (and hadn't marked as needing to be calculated). ([\#10743](https://github.com/matrix-org/synapse/issues/10743)) +- Fix a bug which prevented calls to `/createRoom` that included the `room_alias_name` parameter from being handled by worker processes. ([\#10757](https://github.com/matrix-org/synapse/issues/10757)) +- Fix a bug which prevented user registration via SSO from requiring consent tracking for SSO mapping providers that don't prompt for Matrix ID selection. Contributed by @AndrewFerr. ([\#10733](https://github.com/matrix-org/synapse/issues/10733)) +- Only return the stripped state events for the `m.space.child` events in a room for the spaces summary from [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946). ([\#10760](https://github.com/matrix-org/synapse/issues/10760)) +- Properly handle room upgrades of spaces. ([\#10774](https://github.com/matrix-org/synapse/issues/10774)) +- Fix a bug which generated invalid homeserver config when the `frontend_proxy` worker type was passed to the Synapse Worker-based Complement image. ([\#10783](https://github.com/matrix-org/synapse/issues/10783)) + + +Improved Documentation +---------------------- + +- Minor fix to the `media_repository` developer documentation. Contributed by @cuttingedge1109. ([\#10556](https://github.com/matrix-org/synapse/issues/10556)) +- Update the documentation to note that the `/spaces` and `/hierarchy` endpoints can be routed to workers. ([\#10648](https://github.com/matrix-org/synapse/issues/10648)) +- Clarify the admin API documentation on undoing room deletions.
([\#10735](https://github.com/matrix-org/synapse/issues/10735)) +- Split up the modules documentation and add examples for module developers. ([\#10758](https://github.com/matrix-org/synapse/issues/10758)) +- Correct two typographical errors in the [Log Contexts documentation](https://matrix-org.github.io/synapse/latest/log_contexts.html). ([\#10795](https://github.com/matrix-org/synapse/issues/10795)) +- Fix a wording mistake in the sample configuration. Contributed by @bramvdnheuvel:nltrix.net. ([\#10804](https://github.com/matrix-org/synapse/issues/10804)) + + +Deprecations and Removals +------------------------- + +- Remove the [unstable MSC2858 API](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), including the undocumented `experimental.msc2858_enabled` config option. The unstable API has been deprecated since Synapse 1.35. Client authors should update their clients to use the stable API introduced in Synapse 1.30 if they have not already done so. ([\#10693](https://github.com/matrix-org/synapse/issues/10693)) + + +Internal Changes +---------------- + +- Add OpenTracing logging to help debug stuck messages (as described by issue [#9424](https://github.com/matrix-org/synapse/issues/9424)). ([\#10704](https://github.com/matrix-org/synapse/issues/10704)) +- Add type annotations to the `synapse.util` package. ([\#10601](https://github.com/matrix-org/synapse/issues/10601)) +- Ensure the `rooms.creator` field is always populated for easy lookup in [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) usage later. ([\#10697](https://github.com/matrix-org/synapse/issues/10697)) +- Add missing type hints to REST servlets. ([\#10707](https://github.com/matrix-org/synapse/issues/10707), [\#10728](https://github.com/matrix-org/synapse/issues/10728), [\#10736](https://github.com/matrix-org/synapse/issues/10736)) +- Do not include rooms with unknown room versions in the spaces summary results. ([\#10727](https://github.com/matrix-org/synapse/issues/10727)) +- Add additional error checking for the `preset` field when creating a room. ([\#10738](https://github.com/matrix-org/synapse/issues/10738)) +- Clean up some of the federation event authentication code for clarity. ([\#10744](https://github.com/matrix-org/synapse/issues/10744), [\#10745](https://github.com/matrix-org/synapse/issues/10745), [\#10746](https://github.com/matrix-org/synapse/issues/10746), [\#10771](https://github.com/matrix-org/synapse/issues/10771), [\#10773](https://github.com/matrix-org/synapse/issues/10773), [\#10781](https://github.com/matrix-org/synapse/issues/10781)) +- Add an index to `presence_stream` to hopefully speed up startups a little. ([\#10748](https://github.com/matrix-org/synapse/issues/10748)) +- Refactor event size checking code to simplify searching the codebase for the origins of certain error strings that are occasionally emitted. ([\#10750](https://github.com/matrix-org/synapse/issues/10750)) +- Move tests relating to rooms having encryption out of the user directory tests. ([\#10752](https://github.com/matrix-org/synapse/issues/10752)) +- Use `attrs` internally for the URL preview code and update the documentation. ([\#10753](https://github.com/matrix-org/synapse/issues/10753)) +- Minor speed-ups when joining large rooms over federation.
([\#10754](https://github.com/matrix-org/synapse/issues/10754), [\#10755](https://github.com/matrix-org/synapse/issues/10755), [\#10756](https://github.com/matrix-org/synapse/issues/10756), [\#10780](https://github.com/matrix-org/synapse/issues/10780), [\#10784](https://github.com/matrix-org/synapse/issues/10784)) +- Add a constant for `m.federate`. ([\#10775](https://github.com/matrix-org/synapse/issues/10775)) +- Add a script to update the Debian changelog in a Docker container for systems that are not Debian-based. ([\#10778](https://github.com/matrix-org/synapse/issues/10778)) +- Change the format of authenticated users in logs when a user is being puppeted by an admin user. ([\#10779](https://github.com/matrix-org/synapse/issues/10779)) +- Remove fixed and flaky tests from the SyTest blacklist. ([\#10788](https://github.com/matrix-org/synapse/issues/10788)) +- Improve internal details of the user directory code. ([\#10789](https://github.com/matrix-org/synapse/issues/10789)) +- Use direct references to config flags. ([\#10798](https://github.com/matrix-org/synapse/issues/10798)) +- Ensure the Rust reporter passes type checking with jaeger-client 4.7's type annotations. ([\#10799](https://github.com/matrix-org/synapse/issues/10799)) + + +Synapse 1.42.0 (2021-09-07) +=========================== + +This version of Synapse removes deprecated room-management admin APIs, removes out-of-date email pushers, and improves error handling for fallback templates for user-interactive authentication. For more information on these points, server administrators are encouraged to read [the upgrade notes](docs/upgrade.md#upgrading-to-v1420). + +No significant changes since 1.42.0rc2. + + +Synapse 1.42.0rc2 (2021-09-06) +============================== + +Features +-------- + +- Support room version 9 from [MSC3375](https://github.com/matrix-org/matrix-doc/pull/3375). ([\#10747](https://github.com/matrix-org/synapse/issues/10747)) + + +Internal Changes +---------------- + +- Print a warning when using one of the deprecated `template_dir` settings. ([\#10768](https://github.com/matrix-org/synapse/issues/10768)) + + +Synapse 1.42.0rc1 (2021-09-01) +============================== + +Features +-------- + +- Add support for [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231): Token authenticated registration. Users can be required to submit a token during registration to authenticate themselves. Contributed by Callum Brown. ([\#10142](https://github.com/matrix-org/synapse/issues/10142)) +- Add support for [MSC3283](https://github.com/matrix-org/matrix-doc/pull/3283): Expose `enable_set_displayname` in capabilities. ([\#10452](https://github.com/matrix-org/synapse/issues/10452)) +- Port the `PresenceRouter` module interface to the new generic interface. ([\#10524](https://github.com/matrix-org/synapse/issues/10524)) +- Add pagination to the spaces summary based on updates to [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946). ([\#10613](https://github.com/matrix-org/synapse/issues/10613), [\#10725](https://github.com/matrix-org/synapse/issues/10725)) + + +Bugfixes +-------- + +- Validate new `m.room.power_levels` events. Contributed by @aaronraimist. ([\#10232](https://github.com/matrix-org/synapse/issues/10232)) +- Display an error on User-Interactive Authentication fallback pages when authentication fails. Contributed by Callum Brown. ([\#10561](https://github.com/matrix-org/synapse/issues/10561)) +- Remove pushers when deleting an e-mail address from an account.
Pushers for old unlinked emails will also be deleted. ([\#10581](https://github.com/matrix-org/synapse/issues/10581), [\#10734](https://github.com/matrix-org/synapse/issues/10734)) +- Reject Client-Server `/keys/query` requests which provide `device_ids` incorrectly. ([\#10593](https://github.com/matrix-org/synapse/issues/10593)) +- Rooms with unsupported room versions are no longer returned via `/sync`. ([\#10644](https://github.com/matrix-org/synapse/issues/10644)) +- Enforce the maximum length for per-room display names and avatar URLs. ([\#10654](https://github.com/matrix-org/synapse/issues/10654)) +- Fix a bug which caused the `synapse_user_logins_total` Prometheus metric not to be correctly initialised on restart. ([\#10677](https://github.com/matrix-org/synapse/issues/10677)) +- Improve `ServerNoticeServlet` to avoid duplicate requests and add unit tests. ([\#10679](https://github.com/matrix-org/synapse/issues/10679)) +- Fix a long-standing issue which caused an error when a thumbnail was requested and there were multiple thumbnails with the same quality rating. ([\#10684](https://github.com/matrix-org/synapse/issues/10684)) +- Fix a regression introduced in v1.41.0 which affected the performance of concurrent fetches of large sets of events, in extreme cases causing the process to hang. ([\#10703](https://github.com/matrix-org/synapse/issues/10703)) +- Fix a regression introduced in Synapse 1.41 which broke email transmission on systems using older versions of the Twisted library. ([\#10713](https://github.com/matrix-org/synapse/issues/10713)) + + +Improved Documentation +---------------------- + +- Add documentation on how to connect Django with Synapse using OpenID Connect and django-oauth-toolkit. Contributed by @HugoDelval. ([\#10192](https://github.com/matrix-org/synapse/issues/10192)) +- Advertise the https://matrix-org.github.io/synapse documentation in the `README` and `CONTRIBUTING` files. ([\#10595](https://github.com/matrix-org/synapse/issues/10595)) +- Fix some of the titles not rendering in the OpenID Connect documentation. ([\#10639](https://github.com/matrix-org/synapse/issues/10639)) +- Minor clarifications to the documentation for reverse proxies. ([\#10708](https://github.com/matrix-org/synapse/issues/10708)) +- Remove table of contents from the top of installation and contributing documentation pages. ([\#10711](https://github.com/matrix-org/synapse/issues/10711)) + + +Deprecations and Removals +------------------------- + +- Remove the deprecated Shutdown Room and Purge Room admin APIs. ([\#8830](https://github.com/matrix-org/synapse/issues/8830)) + + +Internal Changes +---------------- + +- Improve type hints for the proxy agent and SRV resolver modules. Contributed by @dklimpel. ([\#10608](https://github.com/matrix-org/synapse/issues/10608)) +- Clean up some of the federation event authentication code for clarity. ([\#10614](https://github.com/matrix-org/synapse/issues/10614), [\#10615](https://github.com/matrix-org/synapse/issues/10615), [\#10624](https://github.com/matrix-org/synapse/issues/10624), [\#10640](https://github.com/matrix-org/synapse/issues/10640)) +- Add a comment asking developers to leave a reason when bumping the database schema version. ([\#10621](https://github.com/matrix-org/synapse/issues/10621)) +- Remove unneeded database updates from the modify user admin API. ([\#10627](https://github.com/matrix-org/synapse/issues/10627)) +- Convert room member storage tuples to `attrs` classes.
([\#10629](https://github.com/matrix-org/synapse/issues/10629), [\#10642](https://github.com/matrix-org/synapse/issues/10642)) +- Use auto-attribs for the attrs classes used in sync. ([\#10630](https://github.com/matrix-org/synapse/issues/10630)) +- Make `backfill` and `get_missing_events` use the same codepath. ([\#10645](https://github.com/matrix-org/synapse/issues/10645)) +- Improve the performance of the `/hierarchy` API (from [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946)) by caching responses received over federation. ([\#10647](https://github.com/matrix-org/synapse/issues/10647)) +- Run a nightly CI build against Twisted trunk. ([\#10651](https://github.com/matrix-org/synapse/issues/10651), [\#10672](https://github.com/matrix-org/synapse/issues/10672)) +- Do not print out stack traces for network errors when fetching data over federation. ([\#10662](https://github.com/matrix-org/synapse/issues/10662)) +- Simplify tests for the device admin REST API. ([\#10664](https://github.com/matrix-org/synapse/issues/10664)) +- Add missing type hints to REST servlets. ([\#10665](https://github.com/matrix-org/synapse/issues/10665), [\#10666](https://github.com/matrix-org/synapse/issues/10666), [\#10674](https://github.com/matrix-org/synapse/issues/10674)) +- Flatten the `tests.synapse.rests` package by moving the contents of `v1` and `v2_alpha` into the parent. ([\#10667](https://github.com/matrix-org/synapse/issues/10667)) +- Update `complement.sh` to rebuild the base Docker image when run with workers. ([\#10686](https://github.com/matrix-org/synapse/issues/10686)) +- Split the event-processing methods in `FederationHandler` into a separate `FederationEventHandler`. ([\#10692](https://github.com/matrix-org/synapse/issues/10692)) +- Remove the unused `compare_digest` function. ([\#10706](https://github.com/matrix-org/synapse/issues/10706)) + + +Synapse 1.41.1 (2021-08-31) +=========================== + +Due to the two security issues highlighted below, server administrators are encouraged to update Synapse. We are not aware of these vulnerabilities being exploited in the wild. + +Security advisory +----------------- + +The following issues are fixed in v1.41.1. + +- **[GHSA-3x4c-pq33-4w3q](https://github.com/matrix-org/synapse/security/advisories/GHSA-3x4c-pq33-4w3q) / [CVE-2021-39164](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-39164): Enumerating a private room's list of members and their display names.** + + If an unauthorized user both knows the Room ID of a private room *and* that room's history visibility is set to `shared`, then they may be able to enumerate the room's members, including their display names. + + The unauthorized user must be on the same homeserver as a user who is a member of the target room. + + Fixed by [52c7a51cf](https://github.com/matrix-org/synapse/commit/52c7a51cf). + +- **[GHSA-jj53-8fmw-f2w2](https://github.com/matrix-org/synapse/security/advisories/GHSA-jj53-8fmw-f2w2) / [CVE-2021-39163](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-39163): Disclosing a private room's name, avatar, topic, and number of members.** + + If an unauthorized user knows the Room ID of a private room, then its name, avatar, topic, and number of members may be disclosed through Group / Community features. + + The unauthorized user must be on the same homeserver as a user who is a member of the target room, and their homeserver must allow non-administrators to create groups (`enable_group_creation` in the Synapse configuration; off by default). + + + Fixed by [cb35df940a](https://github.com/matrix-org/synapse/commit/cb35df940a), [\#10723](https://github.com/matrix-org/synapse/issues/10723).
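+
+For administrators assessing exposure before upgrading, a crude spot-check of that precondition might look like the following (a sketch only; the config path is a placeholder, and upgrading to 1.41.1 is the actual fix):
+
+```sh
+# Sketch: check whether non-admin group creation is enabled.
+# If this prints nothing, the option is unset and the default (false)
+# applies, so the precondition for the second advisory is not met.
+grep "enable_group_creation" /etc/synapse/homeserver.yaml
+```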
+ +Bugfixes +-------- + +- Fix a regression introduced in Synapse 1.41 which broke email transmission on systems using older versions of the Twisted library. ([\#10713](https://github.com/matrix-org/synapse/issues/10713)) + +Synapse 1.41.0 (2021-08-24) +=========================== + +This release adds support for Debian 12 (Bookworm), but **removes support for Ubuntu 20.10 (Groovy Gorilla)**, which reached End of Life last month. + +Note that when using workers the `/_synapse/admin/v1/users/{userId}/media` endpoint must now be handled by media workers. See the [upgrade notes](https://matrix-org.github.io/synapse/latest/upgrade.html) for more information. + + +Features +-------- + +- Enable room capabilities ([MSC3244](https://github.com/matrix-org/matrix-doc/pull/3244)) by default and set room version 8 as the preferred room version when creating restricted rooms. ([\#10571](https://github.com/matrix-org/synapse/issues/10571)) + + +Synapse 1.41.0rc1 (2021-08-18) +============================== + +Features +-------- + +- Add a `get_userinfo_by_id` method to `ModuleApi`. ([\#9581](https://github.com/matrix-org/synapse/issues/9581)) +- Initial local support for [MSC3266](https://github.com/matrix-org/synapse/pull/10394), Room Summary over the unstable `/rooms/{roomIdOrAlias}/summary` API. ([\#10394](https://github.com/matrix-org/synapse/issues/10394)) +- Experimental support for [MSC3288](https://github.com/matrix-org/matrix-doc/pull/3288), sending `room_type` to the identity server for 3pid invites over the `/store-invite` API. ([\#10435](https://github.com/matrix-org/synapse/issues/10435)) +- Add support for sending federation requests through a proxy. Contributed by @Bubu and @dklimpel. See the [upgrade notes](https://matrix-org.github.io/synapse/latest/upgrade.html) for more information ([\#10596](https://github.com/matrix-org/synapse/issues/10596)). ([\#10475](https://github.com/matrix-org/synapse/issues/10475)) +- Add support for "marker" events which make historical events discoverable for servers that already have all of the scrollback history (part of [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716)). ([\#10498](https://github.com/matrix-org/synapse/issues/10498)) +- Add a configuration setting for the time a `/sync` response is cached for. ([\#10513](https://github.com/matrix-org/synapse/issues/10513)) +- The default logging handler for new installations is now `PeriodicallyFlushingMemoryHandler`, a buffered logging handler which periodically flushes itself. ([\#10518](https://github.com/matrix-org/synapse/issues/10518)) +- Add support for new redaction rules for historical events specified in [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716). ([\#10538](https://github.com/matrix-org/synapse/issues/10538)) +- Add a setting to disable TLS when sending email. ([\#10546](https://github.com/matrix-org/synapse/issues/10546)) +- Add pagination to the spaces summary based on updates to [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946).
([\#10549](https://github.com/matrix-org/synapse/issues/10549), [\#10560](https://github.com/matrix-org/synapse/issues/10560), [\#10569](https://github.com/matrix-org/synapse/issues/10569), [\#10574](https://github.com/matrix-org/synapse/issues/10574), [\#10575](https://github.com/matrix-org/synapse/issues/10575), [\#10579](https://github.com/matrix-org/synapse/issues/10579), [\#10583](https://github.com/matrix-org/synapse/issues/10583)) +- Add an admin API to delete several media items for a specific user. Contributed by @dklimpel. ([\#10558](https://github.com/matrix-org/synapse/issues/10558), [\#10628](https://github.com/matrix-org/synapse/issues/10628)) +- Add support for routing `/createRoom` to workers. ([\#10564](https://github.com/matrix-org/synapse/issues/10564)) +- Update the Synapse Grafana dashboard. ([\#10570](https://github.com/matrix-org/synapse/issues/10570)) +- Add an admin API (`GET /_synapse/admin/username_available`) to check if a username is available (regardless of registration settings); see the sketch after this list. ([\#10578](https://github.com/matrix-org/synapse/issues/10578)) +- Allow editing a user's `external_ids` via the "Edit User" admin API. Contributed by @dklimpel. ([\#10598](https://github.com/matrix-org/synapse/issues/10598)) +- The Synapse manhole no longer needs coroutines to be wrapped in `defer.ensureDeferred`. ([\#10602](https://github.com/matrix-org/synapse/issues/10602)) +- Add an option to allow modules to run periodic tasks on all instances, rather than just the one configured to run background tasks. ([\#10638](https://github.com/matrix-org/synapse/issues/10638))
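+
+As a rough sketch of the new username-availability check (hedged: the versioned path and `username` query parameter are assumptions based on the admin API documentation, and the server name and token are placeholders):
+
+```sh
+# Sketch: ask the homeserver whether a localpart is still free.
+# Substitute a real server name and admin access token.
+curl -H "Authorization: Bearer <admin_access_token>" \
+  "https://homeserver.example.com/_synapse/admin/v1/username_available?username=alice"
+```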
+ + +Bugfixes +-------- + +- Add some clarification to the sample config file. Contributed by @Kentokamoto. ([\#10129](https://github.com/matrix-org/synapse/issues/10129)) +- Fix a long-standing bug where protocols which are not implemented by any appservices were incorrectly returned via `GET /_matrix/client/r0/thirdparty/protocols`. ([\#10532](https://github.com/matrix-org/synapse/issues/10532)) +- Fix exceptions in logs when failing to get the remote room list. ([\#10541](https://github.com/matrix-org/synapse/issues/10541)) +- Fix a long-standing bug which caused the user's presence "status message" to be reset when the user went offline. Contributed by @dklimpel. ([\#10550](https://github.com/matrix-org/synapse/issues/10550)) +- Allow public rooms to be previewed in the spaces summary APIs from [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946). ([\#10580](https://github.com/matrix-org/synapse/issues/10580)) +- Fix a bug introduced in v1.37.1 where an error could occur in the asynchronous processing of PDUs when the queue was empty. ([\#10592](https://github.com/matrix-org/synapse/issues/10592)) +- Fix errors on `/sync` when read receipt data is a string. Only affects homeservers with the experimental flag for [MSC2285](https://github.com/matrix-org/matrix-doc/pull/2285) enabled. Contributed by @SimonBrandner. ([\#10606](https://github.com/matrix-org/synapse/issues/10606)) +- Add additional validation for the spaces summary API to avoid errors like `ValueError: Stop argument for islice() must be None or an integer`. The missing validation has existed since v1.31.0. ([\#10611](https://github.com/matrix-org/synapse/issues/10611)) +- Revert behaviour introduced in v1.38.0 that strips `org.matrix.msc2732.device_unused_fallback_key_types` from `/sync` when its value is empty. This field should instead always be present according to [MSC2732](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2732-olm-fallback-keys.md). ([\#10623](https://github.com/matrix-org/synapse/issues/10623)) + + +Improved Documentation +---------------------- + +- Add documentation for configuring a forward proxy. ([\#10443](https://github.com/matrix-org/synapse/issues/10443)) +- Update the reverse proxy documentation to highlight the homeserver configuration that is needed to make Synapse aware that it is intentionally reverse proxied. ([\#10551](https://github.com/matrix-org/synapse/issues/10551)) +- Update CONTRIBUTING.md to fix index links and the instructions for SyTest in Docker. ([\#10599](https://github.com/matrix-org/synapse/issues/10599)) + + +Deprecations and Removals +------------------------- + +- No longer build `.deb` packages for Ubuntu 20.10 Groovy Gorilla, which has now reached End of Life. ([\#10588](https://github.com/matrix-org/synapse/issues/10588)) +- The `template_dir` configuration settings in the `sso`, `account_validity` and `email` sections of the configuration file are now deprecated in favour of the global `templates.custom_template_directory` setting. See the [upgrade notes](https://matrix-org.github.io/synapse/latest/upgrade.html) for more information. ([\#10596](https://github.com/matrix-org/synapse/issues/10596)) + + +Internal Changes +---------------- + +- Improve the event caching mechanism to avoid having multiple copies of an event in memory at a time. ([\#10119](https://github.com/matrix-org/synapse/issues/10119)) +- Reduce errors in PostgreSQL logs due to concurrent serialization errors. ([\#10504](https://github.com/matrix-org/synapse/issues/10504)) +- Include room ID in ignored EDU log messages. Contributed by @ilmari. ([\#10507](https://github.com/matrix-org/synapse/issues/10507)) +- Add pagination to the spaces summary based on updates to [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946). ([\#10527](https://github.com/matrix-org/synapse/issues/10527), [\#10530](https://github.com/matrix-org/synapse/issues/10530)) +- Fix CI to not break when run against branches rather than pull requests. ([\#10529](https://github.com/matrix-org/synapse/issues/10529)) +- Mark all events stemming from the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint as historical. ([\#10537](https://github.com/matrix-org/synapse/issues/10537)) +- Clean up some of the federation event authentication code for clarity. ([\#10539](https://github.com/matrix-org/synapse/issues/10539), [\#10591](https://github.com/matrix-org/synapse/issues/10591)) +- Convert `Transaction` and `Edu` objects to attrs. ([\#10542](https://github.com/matrix-org/synapse/issues/10542)) +- Update the `/batch_send` endpoint to only return `state_events` created by the `state_events_from_before` passed in. ([\#10552](https://github.com/matrix-org/synapse/issues/10552)) +- Update contributing.md to warn against rebasing an open PR. ([\#10563](https://github.com/matrix-org/synapse/issues/10563)) +- Remove the unused public rooms replication stream. ([\#10565](https://github.com/matrix-org/synapse/issues/10565)) +- Clarify the error message when failing to join a restricted room. ([\#10572](https://github.com/matrix-org/synapse/issues/10572)) +- Remove references to BuildKite in favour of GitHub Actions. ([\#10573](https://github.com/matrix-org/synapse/issues/10573)) +- Move the `/batch_send` endpoint defined by [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) to the `/v2_alpha` directory. ([\#10576](https://github.com/matrix-org/synapse/issues/10576)) +- Allow multiple custom directories in `read_templates`.
([\#10587](https://github.com/matrix-org/synapse/issues/10587)) +- Re-organize the `synapse.federation.transport.server` module to create smaller files. ([\#10590](https://github.com/matrix-org/synapse/issues/10590)) +- Flatten the `synapse.rest.client` package by moving the contents of `v1` and `v2_alpha` into the parent. ([\#10600](https://github.com/matrix-org/synapse/issues/10600)) +- Build Debian packages for Debian 12 (Bookworm). ([\#10612](https://github.com/matrix-org/synapse/issues/10612)) +- Fix up a couple of links to the database schema documentation. ([\#10620](https://github.com/matrix-org/synapse/issues/10620)) +- Fix a broken link to the upgrade notes. ([\#10631](https://github.com/matrix-org/synapse/issues/10631)) + + Synapse 1.40.0 (2021-08-10) =========================== @@ -7963,14 +8942,14 @@ General: Federation: -- Add key distribution mechanisms for fetching public keys of unavailable remote home servers. See [Retrieving Server Keys](https://github.com/matrix-org/matrix-doc/blob/6f2698/specification/30_server_server_api.rst#retrieving-server-keys) in the spec. +- Add key distribution mechanisms for fetching public keys of unavailable remote homeservers. See [Retrieving Server Keys](https://github.com/matrix-org/matrix-doc/blob/6f2698/specification/30_server_server_api.rst#retrieving-server-keys) in the spec. Configuration: - Add support for multiple config files. - Add support for dictionaries in config files. - Remove support for specifying config options on the command line, except for: - - `--daemonize` - Daemonize the home server. + - `--daemonize` - Daemonize the homeserver. - `--manhole` - Turn on the twisted telnet manhole service on the given port. - `--database-path` - The path to a sqlite database to use. - `--verbose` - The verbosity level. @@ -8175,7 +9154,7 @@ This version adds support for using a TURN server. See docs/turn-howto.rst on ho Homeserver: - Add support for redaction of messages. -- Fix bug where inviting a user on a remote home server could take up to 20-30s. +- Fix bug where inviting a user on a remote homeserver could take up to 20-30s. - Implement a get current room state API. - Add support specifying and retrieving turn server configuration. @@ -8265,7 +9244,7 @@ Changes in synapse 0.2.3 (2014-09-12) Homeserver: -- Fix bug where we stopped sending events to remote home servers if a user from that home server left, even if there were some still in the room. +- Fix bug where we stopped sending events to remote homeservers if a user from that homeserver left, even if there were some still in the room. - Fix bugs in the state conflict resolution where it was incorrectly rejecting events. Webclient: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e7eef23419d5..2c85edf71279 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,441 +1,3 @@ -Welcome to Synapse +# Welcome to Synapse -This document aims to get you started with contributing to this repo! - -- [1. Who can contribute to Synapse?](#1-who-can-contribute-to-synapse) -- [2. What do I need?](#2-what-do-i-need) -- [3. Get the source.](#3-get-the-source) -- [4. Install the dependencies](#4-install-the-dependencies) - * [Under Unix (macOS, Linux, BSD, ...)](#under-unix-macos-linux-bsd-) - * [Under Windows](#under-windows) -- [5. Get in touch.](#5-get-in-touch) -- [6. Pick an issue.](#6-pick-an-issue) -- [7. Turn coffee and documentation into code and documentation!](#7-turn-coffee-and-documentation-into-code-and-documentation) -- [8. 
Test, test, test!](#8-test-test-test) - * [Run the linters.](#run-the-linters) - * [Run the unit tests.](#run-the-unit-tests) - * [Run the integration tests.](#run-the-integration-tests) -- [9. Submit your patch.](#9-submit-your-patch) - * [Changelog](#changelog) - + [How do I know what to call the changelog file before I create the PR?](#how-do-i-know-what-to-call-the-changelog-file-before-i-create-the-pr) - + [Debian changelog](#debian-changelog) - * [Sign off](#sign-off) -- [10. Turn feedback into better code.](#10-turn-feedback-into-better-code) -- [11. Find a new issue.](#11-find-a-new-issue) -- [Notes for maintainers on merging PRs etc](#notes-for-maintainers-on-merging-prs-etc) -- [Conclusion](#conclusion) - -# 1. Who can contribute to Synapse? - -Everyone is welcome to contribute code to [matrix.org -projects](https://github.com/matrix-org), provided that they are willing to -license their contributions under the same license as the project itself. We -follow a simple 'inbound=outbound' model for contributions: the act of -submitting an 'inbound' contribution means that the contributor agrees to -license the code under the same terms as the project's overall 'outbound' -license - in our case, this is almost always Apache Software License v2 (see -[LICENSE](LICENSE)). - -# 2. What do I need? - -The code of Synapse is written in Python 3. To do pretty much anything, you'll need [a recent version of Python 3](https://wiki.python.org/moin/BeginnersGuide/Download). - -The source code of Synapse is hosted on GitHub. You will also need [a recent version of git](https://github.com/git-guides/install-git). - -For some tests, you will need [a recent version of Docker](https://docs.docker.com/get-docker/). - - -# 3. Get the source. - -The preferred and easiest way to contribute changes is to fork the relevant -project on GitHub, and then [create a pull request]( -https://help.github.com/articles/using-pull-requests/) to ask us to pull your -changes into our repo. - -Please base your changes on the `develop` branch. - -```sh -git clone git@github.com:YOUR_GITHUB_USER_NAME/synapse.git -git checkout develop -``` - -If you need help getting started with git, this is beyond the scope of the document, but you -can find many good git tutorials on the web. - -# 4. Install the dependencies - -## Under Unix (macOS, Linux, BSD, ...) - -Once you have installed Python 3 and added the source, please open a terminal and -setup a *virtualenv*, as follows: - -```sh -cd path/where/you/have/cloned/the/repository -python3 -m venv ./env -source ./env/bin/activate -pip install -e ".[all,lint,mypy,test]" -pip install tox -``` - -This will install the developer dependencies for the project. - -## Under Windows - -TBD - - -# 5. Get in touch. - -Join our developer community on Matrix: #synapse-dev:matrix.org ! - - -# 6. Pick an issue. - -Fix your favorite problem or perhaps find a [Good First Issue](https://github.com/matrix-org/synapse/issues?q=is%3Aopen+is%3Aissue+label%3A%22Good+First+Issue%22) -to work on. - - -# 7. Turn coffee and documentation into code and documentation! - -Synapse's code style is documented [here](docs/code_style.md). Please follow -it, including the conventions for the [sample configuration -file](docs/code_style.md#configuration-file-format). - -There is a growing amount of documentation located in the [docs](docs) -directory. This documentation is intended primarily for sysadmins running their -own Synapse instance, as well as developers interacting externally with -Synapse. 
[docs/dev](docs/dev) exists primarily to house documentation for -Synapse developers. [docs/admin_api](docs/admin_api) houses documentation -regarding Synapse's Admin API, which is used mostly by sysadmins and external -service developers. - -If you add new files added to either of these folders, please use [GitHub-Flavoured -Markdown](https://guides.github.com/features/mastering-markdown/). - -Some documentation also exists in [Synapse's GitHub -Wiki](https://github.com/matrix-org/synapse/wiki), although this is primarily -contributed to by community authors. - - -# 8. Test, test, test! - - -While you're developing and before submitting a patch, you'll -want to test your code. - -## Run the linters. - -The linters look at your code and do two things: - -- ensure that your code follows the coding style adopted by the project; -- catch a number of errors in your code. - -They're pretty fast, don't hesitate! - -```sh -source ./env/bin/activate -./scripts-dev/lint.sh -``` - -Note that this script *will modify your files* to fix styling errors. -Make sure that you have saved all your files. - -If you wish to restrict the linters to only the files changed since the last commit -(much faster!), you can instead run: - -```sh -source ./env/bin/activate -./scripts-dev/lint.sh -d -``` - -Or if you know exactly which files you wish to lint, you can instead run: - -```sh -source ./env/bin/activate -./scripts-dev/lint.sh path/to/file1.py path/to/file2.py path/to/folder -``` - -## Run the unit tests (Twisted trial). - -The unit tests run parts of Synapse, including your changes, to see if anything -was broken. They are slower than the linters but will typically catch more errors. - -```sh -source ./env/bin/activate -trial tests -``` - -If you wish to only run *some* unit tests, you may specify -another module instead of `tests` - or a test class or a method: - -```sh -source ./env/bin/activate -trial tests.rest.admin.test_room tests.handlers.test_admin.ExfiltrateData.test_invite -``` - -If your tests fail, you may wish to look at the logs (the default log level is `ERROR`): - -```sh -less _trial_temp/test.log -``` - -To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`: - -```sh -SYNAPSE_TEST_LOG_LEVEL=DEBUG trial tests -``` - - -## Run the integration tests ([Sytest](https://github.com/matrix-org/sytest)). - -The integration tests are a more comprehensive suite of tests. They -run a full version of Synapse, including your changes, to check if -anything was broken. They are slower than the unit tests but will -typically catch more errors. - -The following command will let you run the integration test with the most common -configuration: - -```sh -$ docker run --rm -it -v /path/where/you/have/cloned/the/repository\:/src:ro -v /path/to/where/you/want/logs\:/logs matrixdotorg/sytest-synapse:py37 -``` - -This configuration should generally cover your needs. For more details about other configurations, see [documentation in the SyTest repo](https://github.com/matrix-org/sytest/blob/develop/docker/README.md). - - -## Run the integration tests ([Complement](https://github.com/matrix-org/complement)). - -[Complement](https://github.com/matrix-org/complement) is a suite of black box tests that can be run on any homeserver implementation. It can also be thought of as end-to-end (e2e) tests. - -It's often nice to develop on Synapse and write Complement tests at the same time. -Here is how to run your local Synapse checkout against your local Complement checkout. 
- -(checkout [`complement`](https://github.com/matrix-org/complement) alongside your `synapse` checkout) -```sh -COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -``` - -To run a specific test file, you can pass the test name at the end of the command. The name passed comes from the naming structure in your Complement tests. If you're unsure of the name, you can do a full run and copy it from the test output: - -```sh -COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh TestBackfillingHistory -``` - -To run a specific test, you can specify the whole name structure: - -```sh -COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order -``` - - -### Access database for homeserver after Complement test runs. - -If you're curious what the database looks like after you run some tests, here are some steps to get you going in Synapse: - - 1. In your Complement test comment out `defer deployment.Destroy(t)` and replace with `defer time.Sleep(2 * time.Hour)` to keep the homeserver running after the tests complete - 1. Start the Complement tests - 1. Find the name of the container, `docker ps -f name=complement_` (this will filter for just the Compelement related Docker containers) - 1. Access the container replacing the name with what you found in the previous step: `docker exec -it complement_1_hs_with_application_service.hs1_2 /bin/bash` - 1. Install sqlite (database driver), `apt-get update && apt-get install -y sqlite3` - 1. Then run `sqlite3` and open the database `.open /conf/homeserver.db` (this db path comes from the Synapse homeserver.yaml) - - -# 9. Submit your patch. - -Once you're happy with your patch, it's time to prepare a Pull Request. - -To prepare a Pull Request, please: - -1. verify that [all the tests pass](#test-test-test), including the coding style; -2. [sign off](#sign-off) your contribution; -3. `git push` your commit to your fork of Synapse; -4. on GitHub, [create the Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request); -5. add a [changelog entry](#changelog) and push it to your Pull Request; -6. for most contributors, that's all - however, if you are a member of the organization `matrix-org`, on GitHub, please request a review from `matrix.org / Synapse Core`. - - -## Changelog - -All changes, even minor ones, need a corresponding changelog / newsfragment -entry. These are managed by [Towncrier](https://github.com/hawkowl/towncrier). - -To create a changelog entry, make a new file in the `changelog.d` directory named -in the format of `PRnumber.type`. The type can be one of the following: - -* `feature` -* `bugfix` -* `docker` (for updates to the Docker image) -* `doc` (for updates to the documentation) -* `removal` (also used for deprecations) -* `misc` (for internal-only changes) - -This file will become part of our [changelog]( -https://github.com/matrix-org/synapse/blob/master/CHANGES.md) at the next -release, so the content of the file should be a short description of your -change in the same style as the rest of the changelog. The file can contain Markdown -formatting, and should end with a full stop (.) or an exclamation mark (!) for -consistency. - -Adding credits to the changelog is encouraged, we value your -contributions and would like to have you shouted out in the release notes! 
- -For example, a fix in PR #1234 would have its changelog entry in -`changelog.d/1234.bugfix`, and contain content like: - -> The security levels of Florbs are now validated when received -> via the `/federation/florb` endpoint. Contributed by Jane Matrix. - -If there are multiple pull requests involved in a single bugfix/feature/etc, -then the content for each `changelog.d` file should be the same. Towncrier will -merge the matching files together into a single changelog entry when we come to -release. - -### How do I know what to call the changelog file before I create the PR? - -Obviously, you don't know if you should call your newsfile -`1234.bugfix` or `5678.bugfix` until you create the PR, which leads to a -chicken-and-egg problem. - -There are two options for solving this: - - 1. Open the PR without a changelog file, see what number you got, and *then* - add the changelog file to your branch (see [Updating your pull - request](#updating-your-pull-request)), or: - - 1. Look at the [list of all - issues/PRs](https://github.com/matrix-org/synapse/issues?q=), add one to the - highest number you see, and quickly open the PR before somebody else claims - your number. - - [This - script](https://github.com/richvdh/scripts/blob/master/next_github_number.sh) - might be helpful if you find yourself doing this a lot. - -Sorry, we know it's a bit fiddly, but it's *really* helpful for us when we come -to put together a release! - -### Debian changelog - -Changes which affect the debian packaging files (in `debian`) are an -exception to the rule that all changes require a `changelog.d` file. - -In this case, you will need to add an entry to the debian changelog for the -next release. For this, run the following command: - -``` -dch -``` - -This will make up a new version number (if there isn't already an unreleased -version in flight), and open an editor where you can add a new changelog entry. -(Our release process will ensure that the version number and maintainer name is -corrected for the release.) - -If your change affects both the debian packaging *and* files outside the debian -directory, you will need both a regular newsfragment *and* an entry in the -debian changelog. (Though typically such changes should be submitted as two -separate pull requests.) - -## Sign off - -In order to have a concrete record that your contribution is intentional -and you agree to license it under the same terms as the project's license, we've adopted the -same lightweight approach that the Linux Kernel -[submitting patches process]( -https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin>), -[Docker](https://github.com/docker/docker/blob/master/CONTRIBUTING.md), and many other -projects use: the DCO (Developer Certificate of Origin: -http://developercertificate.org/). This is a simple declaration that you wrote -the contribution or otherwise have the right to contribute it to Matrix: - -``` -Developer Certificate of Origin -Version 1.1 - -Copyright (C) 2004, 2006 The Linux Foundation and its contributors. -660 York Street, Suite 102, -San Francisco, CA 94110 USA - -Everyone is permitted to copy and distribute verbatim copies of this -license document, but changing it is not allowed. 
- -Developer's Certificate of Origin 1.1 - -By making a contribution to this project, I certify that: - -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. -``` - -If you agree to this for your contribution, then all that's needed is to -include the line in your commit or pull request comment: - -``` -Signed-off-by: Your Name -``` - -We accept contributions under a legally identifiable name, such as -your name on government documentation or common-law names (names -claimed by legitimate usage or repute). Unfortunately, we cannot -accept anonymous contributions at this time. - -Git allows you to add this signoff automatically when using the `-s` -flag to `git commit`, which uses the name and email set in your -`user.name` and `user.email` git configs. - - -# 10. Turn feedback into better code. - -Once the Pull Request is opened, you will see a few things: - -1. our automated CI (Continuous Integration) pipeline will run (again) the linters, the unit tests, the integration tests and more; -2. one or more of the developers will take a look at your Pull Request and offer feedback. - -From this point, you should: - -1. Look at the results of the CI pipeline. - - If there is any error, fix the error. -2. If a developer has requested changes, make these changes and let us know if it is ready for a developer to review again. -3. Create a new commit with the changes. - - Please do NOT overwrite the history. New commits make the reviewer's life easier. - - Push this commits to your Pull Request. -4. Back to 1. - -Once both the CI and the developers are happy, the patch will be merged into Synapse and released shortly! - -# 11. Find a new issue. - -By now, you know the drill! - -# Notes for maintainers on merging PRs etc - -There are some notes for those with commit access to the project on how we -manage git [here](docs/development/git.md). - -# Conclusion - -That's it! Matrix is a very open and collaborative project as you might expect -given our obsession with open communication. If we're going to successfully -matrix together all the fragmented communication technologies out there we are -reliant on contributions and collaboration from the community to do so. So -please get involved - and we hope you have as much fun hacking on Matrix as we -do! +Please see the [contributors' guide](https://matrix-org.github.io/synapse/latest/development/contributing_guide.html) in our rendered documentation. 
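As a quick illustration of the sign-off workflow described above (a minimal sketch; the name, email, and commit message are placeholders, not values from this repository, and `Jane Matrix` mirrors the example contributor used in the changelog text):

```sh
# `git commit -s` copies user.name and user.email into a DCO trailer.
git config user.name "Jane Matrix"
git config user.email "jane@example.org"

# Create a signed-off commit:
git commit -s -m "Fix florb validation"

# The resulting commit message ends with the line:
#   Signed-off-by: Jane Matrix <jane@example.org>
```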
diff --git a/MANIFEST.in b/MANIFEST.in index 0522319c4000..c24786c3b371 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,7 @@ include demo/demo.tls.dh include demo/*.py include demo/*.sh +include synapse/py.typed recursive-include synapse/storage *.sql recursive-include synapse/storage *.sql.postgres recursive-include synapse/storage *.sql.sqlite @@ -44,9 +45,9 @@ include book.toml include pyproject.toml recursive-include changelog.d * -prune .buildkite prune .circleci prune .github +prune .ci prune contrib prune debian prune demo/etc diff --git a/README.rst b/README.rst index 0ae05616e77b..50de3a49b05f 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ -========================================================= -Synapse |support| |development| |license| |pypi| |python| -========================================================= +========================================================================= +Synapse |support| |development| |documentation| |license| |pypi| |python| +========================================================================= .. contents:: @@ -55,11 +55,8 @@ solutions. The hope is for Matrix to act as the building blocks for a new generation of fully open and interoperable messaging and VoIP apps for the internet. -Synapse is a reference "homeserver" implementation of Matrix from the core -development team at matrix.org, written in Python/Twisted. It is intended to -showcase the concept of Matrix and let folks see the spec in the context of a -codebase and let you run your own homeserver and generally help bootstrap the -ecosystem. +Synapse is a Matrix "homeserver" implementation developed by the matrix.org core +team, written in Python 3/Twisted. In Matrix, every user runs one or more Matrix clients, which connect through to a Matrix homeserver. The homeserver stores all their personal chat history and @@ -85,9 +82,14 @@ For support installing or managing Synapse, please join |room|_ (from a matrix.org account if necessary) and ask questions there. We do not use GitHub issues for support requests, only for bug reports and feature requests. +Synapse's documentation is `nicely rendered on GitHub Pages <https://matrix-org.github.io/synapse/latest/>`_, +with its source available in |docs|_. + .. |room| replace:: ``#synapse:matrix.org`` .. _room: https://matrix.to/#/#synapse:matrix.org +.. |docs| replace:: ``docs`` +.. _docs: docs Synapse Installation ==================== @@ -263,11 +265,27 @@ Then update the ``users`` table in the database:: Synapse Development =================== -Join our developer community on Matrix: `#synapse-dev:matrix.org <https://matrix.to/#/#synapse-dev:matrix.org>`_ +The best place to get started is our +`guide for contributors <https://matrix-org.github.io/synapse/latest/development/contributing_guide.html>`_. +This is part of our larger `documentation <https://matrix-org.github.io/synapse/latest/>`_, which includes +information for Synapse developers as well as Synapse administrators. + +Developers might be particularly interested in: + +* `Synapse's database schema `_, +* `notes on Synapse's implementation details `_, and +* `how we use git <https://matrix-org.github.io/synapse/latest/development/git.html>`_. + +Alongside all that, join our developer community on Matrix: +`#synapse-dev:matrix.org <https://matrix.to/#/#synapse-dev:matrix.org>`_, featuring real humans! + + +Quick start +----------- Before setting up a development environment for synapse, make sure you have the system dependencies (such as the python header files) installed - see -`Installing from source `_. +`Platform-specific prerequisites `_. 
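+For example, on a Debian-based system the documented prerequisites can be
+installed roughly as follows (a sketch based on the platform-specific
+prerequisites documentation; treat that page, not this list, as
+authoritative)::
+
+    sudo apt install build-essential python3-dev libffi-dev python3-pip \
+                     python3-setuptools sqlite3 libssl-dev virtualenv \
+                     libjpeg-dev libxslt1-dev
+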
To check out a synapse for development, clone the git repo into a working directory of your choice:: @@ -280,7 +298,7 @@ to install using pip and a virtualenv:: python3 -m venv ./env source ./env/bin/activate - pip install -e ".[all,test]" + pip install -e ".[all,dev]" This will run a process of downloading and installing all the needed dependencies into a virtual env. If any dependencies fail to install, @@ -308,7 +326,7 @@ If you just want to start a single instance of the app and run it directly:: Running the unit tests -====================== +---------------------- After getting up and running, you may wish to run Synapse's unit tests to check that everything is installed correctly:: @@ -327,7 +345,7 @@ to see the logging output, see the `CONTRIBUTING doc `_, a Matrix homeserver integration testing suite, which uses HTTP requests to @@ -445,6 +463,10 @@ This is normally caused by a misconfiguration in your reverse-proxy. See :alt: (discuss development on #synapse-dev:matrix.org) :target: https://matrix.to/#/#synapse-dev:matrix.org +.. |documentation| image:: https://img.shields.io/badge/documentation-%E2%9C%93-success + :alt: (Rendered documentation on GitHub Pages) + :target: https://matrix-org.github.io/synapse/latest/ + .. |license| image:: https://img.shields.io/github/license/matrix-org/synapse :alt: (check license in LICENSE file) :target: LICENSE diff --git a/UPGRADE.rst b/UPGRADE.rst index 17ecd935fdbb..6c7f9cb18e9f 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -1,7 +1,7 @@ Upgrading Synapse ================= -This document has moved to the `Synapse documentation website `_. +This document has moved to the `Synapse documentation website <https://matrix-org.github.io/synapse/latest/upgrade.html>`_. Please update your links. The markdown source is available in `docs/upgrade.md <docs/upgrade.md>`_. diff --git a/contrib/grafana/synapse.json b/contrib/grafana/synapse.json index 0c4816b7cd53..2c839c30d036 100644 --- a/contrib/grafana/synapse.json +++ b/contrib/grafana/synapse.json @@ -54,7 +54,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1621258266004, + "iteration": 1628606819564, "links": [ { "asDropdown": false, @@ -307,7 +307,6 @@ ], "thresholds": [ { - "$$hashKey": "object:283", "colorMode": "warning", "fill": false, "line": true, @@ -316,7 +315,6 @@ "yaxis": "left" }, { - "$$hashKey": "object:284", "colorMode": "critical", "fill": false, "line": true, @@ -344,7 +342,6 @@ }, "yaxes": [ { - "$$hashKey": "object:255", "decimals": null, "format": "s", "label": "", @@ -354,7 +351,6 @@ "show": true }, { - "$$hashKey": "object:256", "format": "hertz", "label": "", "logBase": 1, @@ -429,7 +425,6 @@ ], "thresholds": [ { - "$$hashKey": "object:566", "colorMode": "critical", "fill": true, "line": true, @@ -457,7 +452,6 @@ }, "yaxes": [ { - "$$hashKey": "object:538", "decimals": null, "format": "percentunit", "label": null, @@ -467,7 +461,6 @@ "show": true }, { - "$$hashKey": "object:539", "format": "short", "label": null, "logBase": 1, @@ -573,7 +566,6 @@ }, "yaxes": [ { - "$$hashKey": "object:1560", "format": "bytes", "logBase": 1, "max": null, @@ -581,7 +573,6 @@ "show": true }, { - "$$hashKey": "object:1561", "format": "short", "logBase": 1, "max": null, @@ -641,7 +632,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:639", "alias": "/max$/", "color": "#890F02", "fill": 0, @@ -693,7 +683,6 @@ }, "yaxes": [ { - "$$hashKey": "object:650", "decimals": null, "format": "none", "label": "", @@ -703,7 +692,6 @@ "show": true }, { - "$$hashKey": "object:651", "decimals": null, "format": "short", "label": null, @@ -783,11 
+771,9 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:1240", "alias": "/user/" }, { - "$$hashKey": "object:1241", "alias": "/system/" } ], @@ -817,7 +803,6 @@ ], "thresholds": [ { - "$$hashKey": "object:1278", "colorMode": "custom", "fillColor": "rgba(255, 255, 255, 1)", "line": true, @@ -827,7 +812,6 @@ "yaxis": "left" }, { - "$$hashKey": "object:1279", "colorMode": "custom", "fillColor": "rgba(255, 255, 255, 1)", "line": true, @@ -837,7 +821,6 @@ "yaxis": "left" }, { - "$$hashKey": "object:1498", "colorMode": "critical", "fill": true, "line": true, @@ -865,7 +848,6 @@ }, "yaxes": [ { - "$$hashKey": "object:1250", "decimals": null, "format": "percentunit", "label": "", @@ -875,7 +857,6 @@ "show": true }, { - "$$hashKey": "object:1251", "format": "short", "logBase": 1, "max": null, @@ -1427,7 +1408,6 @@ }, "yaxes": [ { - "$$hashKey": "object:572", "format": "percentunit", "label": null, "logBase": 1, @@ -1436,7 +1416,6 @@ "show": true }, { - "$$hashKey": "object:573", "format": "short", "label": null, "logBase": 1, @@ -1720,7 +1699,6 @@ }, "yaxes": [ { - "$$hashKey": "object:102", "format": "hertz", "logBase": 1, "max": null, @@ -1728,7 +1706,6 @@ "show": true }, { - "$$hashKey": "object:103", "format": "short", "logBase": 1, "max": null, @@ -3425,7 +3402,7 @@ "h": 9, "w": 12, "x": 0, - "y": 33 + "y": 6 }, "hiddenSeries": false, "id": 79, @@ -3442,9 +3419,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -3526,7 +3506,7 @@ "h": 9, "w": 12, "x": 12, - "y": 33 + "y": 6 }, "hiddenSeries": false, "id": 83, @@ -3543,9 +3523,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -3629,7 +3612,7 @@ "h": 9, "w": 12, "x": 0, - "y": 42 + "y": 15 }, "hiddenSeries": false, "id": 109, @@ -3646,9 +3629,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -3733,7 +3719,7 @@ "h": 9, "w": 12, "x": 12, - "y": 42 + "y": 15 }, "hiddenSeries": false, "id": 111, @@ -3750,9 +3736,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -3831,7 +3820,7 @@ "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 24 }, "hiddenSeries": false, "id": 142, @@ -3847,8 +3836,11 @@ "lines": true, "linewidth": 1, "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -3931,7 +3923,7 @@ "h": 9, "w": 12, "x": 12, - "y": 51 + "y": 24 }, "hiddenSeries": false, "id": 140, @@ -3948,9 +3940,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -4079,7 +4074,7 @@ "h": 9, "w": 
12, "x": 0, - "y": 59 + "y": 32 }, "heatmap": {}, "hideZeroBuckets": false, @@ -4145,7 +4140,7 @@ "h": 9, "w": 12, "x": 12, - "y": 60 + "y": 33 }, "hiddenSeries": false, "id": 162, @@ -4163,9 +4158,12 @@ "linewidth": 0, "links": [], "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -4350,7 +4348,7 @@ "h": 9, "w": 12, "x": 0, - "y": 68 + "y": 41 }, "heatmap": {}, "hideZeroBuckets": false, @@ -4396,6 +4394,311 @@ "yBucketBound": "auto", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 42 + }, + "hiddenSeries": false, + "id": 203, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.3.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "synapse_federation_server_oldest_inbound_pdu_in_staging{job=\"$job\",index=~\"$index\",instance=\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "rss {{index}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Age of oldest event in staging area", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 50 + }, + "hiddenSeries": false, + "id": 202, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.3.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "synapse_federation_server_number_inbound_pdu_in_staging{job=\"$job\",index=~\"$index\",instance=\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "rss {{index}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Number of events in federation staging area", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "hiddenSeries": false, + "id": 205, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(synapse_federation_soft_failed_events_total{instance=\"$instance\"}[$bucket_size]))", + "interval": "", + "legendFormat": "soft-failed events", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Soft-failed event rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "hertz", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "title": "Federation", @@ -4647,7 +4950,7 @@ "h": 7, "w": 12, "x": 0, - "y": 8 + "y": 33 }, "hiddenSeries": false, "id": 48, @@ -4749,7 +5052,7 @@ "h": 7, "w": 12, "x": 12, - "y": 8 + "y": 33 }, "hiddenSeries": false, "id": 104, @@ -4877,7 +5180,7 @@ "h": 7, "w": 12, "x": 0, - "y": 15 + "y": 40 }, "hiddenSeries": false, "id": 10, @@ -4981,7 +5284,7 @@ "h": 7, "w": 12, "x": 12, - "y": 15 + "y": 40 }, "hiddenSeries": false, "id": 11, @@ -5086,7 +5389,7 @@ "h": 7, "w": 12, "x": 0, - "y": 22 + "y": 47 }, "hiddenSeries": false, "id": 180, @@ -5168,6 +5471,126 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 6, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 200, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "7.3.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(synapse_storage_schedule_time_bucket{index=~\"$index\",instance=\"$instance\",job=\"$job\"}[$bucket_size])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.9, sum(rate(synapse_storage_schedule_time_bucket{index=~\"$index\",instance=\"$instance\",job=\"$job\"}[$bucket_size])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "90%", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, sum(rate(synapse_storage_schedule_time_bucket{index=~\"$index\",instance=\"$instance\",job=\"$job\"}[$bucket_size])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "75%", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.5, sum(rate(synapse_storage_schedule_time_bucket{index=~\"$index\",instance=\"$instance\",job=\"$job\"}[$bucket_size])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "50%", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time waiting for DB connection quantiles", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, @@ -5916,7 +6339,7 @@ "h": 10, "w": 12, "x": 0, - "y": 84 + "y": 35 }, "hiddenSeries": false, "id": 1, @@ -6022,7 +6445,7 @@ "h": 10, "w": 12, "x": 12, - "y": 84 + "y": 35 }, "hiddenSeries": false, "id": 8, @@ -6126,7 +6549,7 @@ "h": 10, "w": 12, "x": 0, - "y": 94 + "y": 45 }, "hiddenSeries": false, "id": 38, @@ -6226,7 +6649,7 @@ "h": 10, "w": 12, "x": 12, - "y": 94 + "y": 45 }, "hiddenSeries": false, "id": 39, @@ -6258,8 +6681,9 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, rate(synapse_util_caches_cache:total{job=\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]) - rate(synapse_util_caches_cache:hits{job=\"$job\",instance=\"$instance\"}[$bucket_size]))", + "expr": "topk(10, rate(synapse_util_caches_cache:total{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]) - rate(synapse_util_caches_cache:hits{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", + "interval": "", "intervalFactor": 2, "legendFormat": "{{name}} {{job}}-{{index}}", "refId": "A", @@ -6326,7 +6750,7 @@ "h": 9, "w": 12, "x": 0, - "y": 104 + "y": 55 }, "hiddenSeries": false, "id": 65, @@ -6361,7 +6785,7 @@ "expr": "rate(synapse_util_caches_cache:evicted_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{name}} {{job}}-{{index}}", + "legendFormat": "{{name}} ({{reason}}) {{job}}-{{index}}", "refId": "A" } ], @@ -9051,7 +9475,7 @@ "h": 8, "w": 12, "x": 0, - "y": 119 + "y": 41 }, "hiddenSeries": false, "id": 156, @@ -9089,7 +9513,7 @@ "steppedLine": false, "targets": [ { 
- "expr": "synapse_admin_mau:current{instance=\"$instance\"}", + "expr": "synapse_admin_mau:current{instance=\"$instance\", job=~\"$job\"}", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -9097,7 +9521,7 @@ "refId": "A" }, { - "expr": "synapse_admin_mau:max{instance=\"$instance\"}", + "expr": "synapse_admin_mau:max{instance=\"$instance\", job=~\"$job\"}", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -9164,7 +9588,7 @@ "h": 8, "w": 12, "x": 12, - "y": 119 + "y": 41 }, "hiddenSeries": false, "id": 160, @@ -9484,7 +9908,7 @@ "h": 8, "w": 12, "x": 0, - "y": 73 + "y": 43 }, "hiddenSeries": false, "id": 168, @@ -9516,7 +9940,7 @@ { "expr": "rate(synapse_appservice_api_sent_events{instance=\"$instance\"}[$bucket_size])", "interval": "", - "legendFormat": "{{exported_service}}", + "legendFormat": "{{service}}", "refId": "A" } ], @@ -9579,7 +10003,7 @@ "h": 8, "w": 12, "x": 12, - "y": 73 + "y": 43 }, "hiddenSeries": false, "id": 171, @@ -9611,7 +10035,7 @@ { "expr": "rate(synapse_appservice_api_sent_transactions{instance=\"$instance\"}[$bucket_size])", "interval": "", - "legendFormat": "{{exported_service}}", + "legendFormat": "{{service}}", "refId": "A" } ], @@ -9959,7 +10383,6 @@ }, "yaxes": [ { - "$$hashKey": "object:165", "format": "hertz", "label": null, "logBase": 1, @@ -9968,7 +10391,6 @@ "show": true }, { - "$$hashKey": "object:166", "format": "short", "label": null, "logBase": 1, @@ -10071,7 +10493,6 @@ }, "yaxes": [ { - "$$hashKey": "object:390", "format": "hertz", "label": null, "logBase": 1, @@ -10080,7 +10501,6 @@ "show": true }, { - "$$hashKey": "object:391", "format": "short", "label": null, "logBase": 1, @@ -10169,7 +10589,6 @@ }, "yaxes": [ { - "$$hashKey": "object:390", "format": "hertz", "label": null, "logBase": 1, @@ -10178,7 +10597,6 @@ "show": true }, { - "$$hashKey": "object:391", "format": "short", "label": null, "logBase": 1, @@ -10470,5 +10888,5 @@ "timezone": "", "title": "Synapse", "uid": "000000012", - "version": 90 + "version": 100 } \ No newline at end of file diff --git a/contrib/purge_api/purge_history.sh b/contrib/purge_api/purge_history.sh index 9d5324ea1c47..de58dcdbb78a 100644 --- a/contrib/purge_api/purge_history.sh +++ b/contrib/purge_api/purge_history.sh @@ -84,7 +84,9 @@ AUTH="Authorization: Bearer $TOKEN" ################################################################################################### # finally start pruning the room: ################################################################################################### -POSTDATA='{"delete_local_events":"true"}' # this will really delete local events, so the messages in the room really disappear unless they are restored by remote federation +# this will really delete local events, so the messages in the room really +# disappear unless they are restored by remote federation. This is because +# we pass {"delete_local_events":true} to the curl invocation below. 
for ROOM in "${ROOMS_ARRAY[@]}"; do echo "########################################### $(date) ################# " @@ -104,7 +106,7 @@ for ROOM in "${ROOMS_ARRAY[@]}"; do SLEEP=2 set -x # call purge - OUT=$(curl --header "$AUTH" -s -d $POSTDATA POST "$API_URL/admin/purge_history/$ROOM/$EVENT_ID") + OUT=$(curl --header "$AUTH" -s -d '{"delete_local_events":true}' POST "$API_URL/admin/purge_history/$ROOM/$EVENT_ID") PURGE_ID=$(echo "$OUT" |grep purge_id|cut -d'"' -f4 ) if [ "$PURGE_ID" == "" ]; then # probably the history purge is already in progress for $ROOM diff --git a/debian/build_virtualenv b/debian/build_virtualenv index 68c86599536c..e6911636192c 100755 --- a/debian/build_virtualenv +++ b/debian/build_virtualenv @@ -15,7 +15,7 @@ export DH_VIRTUALENV_INSTALL_ROOT=/opt/venvs # python won't look in the right directory. At least this way, the error will # be a *bit* more obvious. # -SNAKE=`readlink -e /usr/bin/python3` +SNAKE=$(readlink -e /usr/bin/python3) # try to set the CFLAGS so any compiled C extensions are compiled with the most # generic as possible x64 instructions, so that compiling it on a new Intel chip @@ -24,7 +24,7 @@ SNAKE=`readlink -e /usr/bin/python3` # TODO: add similar things for non-amd64, or figure out a more generic way to # do this. -case `dpkg-architecture -q DEB_HOST_ARCH` in +case $(dpkg-architecture -q DEB_HOST_ARCH) in amd64) export CFLAGS=-march=x86-64 ;; @@ -40,6 +40,7 @@ dh_virtualenv \ --upgrade-pip \ --preinstall="lxml" \ --preinstall="mock" \ + --preinstall="wheel" \ --extra-pip-arg="--no-cache-dir" \ --extra-pip-arg="--compile" \ --extras="all,systemd,test" @@ -56,8 +57,8 @@ case "$DEB_BUILD_OPTIONS" in *) # Copy tests to a temporary directory so that we can put them on the # PYTHONPATH without putting the uninstalled synapse on the pythonpath. - tmpdir=`mktemp -d` - trap "rm -r $tmpdir" EXIT + tmpdir=$(mktemp -d) + trap 'rm -r $tmpdir' EXIT cp -r tests "$tmpdir" @@ -98,5 +99,20 @@ esac --output-file="${PACKAGE_BUILD_DIR}/etc/matrix-synapse/log.yaml" # add a dependency on the right version of python to substvars. -PYPKG=`basename $SNAKE` +PYPKG=$(basename "$SNAKE") echo "synapse:pydepends=$PYPKG" >> debian/matrix-synapse-py3.substvars + + +# add a couple of triggers. This is needed so that dh-virtualenv can rebuild +# the venv when the system python changes (see +# https://dh-virtualenv.readthedocs.io/en/latest/tutorial.html#step-2-set-up-packaging-for-your-project) +# +# we do it here rather than the more conventional way of just adding it to +# debian/matrix-synapse-py3.triggers, because we need to add a trigger on the +# right version of python. +cat >>"debian/.debhelper/generated/matrix-synapse-py3/triggers" < Tue, 21 Dec 2021 17:31:03 +0000 + +matrix-synapse-py3 (1.49.1) stable; urgency=medium + + * New synapse release 1.49.1. + + -- Synapse Packaging team Tue, 21 Dec 2021 11:07:30 +0000 + +matrix-synapse-py3 (1.49.0) stable; urgency=medium + + * New synapse release 1.49.0. + + -- Synapse Packaging team Tue, 14 Dec 2021 12:39:46 +0000 + +matrix-synapse-py3 (1.49.0~rc1) stable; urgency=medium + + * New synapse release 1.49.0~rc1. + + -- Synapse Packaging team Tue, 07 Dec 2021 13:52:21 +0000 + +matrix-synapse-py3 (1.48.0) stable; urgency=medium + + * New synapse release 1.48.0. + + -- Synapse Packaging team Tue, 30 Nov 2021 11:24:15 +0000 + +matrix-synapse-py3 (1.48.0~rc1) stable; urgency=medium + + * New synapse release 1.48.0~rc1. 
+ + -- Synapse Packaging team <packages@matrix.org>  Thu, 25 Nov 2021 15:56:03 +0000 + +matrix-synapse-py3 (1.47.1) stable; urgency=medium + + * New synapse release 1.47.1. + + -- Synapse Packaging team <packages@matrix.org>  Fri, 19 Nov 2021 13:44:32 +0000 + +matrix-synapse-py3 (1.47.0) stable; urgency=medium + + * New synapse release 1.47.0. + + -- Synapse Packaging team <packages@matrix.org>  Wed, 17 Nov 2021 13:09:43 +0000 + +matrix-synapse-py3 (1.47.0~rc3) stable; urgency=medium + + * New synapse release 1.47.0~rc3. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 16 Nov 2021 14:32:47 +0000 + +matrix-synapse-py3 (1.47.0~rc2) stable; urgency=medium + + [ Dan Callahan ] + * Update scripts to pass Shellcheck lints. + * Remove unused Vagrant scripts from debian/ directory. + * Allow building Debian packages for any architecture, not just amd64. + * Preinstall the "wheel" package when building virtualenvs. + * Do not error if /etc/default/matrix-synapse is missing. + + [ Synapse Packaging team ] + * New synapse release 1.47.0~rc2. + + -- Synapse Packaging team <packages@matrix.org>  Wed, 10 Nov 2021 09:41:01 +0000 + +matrix-synapse-py3 (1.46.0) stable; urgency=medium + + [ Richard van der Hoff ] + * Compress debs with xz, to fix incompatibility of impish debs with reprepro. + + [ Synapse Packaging team ] + * New synapse release 1.46.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 02 Nov 2021 13:22:53 +0000 + +matrix-synapse-py3 (1.46.0~rc1) stable; urgency=medium + + * New synapse release 1.46.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 26 Oct 2021 14:04:04 +0100 + +matrix-synapse-py3 (1.45.1) stable; urgency=medium + + * New synapse release 1.45.1. + + -- Synapse Packaging team <packages@matrix.org>  Wed, 20 Oct 2021 11:58:27 +0100 + +matrix-synapse-py3 (1.45.0) stable; urgency=medium + + * New synapse release 1.45.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 19 Oct 2021 11:18:53 +0100 + +matrix-synapse-py3 (1.45.0~rc2) stable; urgency=medium + + * New synapse release 1.45.0~rc2. + + -- Synapse Packaging team <packages@matrix.org>  Thu, 14 Oct 2021 10:58:24 +0100 + +matrix-synapse-py3 (1.45.0~rc1) stable; urgency=medium + + [ Nick @ Beeper ] + * Include an `update_synapse_database` script in the distribution. + + [ Synapse Packaging team ] + * New synapse release 1.45.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 12 Oct 2021 10:46:27 +0100 + +matrix-synapse-py3 (1.44.0) stable; urgency=medium + + * New synapse release 1.44.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 05 Oct 2021 13:43:57 +0100 + +matrix-synapse-py3 (1.44.0~rc3) stable; urgency=medium + + * New synapse release 1.44.0~rc3. + + -- Synapse Packaging team <packages@matrix.org>  Mon, 04 Oct 2021 14:57:22 +0100 + +matrix-synapse-py3 (1.44.0~rc2) stable; urgency=medium + + * New synapse release 1.44.0~rc2. + + -- Synapse Packaging team <packages@matrix.org>  Thu, 30 Sep 2021 12:39:10 +0100 + +matrix-synapse-py3 (1.44.0~rc1) stable; urgency=medium + + * New synapse release 1.44.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 28 Sep 2021 13:41:28 +0100 + +matrix-synapse-py3 (1.43.0) stable; urgency=medium + + * New synapse release 1.43.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 21 Sep 2021 11:49:05 +0100 + +matrix-synapse-py3 (1.43.0~rc2) stable; urgency=medium + + * New synapse release 1.43.0~rc2. + + -- Synapse Packaging team <packages@matrix.org>  Fri, 17 Sep 2021 10:43:21 +0100 + +matrix-synapse-py3 (1.43.0~rc1) stable; urgency=medium + + * New synapse release 1.43.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 14 Sep 2021 11:39:46 +0100 + +matrix-synapse-py3 (1.42.0) stable; urgency=medium + + * New synapse release 1.42.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 07 Sep 2021 16:19:09 +0100 + +matrix-synapse-py3 (1.42.0~rc2) stable; urgency=medium + + * New synapse release 1.42.0~rc2.
+ + -- Synapse Packaging team <packages@matrix.org>  Mon, 06 Sep 2021 15:25:13 +0100 + +matrix-synapse-py3 (1.42.0~rc1) stable; urgency=medium + + * New synapse release 1.42.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Wed, 01 Sep 2021 11:37:48 +0100 + +matrix-synapse-py3 (1.41.1) stable; urgency=high + + * New synapse release 1.41.1. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 31 Aug 2021 12:59:10 +0100 + +matrix-synapse-py3 (1.41.0) stable; urgency=medium + + * New synapse release 1.41.0. + + -- Synapse Packaging team <packages@matrix.org>  Tue, 24 Aug 2021 15:31:45 +0100 + +matrix-synapse-py3 (1.41.0~rc1) stable; urgency=medium + + * New synapse release 1.41.0~rc1. + + -- Synapse Packaging team <packages@matrix.org>  Wed, 18 Aug 2021 15:52:00 +0100 + matrix-synapse-py3 (1.40.0) stable; urgency=medium * New synapse release 1.40.0. @@ -20,6 +210,8 @@ matrix-synapse-py3 (1.40.0~rc1) stable; urgency=medium [ Richard van der Hoff ] * Drop backwards-compatibility code that was required to support Ubuntu Xenial. + * Update package triggers so that the virtualenv is correctly rebuilt + when the system python is rebuilt, on recent Python versions. [ Synapse Packaging team ] * New synapse release 1.40.0~rc1. diff --git a/debian/control b/debian/control index 763fabd6f6ae..412a9e1d4cf0 100644 --- a/debian/control +++ b/debian/control @@ -19,7 +19,7 @@ Standards-Version: 3.9.8 Homepage: https://github.com/matrix-org/synapse Package: matrix-synapse-py3 -Architecture: amd64 +Architecture: any Provides: matrix-synapse Conflicts: matrix-synapse (<< 0.34.0.1-0matrix2), diff --git a/debian/matrix-synapse-py3.config b/debian/matrix-synapse-py3.config index 37a781b3e855..3b2f469e1562 100755 --- a/debian/matrix-synapse-py3.config +++ b/debian/matrix-synapse-py3.config @@ -2,6 +2,7 @@ set -e +# shellcheck disable=SC1091 . /usr/share/debconf/confmodule # try to update the debconf db according to whatever is in the config files diff --git a/debian/matrix-synapse-py3.links b/debian/matrix-synapse-py3.links index 53e29654187a..7eeba180d903 100644 --- a/debian/matrix-synapse-py3.links +++ b/debian/matrix-synapse-py3.links @@ -3,3 +3,4 @@ opt/venvs/matrix-synapse/bin/register_new_matrix_user usr/bin/register_new_matri opt/venvs/matrix-synapse/bin/synapse_port_db usr/bin/synapse_port_db opt/venvs/matrix-synapse/bin/synapse_review_recent_signups usr/bin/synapse_review_recent_signups opt/venvs/matrix-synapse/bin/synctl usr/bin/synctl +opt/venvs/matrix-synapse/bin/update_synapse_database usr/bin/update_synapse_database diff --git a/debian/matrix-synapse-py3.postinst b/debian/matrix-synapse-py3.postinst index c0dd7e5534e7..a8dde1e082f4 100644 --- a/debian/matrix-synapse-py3.postinst +++ b/debian/matrix-synapse-py3.postinst @@ -1,5 +1,6 @@ #!/bin/sh -e +# shellcheck disable=SC1091 .
/usr/share/debconf/confmodule CONFIGFILE_SERVERNAME="/etc/matrix-synapse/conf.d/server_name.yaml" diff --git a/debian/matrix-synapse-py3.triggers b/debian/matrix-synapse-py3.triggers deleted file mode 100644 index f8c1fdb021c9..000000000000 --- a/debian/matrix-synapse-py3.triggers +++ /dev/null @@ -1,9 +0,0 @@ -# Register interest in Python interpreter changes and -# don't make the Python package dependent on the virtualenv package -# processing (noawait) -interest-noawait /usr/bin/python3.5 -interest-noawait /usr/bin/python3.6 -interest-noawait /usr/bin/python3.7 - -# Also provide a symbolic trigger for all dh-virtualenv packages -interest dh-virtualenv-interpreter-update diff --git a/debian/matrix-synapse.service b/debian/matrix-synapse.service index 553babf5492d..bde1c6cb9fd9 100644 --- a/debian/matrix-synapse.service +++ b/debian/matrix-synapse.service @@ -5,7 +5,7 @@ Description=Synapse Matrix homeserver Type=notify User=matrix-synapse WorkingDirectory=/var/lib/matrix-synapse -EnvironmentFile=/etc/default/matrix-synapse +EnvironmentFile=-/etc/default/matrix-synapse ExecStartPre=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --generate-keys ExecStart=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ ExecReload=/bin/kill -HUP $MAINPID diff --git a/debian/rules b/debian/rules index b9d490adc94c..5baf2475f07e 100755 --- a/debian/rules +++ b/debian/rules @@ -51,5 +51,11 @@ override_dh_shlibdeps: override_dh_virtualenv: ./debian/build_virtualenv +override_dh_builddeb: + # force the compression to xz, to stop dpkg-deb on impish defaulting to zstd + # (which requires reprepro 5.3.0-1.3, which is currently only in 'experimental' in Debian: + # https://metadata.ftp-master.debian.org/changelogs/main/r/reprepro/reprepro_5.3.0-1.3_changelog) + dh_builddeb -- -Zxz + %: dh $@ --with python-virtualenv diff --git a/debian/test/.gitignore b/debian/test/.gitignore deleted file mode 100644 index 95eda73fcc30..000000000000 --- a/debian/test/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.vagrant -*.log diff --git a/debian/test/provision.sh b/debian/test/provision.sh deleted file mode 100644 index a5c7f59712a1..000000000000 --- a/debian/test/provision.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# -# provisioning script for vagrant boxes for testing the matrix-synapse debs. -# -# Will install the most recent matrix-synapse-py3 deb for this platform from -# the /debs directory. - -set -e - -apt-get update -apt-get install -y lsb-release - -deb=`ls /debs/matrix-synapse-py3_*+$(lsb_release -cs)*.deb | sort | tail -n1` - -debconf-set-selections <> $DIR/etc/$port.config - - echo "public_baseurl: http://localhost:$port/" >> $DIR/etc/$port.config - - echo 'enable_registration: true' >> $DIR/etc/$port.config - - # Warning, this heredoc depends on the interaction of tabs and spaces. Please don't - # accidentaly bork me with your fancy settings. 
- listeners=$(cat <<-PORTLISTENERS - # Configure server to listen on both $https_port and $port - # This overides some of the default settings above - listeners: - - port: $https_port - type: http - tls: true - resources: - - names: [client, federation] - - - port: $port - tls: false - bind_addresses: ['::1', '127.0.0.1'] - type: http - x_forwarded: true - resources: - - names: [client, federation] - compress: false - PORTLISTENERS - ) - echo "${listeners}" >> $DIR/etc/$port.config - - # Disable tls for the servers - printf '\n\n# Disable tls on the servers.' >> $DIR/etc/$port.config - echo '# DO NOT USE IN PRODUCTION' >> $DIR/etc/$port.config - echo 'use_insecure_ssl_client_just_for_testing_do_not_use: true' >> $DIR/etc/$port.config - echo 'federation_verify_certificates: false' >> $DIR/etc/$port.config - - # Set tls paths - echo "tls_certificate_path: \"$DIR/etc/localhost:$https_port.tls.crt\"" >> $DIR/etc/$port.config - echo "tls_private_key_path: \"$DIR/etc/localhost:$https_port.tls.key\"" >> $DIR/etc/$port.config - + if ! grep -F "Customisation made by demo/start.sh" -q "$DIR/etc/$port.config"; then # Generate tls keys - openssl req -x509 -newkey rsa:4096 -keyout $DIR/etc/localhost\:$https_port.tls.key -out $DIR/etc/localhost\:$https_port.tls.crt -days 365 -nodes -subj "/O=matrix" - - # Ignore keys from the trusted keys server - echo '# Ignore keys from the trusted keys server' >> $DIR/etc/$port.config - echo 'trusted_key_servers:' >> $DIR/etc/$port.config - echo ' - server_name: "matrix.org"' >> $DIR/etc/$port.config - echo ' accept_keys_insecurely: true' >> $DIR/etc/$port.config - - # Reduce the blacklist - blacklist=$(cat <<-BLACK - # Set the blacklist so that it doesn't include 127.0.0.1, ::1 - federation_ip_range_blacklist: - - '10.0.0.0/8' - - '172.16.0.0/12' - - '192.168.0.0/16' - - '100.64.0.0/10' - - '169.254.0.0/16' - - 'fe80::/64' - - 'fc00::/7' - BLACK - ) - echo "${blacklist}" >> $DIR/etc/$port.config + openssl req -x509 -newkey rsa:4096 -keyout "$DIR/etc/localhost:$https_port.tls.key" -out "$DIR/etc/localhost:$https_port.tls.crt" -days 365 -nodes -subj "/O=matrix" + + # Regenerate configuration + { + printf '\n\n# Customisation made by demo/start.sh\n' + echo "public_baseurl: http://localhost:$port/" + echo 'enable_registration: true' + + # Warning, this heredoc depends on the interaction of tabs and spaces. + # Please don't accidentally bork me with your fancy settings. + listeners=$(cat <<-PORTLISTENERS + # Configure server to listen on both $https_port and $port + # This overrides some of the default settings above + listeners: + - port: $https_port + type: http + tls: true + resources: + - names: [client, federation] + + - port: $port + tls: false + bind_addresses: ['::1', '127.0.0.1'] + type: http + x_forwarded: true + resources: + - names: [client, federation] + compress: false + PORTLISTENERS + ) + + echo "${listeners}" + + # Disable tls for the servers + printf '\n\n# Disable tls on the servers.' 
+ echo '# DO NOT USE IN PRODUCTION' + echo 'use_insecure_ssl_client_just_for_testing_do_not_use: true' + echo 'federation_verify_certificates: false' + + # Set tls paths + echo "tls_certificate_path: \"$DIR/etc/localhost:$https_port.tls.crt\"" + echo "tls_private_key_path: \"$DIR/etc/localhost:$https_port.tls.key\"" + + # Ignore keys from the trusted keys server + echo '# Ignore keys from the trusted keys server' + echo 'trusted_key_servers:' + echo ' - server_name: "matrix.org"' + echo ' accept_keys_insecurely: true' + + # Reduce the blacklist + blacklist=$(cat <<-BLACK + # Set the blacklist so that it doesn't include 127.0.0.1, ::1 + federation_ip_range_blacklist: + - '10.0.0.0/8' + - '172.16.0.0/12' + - '192.168.0.0/16' + - '100.64.0.0/10' + - '169.254.0.0/16' + - 'fe80::/64' + - 'fc00::/7' + BLACK + ) + + echo "${blacklist}" + } >> "$DIR/etc/$port.config" fi # Check script parameters if [ $# -eq 1 ]; then - if [ $1 = "--no-rate-limit" ]; then + if [ "$1" = "--no-rate-limit" ]; then # Disable any rate limiting ratelimiting=$(cat <<-RC @@ -137,22 +141,22 @@ for port in 8080 8081 8082; do burst_count: 1000 RC ) - echo "${ratelimiting}" >> $DIR/etc/$port.config + echo "${ratelimiting}" >> "$DIR/etc/$port.config" fi fi - if ! grep -F "full_twisted_stacktraces" -q $DIR/etc/$port.config; then - echo "full_twisted_stacktraces: true" >> $DIR/etc/$port.config + if ! grep -F "full_twisted_stacktraces" -q "$DIR/etc/$port.config"; then + echo "full_twisted_stacktraces: true" >> "$DIR/etc/$port.config" fi - if ! grep -F "report_stats" -q $DIR/etc/$port.config ; then - echo "report_stats: false" >> $DIR/etc/$port.config + if ! grep -F "report_stats" -q "$DIR/etc/$port.config" ; then + echo "report_stats: false" >> "$DIR/etc/$port.config" fi python3 -m synapse.app.homeserver \ --config-path "$DIR/etc/$port.config" \ -D \ - popd + popd || exit done -cd "$CWD" +cd "$CWD" || exit diff --git a/demo/stop.sh b/demo/stop.sh index f9dddc5914b1..c97e4b8d005d 100755 --- a/demo/stop.sh +++ b/demo/stop.sh @@ -8,7 +8,7 @@ for pid_file in $FILES; do pid=$(cat "$pid_file") if [[ $pid ]]; then echo "Killing $pid_file with $pid" - kill $pid + kill "$pid" fi done diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv index 017be8555ea9..1dd88140c7a4 100644 --- a/docker/Dockerfile-dhvirtualenv +++ b/docker/Dockerfile-dhvirtualenv @@ -47,8 +47,9 @@ RUN apt-get update -qq -o Acquire::Languages=none \ && cd /dh-virtualenv \ && env DEBIAN_FRONTEND=noninteractive mk-build-deps -ri -t "apt-get -y --no-install-recommends" -# build it -RUN cd /dh-virtualenv && dpkg-buildpackage -us -uc -b +# Build it. Note that building the docs doesn't work due to differences in +# Sphinx APIs across versions/distros. 
+RUN cd /dh-virtualenv && DEB_BUILD_OPTIONS=nodoc dpkg-buildpackage -us -uc -b ### ### Stage 1 diff --git a/docker/Dockerfile-pgtests b/docker/Dockerfile-pgtests index 3bfee845c658..92b804d1937f 100644 --- a/docker/Dockerfile-pgtests +++ b/docker/Dockerfile-pgtests @@ -1,6 +1,6 @@ # Use the Sytest image that comes with a lot of the build dependencies # pre-installed -FROM matrixdotorg/sytest:latest +FROM matrixdotorg/sytest:bionic # The Sytest image doesn't come with python, so install that RUN apt-get update && apt-get -qq install -y python3 python3-dev python3-pip @@ -8,5 +8,23 @@ RUN apt-get update && apt-get -qq install -y python3 python3-dev python3-pip # We need tox to run the tests in run_pg_tests.sh RUN python3 -m pip install tox -ADD run_pg_tests.sh /pg_tests.sh -ENTRYPOINT /pg_tests.sh +# Initialise the db +RUN su -c '/usr/lib/postgresql/10/bin/initdb -D /var/lib/postgresql/data -E "UTF-8" --lc-collate="C.UTF-8" --lc-ctype="C.UTF-8" --username=postgres' postgres + +# Add a user with our UID and GID so that files get created on the host owned +# by us, not root. +ARG UID +ARG GID +RUN groupadd --gid $GID user +RUN useradd --uid $UID --gid $GID --groups sudo --no-create-home user + +# Ensure we can start postgres by sudo-ing as the postgres user. +RUN apt-get update && apt-get -qq install -y sudo +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +ADD run_pg_tests.sh /run_pg_tests.sh +# Use the "exec form" of ENTRYPOINT (https://docs.docker.com/engine/reference/builder/#entrypoint) +# so that we can `docker run` this container and pass arguments to pg_tests.sh +ENTRYPOINT ["/run_pg_tests.sh"] + +USER user diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 969cf9728658..46f2e17382db 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -21,3 +21,6 @@ VOLUME ["/data"] # files to run the desired worker configuration. Will start supervisord. COPY ./docker/configure_workers_and_start.py /configure_workers_and_start.py ENTRYPOINT ["/configure_workers_and_start.py"] + +HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \ + CMD /bin/sh /healthcheck.sh diff --git a/docker/README.md b/docker/README.md index edf917bb11cc..4349e71f87bb 100644 --- a/docker/README.md +++ b/docker/README.md @@ -65,7 +65,8 @@ The following environment variables are supported in `generate` mode: * `SYNAPSE_DATA_DIR`: where the generated config will put persistent data such as the database and media store. Defaults to `/data`. * `UID`, `GID`: the user id and group id to use for creating the data - directories. Defaults to `991`, `991`. + directories. If unset, and no user is set via `docker run --user`, defaults + to `991`, `991`. ## Running synapse @@ -97,7 +98,9 @@ The following environment variables are supported in `run` mode: `/homeserver.yaml`. * `SYNAPSE_WORKER`: module to execute, used when running synapse with workers. Defaults to `synapse.app.homeserver`, which is suitable for non-worker mode. -* `UID`, `GID`: the user and group id to run Synapse as. Defaults to `991`, `991`. +* `UID`, `GID`: the user and group id to run Synapse as. If unset, and no user + is set via `docker run --user`, defaults to `991`, `991`. Note that this user + must have permission to read the config files, and write to the data directories. * `TZ`: the [timezone](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) the container will run with. Defaults to `UTC`. For more complex setups (e.g. 
for workers) you can also pass your args directly to synapse using `run` mode. For example like this: @@ -186,7 +189,7 @@ point to another Dockerfile. ## Disabling the healthcheck If you are using a non-standard port or tls inside docker you can disable the healthcheck -whilst running the above `docker run` commands. +whilst running the above `docker run` commands. ``` --no-healthcheck @@ -212,7 +215,7 @@ If you wish to point the healthcheck at a different port with docker command, ad ## Setting the healthcheck in docker-compose file You can add the following to set a custom healthcheck in a docker compose file. -You will need docker-compose version >2.1 for this to work. +You will need docker-compose version >2.1 for this to work. ``` healthcheck: @@ -226,4 +229,5 @@ healthcheck: ## Using jemalloc Jemalloc is embedded in the image and will be used instead of the default allocator. -You can read about jemalloc by reading the Synapse [README](../README.rst). +You can read about jemalloc by reading the Synapse +[README](https://github.com/matrix-org/synapse/blob/HEAD/README.rst#help-synapse-is-slow-and-eats-all-my-ram-cpu). diff --git a/docker/build_debian.sh b/docker/build_debian.sh index 801ff454716c..9eae38af9191 100644 --- a/docker/build_debian.sh +++ b/docker/build_debian.sh @@ -5,7 +5,7 @@ set -ex # Get the codename from distro env -DIST=`cut -d ':' -f2 <<< $distro` +DIST=$(cut -d ':' -f2 <<< "${distro:?}") # we get a read-only copy of the source: make a writeable copy cp -aT /synapse/source /synapse/build @@ -17,7 +17,7 @@ cd /synapse/build # Section to determine which "component" it should go into (see # https://manpages.debian.org/stretch/reprepro/reprepro.1.en.html#GUESSING) -DEB_VERSION=`dpkg-parsechangelog -SVersion` +DEB_VERSION=$(dpkg-parsechangelog -SVersion) case $DEB_VERSION in *~rc*|*~a*|*~b*|*~c*) sed -ie '/^Section:/c\Section: prerelease' debian/control diff --git a/docker/conf-workers/healthcheck.sh.j2 b/docker/conf-workers/healthcheck.sh.j2 new file mode 100644 index 000000000000..79c621f89ccb --- /dev/null +++ b/docker/conf-workers/healthcheck.sh.j2 @@ -0,0 +1,6 @@ +#!/bin/sh +# This healthcheck script is designed to return OK when every +# host involved returns OK +{%- for healthcheck_url in healthcheck_urls %} +curl -fSs {{ healthcheck_url }} || exit 1 +{%- endfor %} diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml index 3cba594d0228..f10f78a48cd2 100644 --- a/docker/conf/homeserver.yaml +++ b/docker/conf/homeserver.yaml @@ -148,14 +148,6 @@ bcrypt_rounds: 12 allow_guest_access: {{ "True" if SYNAPSE_ALLOW_GUEST else "False" }} enable_group_creation: true -# The list of identity servers trusted to verify third party -# identifiers by this server. -# -# Also defines the ID server which will be called when an account is -# deactivated (one will be picked arbitrarily). -trusted_third_party_id_servers: - - matrix.org - - vector.im ## Metrics ### diff --git a/docker/conf/log.config b/docker/conf/log.config index a99462692628..7a216a36a046 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -18,18 +18,31 @@ handlers: backupCount: 6 # Does not include the current log file. encoding: utf8 - # Default to buffering writes to log file for efficiency. This means that - # there will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR - # logs will still be flushed immediately. + # Default to buffering writes to log file for efficiency. 
+ # WARNING/ERROR logs will still be flushed immediately, but there will be a + # delay (of up to `period` seconds, or until the buffer is full with + # `capacity` messages) before INFO/DEBUG logs get written. buffer: - class: logging.handlers.MemoryHandler + class: synapse.logging.handlers.PeriodicallyFlushingMemoryHandler target: file - # The capacity is the number of log lines that are buffered before - # being written to disk. Increasing this will lead to better + + # The capacity is the maximum number of log lines that are buffered + # before being written to disk. Increasing this will lead to better # performance, at the expensive of it taking longer for log lines to # be written to disk. + # This parameter is required. capacity: 10 - flushLevel: 30 # Flush for WARNING logs as well + + # Logs with a level at or above the flush level will cause the buffer to + # be flushed immediately. + # Default value: 40 (ERROR) + # Other values: 50 (CRITICAL), 30 (WARNING), 20 (INFO), 10 (DEBUG) + flushLevel: 30 # Flush immediately for WARNING logs and higher + + # The period of time, in seconds, between forced flushes. + # Messages will not be delayed for longer than this time. + # Default value: 5 seconds + period: 5 {% endif %} console: diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 1d22a4d5719d..adbb551cee7f 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -48,7 +48,7 @@ "app": "synapse.app.user_dir", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$" + "^/_matrix/client/(api/v1|r0|v3|unstable)/user_directory/search$" ], "shared_extra_conf": {"update_user_directory": False}, "worker_extra_conf": "", @@ -85,10 +85,10 @@ "app": "synapse.app.generic_worker", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(v2_alpha|r0)/sync$", - "^/_matrix/client/(api/v1|v2_alpha|r0)/events$", - "^/_matrix/client/(api/v1|r0)/initialSync$", - "^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$", + "^/_matrix/client/(v2_alpha|r0|v3)/sync$", + "^/_matrix/client/(api/v1|v2_alpha|r0|v3)/events$", + "^/_matrix/client/(api/v1|r0|v3)/initialSync$", + "^/_matrix/client/(api/v1|r0|v3)/rooms/[^/]+/initialSync$", ], "shared_extra_conf": {}, "worker_extra_conf": "", @@ -146,11 +146,11 @@ "app": "synapse.app.generic_worker", "listener_resources": ["client"], "endpoint_patterns": [ - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact", - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send", - "^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", - "^/_matrix/client/(api/v1|r0|unstable)/join/", - "^/_matrix/client/(api/v1|r0|unstable)/profile/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/redact", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/join/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/", ], "shared_extra_conf": {}, "worker_extra_conf": "", @@ -158,11 +158,11 @@ "frontend_proxy": { "app": "synapse.app.frontend_proxy", "listener_resources": ["client", "replication"], - "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|unstable)/keys/upload"], + "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"], "shared_extra_conf": {}, "worker_extra_conf": ( "worker_main_http_uri: http://127.0.0.1:%d" - % 
(MAIN_PROCESS_HTTP_LISTENER_PORT,), + % (MAIN_PROCESS_HTTP_LISTENER_PORT,) ), }, } @@ -474,10 +474,16 @@ def generate_worker_files(environ, config_path: str, data_dir: str): # Determine the load-balancing upstreams to configure nginx_upstream_config = "" + + # At the same time, prepare a list of internal endpoints to healthcheck + # starting with the main process which exists even if no workers do. + healthcheck_urls = ["http://localhost:8080/health"] + for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items(): body = "" for port in upstream_worker_ports: body += " server localhost:%d;\n" % (port,) + healthcheck_urls.append("http://localhost:%d/health" % (port,)) # Add to the list of configured upstreams nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format( @@ -510,6 +516,13 @@ def generate_worker_files(environ, config_path: str, data_dir: str): worker_config=supervisord_config, ) + # healthcheck config + convert( + "/conf/healthcheck.sh.j2", + "/healthcheck.sh", + healthcheck_urls=healthcheck_urls, + ) + # Ensure the logging directory exists log_dir = data_dir + "/logs" if not os.path.exists(log_dir): diff --git a/docker/run_pg_tests.sh b/docker/run_pg_tests.sh index 1fd08cb62bc6..58e2177d34c2 100755 --- a/docker/run_pg_tests.sh +++ b/docker/run_pg_tests.sh @@ -10,11 +10,10 @@ set -e # Set PGUSER so Synapse's tests know what user to connect to the database with export PGUSER=postgres -# Initialise & start the database -su -c '/usr/lib/postgresql/9.6/bin/initdb -D /var/lib/postgresql/data -E "UTF-8" --lc-collate="en_US.UTF-8" --lc-ctype="en_US.UTF-8" --username=postgres' postgres -su -c '/usr/lib/postgresql/9.6/bin/pg_ctl -w -D /var/lib/postgresql/data start' postgres +# Start the database +sudo -u postgres /usr/lib/postgresql/10/bin/pg_ctl -w -D /var/lib/postgresql/data start # Run the tests cd /src export TRIAL_FLAGS="-j 4" -tox --workdir=/tmp -e py35-postgres +tox --workdir=./.tox-pg-container -e py36-postgres "$@" diff --git a/docker/start.py b/docker/start.py index 16d6a8208a5f..ec9eeb49ae99 100755 --- a/docker/start.py +++ b/docker/start.py @@ -120,6 +120,7 @@ def generate_config_from_template(config_dir, config_path, environ, ownership): ] if ownership is not None: + log(f"Setting ownership on /data to {ownership}") subprocess.check_output(["chown", "-R", ownership, "/data"]) args = ["gosu", ownership] + args @@ -144,12 +145,18 @@ def run_generate_config(environ, ownership): config_path = environ.get("SYNAPSE_CONFIG_PATH", config_dir + "/homeserver.yaml") data_dir = environ.get("SYNAPSE_DATA_DIR", "/data") + if ownership is not None: + # make sure that synapse has perms to write to the data dir. + log(f"Setting ownership on {data_dir} to {ownership}") + subprocess.check_output(["chown", ownership, data_dir]) + # create a suitable log config from our template log_config_file = "%s/%s.log.config" % (config_dir, server_name) if not os.path.exists(log_config_file): log("Creating log config %s" % (log_config_file,)) convert("/conf/log.config", log_config_file, environ) + # generate the main config file, and a signing key. args = [ "python", "-m", @@ -168,29 +175,23 @@ def run_generate_config(environ, ownership): "--open-private-ports", ] # log("running %s" % (args, )) - - if ownership is not None: - # make sure that synapse has perms to write to the data dir. 
- subprocess.check_output(["chown", ownership, data_dir]) - - args = ["gosu", ownership] + args - os.execv("/usr/sbin/gosu", args) - else: - os.execv("/usr/local/bin/python", args) + os.execv("/usr/local/bin/python", args) def main(args, environ): mode = args[1] if len(args) > 1 else "run" - desired_uid = int(environ.get("UID", "991")) - desired_gid = int(environ.get("GID", "991")) - synapse_worker = environ.get("SYNAPSE_WORKER", "synapse.app.homeserver") - if (desired_uid == os.getuid()) and (desired_gid == os.getgid()): - ownership = None - else: - ownership = "{}:{}".format(desired_uid, desired_gid) - if ownership is None: - log("Will not perform chmod/gosu as UserID already matches request") + # if we were given an explicit user to switch to, do so + ownership = None + if "UID" in environ: + desired_uid = int(environ["UID"]) + desired_gid = int(environ.get("GID", "991")) + ownership = f"{desired_uid}:{desired_gid}" + elif os.getuid() == 0: + # otherwise, if we are running as root, use user 991 + ownership = "991:991" + + synapse_worker = environ.get("SYNAPSE_WORKER", "synapse.app.homeserver") # In generate mode, generate a configuration and missing keys, then exit if mode == "generate": diff --git a/docs/CAPTCHA_SETUP.md b/docs/CAPTCHA_SETUP.md index fabdd7b7265e..49419ce8df92 100644 --- a/docs/CAPTCHA_SETUP.md +++ b/docs/CAPTCHA_SETUP.md @@ -15,12 +15,12 @@ in `homeserver.yaml`, to the list of authorized domains. If you have not set 1. Agree to the terms of service and submit. 1. Copy your site key and secret key and add them to your `homeserver.yaml` configuration file - ``` + ```yaml recaptcha_public_key: YOUR_SITE_KEY recaptcha_private_key: YOUR_SECRET_KEY ``` 1. Enable the CAPTCHA for new registrations - ``` + ```yaml enable_registration_captcha: true ``` 1. Go to the settings page for the CAPTCHA you just created diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md index 7d71c190abed..086899a9d836 100644 --- a/docs/MSC1711_certificates_FAQ.md +++ b/docs/MSC1711_certificates_FAQ.md @@ -3,7 +3,7 @@ ## Historical Note This document was originally written to guide server admins through the upgrade path towards Synapse 1.0. Specifically, -[MSC1711](https://github.com/matrix-org/matrix-doc/blob/master/proposals/1711-x509-for-federation.md) +[MSC1711](https://github.com/matrix-org/matrix-doc/blob/main/proposals/1711-x509-for-federation.md) required that all servers present valid TLS certificates on their federation API. Admins were encouraged to achieve compliance from version 0.99.0 (released in February 2019) ahead of version 1.0 (released June 2019) enforcing the @@ -282,7 +282,7 @@ coffin of the Perspectives project (which was already pretty dead). So, the Spec Core Team decided that a better approach would be to mandate valid TLS certificates for federation alongside the rest of the Web. More details can be found in -[MSC1711](https://github.com/matrix-org/matrix-doc/blob/master/proposals/1711-x509-for-federation.md#background-the-failure-of-the-perspectives-approach). +[MSC1711](https://github.com/matrix-org/matrix-doc/blob/main/proposals/1711-x509-for-federation.md#background-the-failure-of-the-perspectives-approach). This results in a breaking change, which is disruptive, but absolutely critical for the security model. 
However, the existence of Let's Encrypt as a trivial diff --git a/docs/README.md b/docs/README.md index e113f55d2a70..5222ee5f03fd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,9 +6,9 @@ Please update any links to point to the new website instead. ## About This directory currently holds a series of markdown files documenting how to install, use -and develop Synapse, the reference Matrix homeserver. The documentation is readable directly -from this repository, but it is recommended to instead browse through the -[website](https://matrix-org.github.io/synapse) for easier discoverability. +and develop Synapse. The documentation is readable directly from this repository, but it is +recommended to instead browse through the [website](https://matrix-org.github.io/synapse) for +easier discoverability. ## Adding to the documentation @@ -50,8 +50,10 @@ build the documentation with: mdbook build ``` -The rendered contents will be outputted to a new `book/` directory at the root of the repository. You can -browse the book by opening `book/index.html` in a web browser. +The rendered contents will be outputted to a new `book/` directory at the root of the repository. Please note that +index.html is not built by default, it is created by copying over the file `welcome_and_overview.html` to `index.html` +during deployment. Thus, when running `mdbook serve` locally the book will initially show a 404 in place of the index +due to the above. Do not be alarmed! You can also have mdbook host the docs on a local webserver with hot-reload functionality via: diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 10be12d63865..b05af6d69051 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -7,6 +7,7 @@ - [Installation](setup/installation.md) - [Using Postgres](postgres.md) - [Configuring a Reverse Proxy](reverse_proxy.md) + - [Configuring a Forward/Outbound Proxy](setup/forward_proxy.md) - [Configuring a Turn Server](turn-howto.md) - [Delegation](delegate.md) @@ -20,11 +21,12 @@ - [Homeserver Sample Config File](usage/configuration/homeserver_sample_config.md) - [Logging Sample Config File](usage/configuration/logging_sample_config.md) - [Structured Logging](structured_logging.md) + - [Templates](templates.md) - [User Authentication](usage/configuration/user_authentication/README.md) - - [Single-Sign On]() + - [Single-Sign On](usage/configuration/user_authentication/single_sign_on/README.md) - [OpenID Connect](openid.md) - - [SAML]() - - [CAS]() + - [SAML](usage/configuration/user_authentication/single_sign_on/saml.md) + - [CAS](usage/configuration/user_authentication/single_sign_on/cas.md) - [SSO Mapping Providers](sso_mapping_providers.md) - [Password Auth Providers](password_auth_providers.md) - [JSON Web Tokens](jwt.md) @@ -32,36 +34,46 @@ - [Application Services](application_services.md) - [Server Notices](server_notices.md) - [Consent Tracking](consent_tracking.md) - - [URL Previews](url_previews.md) + - [URL Previews](development/url_previews.md) - [User Directory](user_directory.md) - [Message Retention Policies](message_retention_policies.md) - - [Pluggable Modules](modules.md) - - [Third Party Rules]() - - [Spam Checker](spam_checker.md) - - [Presence Router](presence_router_module.md) - - [Media Storage Providers]() + - [Pluggable Modules](modules/index.md) + - [Writing a module](modules/writing_a_module.md) + - [Spam checker callbacks](modules/spam_checker_callbacks.md) + - [Third-party rules callbacks](modules/third_party_rules_callbacks.md) + - [Presence router 
callbacks](modules/presence_router_callbacks.md) + - [Account validity callbacks](modules/account_validity_callbacks.md) + - [Password auth provider callbacks](modules/password_auth_provider_callbacks.md) + - [Background update controller callbacks](modules/background_update_controller_callbacks.md) + - [Porting a legacy module to the new interface](modules/porting_legacy_module.md) - [Workers](workers.md) - [Using `synctl` with Workers](synctl_workers.md) - [Systemd](systemd-with-workers/README.md) - [Administration](usage/administration/README.md) - [Admin API](usage/administration/admin_api/README.md) - [Account Validity](admin_api/account_validity.md) + - [Background Updates](usage/administration/admin_api/background_updates.md) - [Delete Group](admin_api/delete_group.md) - [Event Reports](admin_api/event_reports.md) - [Media](admin_api/media_admin_api.md) - [Purge History](admin_api/purge_history_api.md) - - [Purge Rooms](admin_api/purge_room.md) - [Register Users](admin_api/register_api.md) + - [Registration Tokens](usage/administration/admin_api/registration_tokens.md) - [Manipulate Room Membership](admin_api/room_membership.md) - [Rooms](admin_api/rooms.md) - [Server Notices](admin_api/server_notices.md) - - [Shutdown Room](admin_api/shutdown_room.md) - [Statistics](admin_api/statistics.md) - [Users](admin_api/user_admin_api.md) - [Server Version](admin_api/version_api.md) + - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) + - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) + - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) + - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) + - [State Groups](usage/administration/state_groups.md) - [Request log format](usage/administration/request_log.md) + - [Admin FAQ](usage/administration/admin_faq.md) - [Scripts]() # Development @@ -71,6 +83,7 @@ - [Testing]() - [OpenTracing](opentracing.md) - [Database Schemas](development/database_schema.md) + - [Experimental features](development/experimental_features.md) - [Synapse Architecture]() - [Log Contexts](log_contexts.md) - [Replication](replication.md) @@ -88,3 +101,4 @@ # Other - [Dependency Deprecation Policy](deprecation_policy.md) + - [Running Synapse on a Single-Board Computer](other/running_synapse_on_single_board_computers.md) diff --git a/docs/admin_api/event_reports.md b/docs/admin_api/event_reports.md index 3abb06099c80..f523774ba8e3 100644 --- a/docs/admin_api/event_reports.md +++ b/docs/admin_api/event_reports.md @@ -99,7 +99,7 @@ server admin: see [Admin API](../usage/administration/admin_api). 
It returns a JSON body like the following: -```jsonc +```json { "event_id": "$bNUFCwGzWca1meCGkjp-zwslF-GfVcXukvRLI1_FaVY", "event_json": { @@ -132,7 +132,7 @@ It returns a JSON body like the following: }, "type": "m.room.message", "unsigned": { - "age_ts": 1592291711430, + "age_ts": 1592291711430 } }, "id": , diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 61bed1e0d5d8..60b8bc7379a2 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -12,6 +12,7 @@ - [Delete local media](#delete-local-media) * [Delete a specific local media](#delete-a-specific-local-media) * [Delete local media by date or size](#delete-local-media-by-date-or-size) + * [Delete media uploaded by a user](#delete-media-uploaded-by-a-user) - [Purge Remote Media API](#purge-remote-media-api) # Querying media @@ -47,7 +48,8 @@ The API returns a JSON body like the following: ## List all media uploaded by a user Listing all media that has been uploaded by a local user can be achieved through -the use of the [List media of a user](user_admin_api.md#list-media-of-a-user) +the use of the +[List media uploaded by a user](user_admin_api.md#list-media-uploaded-by-a-user) Admin API. # Quarantine media @@ -255,9 +257,9 @@ POST /_synapse/admin/v1/media//delete?before_ts= URL Parameters * `server_name`: string - The name of your local server (e.g `matrix.org`). -* `before_ts`: string representing a positive integer - Unix timestamp in ms. +* `before_ts`: string representing a positive integer - Unix timestamp in milliseconds. Files that were last used before this timestamp will be deleted. It is the timestamp of -last access and not the timestamp creation. +last access, not the timestamp when the file was created. * `size_gt`: Optional - string representing a positive integer - Size of the media in bytes. Files that are larger will be deleted. Defaults to `0`. * `keep_profiles`: Optional - string representing a boolean - Switch to also delete files @@ -281,6 +283,11 @@ The following fields are returned in the JSON response body: * `deleted_media`: an array of strings - List of deleted `media_id` * `total`: integer - Total number of deleted `media_id` +## Delete media uploaded by a user + +You can find details of how to delete multiple media uploaded by a user in +[User Admin API](user_admin_api.md#delete-media-uploaded-by-a-user). + # Purge Remote Media API The purge remote media API allows server admins to purge old cached remote media. @@ -295,7 +302,7 @@ POST /_synapse/admin/v1/purge_media_cache?before_ts= URL Parameters -* `unix_timestamp_in_ms`: string representing a positive integer - Unix timestamp in ms. +* `unix_timestamp_in_ms`: string representing a positive integer - Unix timestamp in milliseconds. All cached media that was last accessed before this timestamp will be removed. Response: diff --git a/docs/admin_api/purge_history_api.md b/docs/admin_api/purge_history_api.md index 13b991eacf35..277e28d9cbcb 100644 --- a/docs/admin_api/purge_history_api.md +++ b/docs/admin_api/purge_history_api.md @@ -27,7 +27,7 @@ Room state data (such as joins, leaves, topic) is always preserved. To delete local message events as well, set `delete_local_events` in the body: -``` +```json { "delete_local_events": true } @@ -70,6 +70,8 @@ This API returns a JSON body like the following: The status will be one of `active`, `complete`, or `failed`. +If `status` is `failed` there will be a string `error` with the error message. 
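Since the purge runs as a background task, a caller will typically poll until `status` leaves `active`. Below is a minimal polling sketch; the homeserver URL and admin token are placeholders, and the `/_synapse/admin/v1/purge_history_status/<purge_id>` path is assumed from the current Synapse admin API rather than shown in this hunk:

```python
import time

import requests

BASE_URL = "http://localhost:8008"    # assumption: local homeserver
ADMIN_TOKEN = "<admin_access_token>"  # assumption: a server admin's token


def wait_for_purge(purge_id: str, timeout: float = 300.0) -> dict:
    """Poll the purge status until it is `complete`, raising on `failed`."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = requests.get(
            f"{BASE_URL}/_synapse/admin/v1/purge_history_status/{purge_id}",
            headers={"Authorization": f"Bearer {ADMIN_TOKEN}"},
        )
        resp.raise_for_status()
        body = resp.json()
        if body["status"] == "complete":
            return body
        if body["status"] == "failed":
            # As noted above, a failed purge carries an `error` string.
            raise RuntimeError(f"Purge failed: {body.get('error')}")
        time.sleep(5)  # still `active`; check again shortly
    raise TimeoutError("Purge did not finish in time")
```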
+ ## Reclaim disk space (Postgres) To reclaim the disk space and return it to the operating system, you need to run diff --git a/docs/admin_api/purge_room.md b/docs/admin_api/purge_room.md deleted file mode 100644 index 54fea2db6d85..000000000000 --- a/docs/admin_api/purge_room.md +++ /dev/null @@ -1,21 +0,0 @@ -Deprecated: Purge room API -========================== - -**The old Purge room API is deprecated and will be removed in a future release. -See the new [Delete Room API](rooms.md#delete-room-api) for more details.** - -This API will remove all trace of a room from your database. - -All local users must have left the room before it can be removed. - -The API is: - -``` -POST /_synapse/admin/v1/purge_room - -{ - "room_id": "!room:id" -} -``` - -You must authenticate using the access token of an admin user. diff --git a/docs/admin_api/room_membership.md b/docs/admin_api/room_membership.md index 8a5ce191df2e..548b790a5c0c 100644 --- a/docs/admin_api/room_membership.md +++ b/docs/admin_api/room_membership.md @@ -28,7 +28,7 @@ server admin: see [Admin API](../usage/administration/admin_api). Response: -``` +```json { "room_id": "!636q39766251:server.com" } diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 1a812385d5a5..111a383cd91b 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -3,7 +3,11 @@ - [Room Details API](#room-details-api) - [Room Members API](#room-members-api) - [Room State API](#room-state-api) +- [Block Room API](#block-room-api) - [Delete Room API](#delete-room-api) + * [Version 1 (old version)](#version-1-old-version) + * [Version 2 (new version)](#version-2-new-version) + * [Status of deleting rooms](#status-of-deleting-rooms) * [Undoing room shutdowns](#undoing-room-shutdowns) - [Make Room Admin API](#make-room-admin-api) - [Forward Extremities Admin API](#forward-extremities-admin-api) @@ -38,9 +42,14 @@ The following query parameters are available: - `history_visibility` - Rooms are ordered alphabetically by visibility of history of the room. - `state_events` - Rooms are ordered by number of state events. Largest to smallest. * `dir` - Direction of room order. Either `f` for forwards or `b` for backwards. Setting - this value to `b` will reverse the above sort order. Defaults to `f`. -* `search_term` - Filter rooms by their room name. Search term can be contained in any - part of the room name. Defaults to no filtering. + this value to `b` will reverse the above sort order. Defaults to `f`. +* `search_term` - Filter rooms by their room name, canonical alias and room id. + Specifically, rooms are selected if the search term is contained in + - the room's name, + - the local part of the room's canonical alias, or + - the complete (local and server part) room's id (case sensitive). + + Defaults to no filtering. 
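To make the `search_term` filtering and pagination concrete, here is a rough sketch that pages through the List Room API; the homeserver URL and admin token are placeholders, and the `from`/`next_token` handling follows the response examples shown below:

```python
import requests

BASE_URL = "http://localhost:8008"    # assumption: local homeserver
ADMIN_TOKEN = "<admin_access_token>"  # assumption: a server admin's token


def search_rooms(term: str):
    """Yield every room whose name, canonical alias or room id matches `term`."""
    params = {"search_term": term}
    while True:
        resp = requests.get(
            f"{BASE_URL}/_synapse/admin/v1/rooms",
            headers={"Authorization": f"Bearer {ADMIN_TOKEN}"},
            params=params,
        )
        resp.raise_for_status()
        body = resp.json()
        yield from body["rooms"]
        if "next_token" not in body:
            break  # no further pages
        params["from"] = body["next_token"]


for room in search_rooms("abuse"):
    print(room["room_id"])
```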
**Response** @@ -87,7 +96,7 @@ GET /_synapse/admin/v1/rooms A response body like the following is returned: -```jsonc +```json { "rooms": [ { @@ -170,7 +179,7 @@ GET /_synapse/admin/v1/rooms?order_by=size A response body like the following is returned: -```jsonc +```json { "rooms": [ { @@ -208,7 +217,7 @@ A response body like the following is returned: } ], "offset": 0, - "total_rooms": 150 + "total_rooms": 150, "next_token": 100 } ``` @@ -224,7 +233,7 @@ GET /_synapse/admin/v1/rooms?order_by=size&from=100 A response body like the following is returned: -```jsonc +```json { "rooms": [ { @@ -437,9 +446,86 @@ A response body like the following is returned: } ``` +# Block Room API +The Block Room admin API allows server admins to block and unblock rooms, +and to query whether a given room is blocked. +This API can be used to pre-emptively block a room, even if it's unknown to this +homeserver. Users will be prevented from joining a blocked room. + +## Block or unblock a room + +The API is: + +``` +PUT /_synapse/admin/v1/rooms/<room_id>/block +``` + +with a body of: + +```json +{ + "block": true +} +``` + +A response body like the following is returned: + +```json +{ + "block": true +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +The following JSON body parameters are available: + +- `block` - If `true` the room will be blocked and if `false` the room will be unblocked. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. `true` if the room is blocked, otherwise `false`. + +## Get block status + +The API is: + +``` +GET /_synapse/admin/v1/rooms/<room_id>/block +``` + +A response body like the following is returned: + +```json +{ + "block": true, + "user_id": "<user_id>" +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. `true` if the room is blocked, otherwise `false`. +- `user_id` - An optional string. If the room is blocked (`block` is `true`) shows + the user who added the room to the blocking list. Otherwise it is not displayed. + # Delete Room API -The Delete Room admin API allows server admins to remove rooms from server +The Delete Room admin API allows server admins to remove rooms from the server and block these rooms. Shuts down a room. Moves all local users and room aliases automatically to a @@ -450,18 +536,33 @@ The new room will be created with the user specified by the `new_room_user_id` p as room administrator and will contain a message explaining what happened. Users invited to the new room will have power level `-10` by default, and thus be unable to speak. -If `block` is `True` it prevents new joins to the old room. +If `block` is `true`, users will be prevented from joining the old room. +In [Version 1](#version-1-old-version), this option can also be used to pre-emptively +block a room, even if it's unknown to this homeserver. In this case, the room will be +blocked, and no further action will be taken. If `block` is `false`, attempting to +delete an unknown room is invalid and will be rejected as a bad request. This API will remove all trace of the old room from your database after removing all local users. If `purge` is `true` (the default), all traces of the old room will be removed from your database after removing all local users. If you do not want this to happen, set `purge` to `false`. 
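As an illustration of the Block Room API documented above, the following sketch pre-emptively blocks a room and then reads the block status back; the homeserver URL, admin token and room ID are placeholder assumptions:

```python
from urllib.parse import quote

import requests

BASE_URL = "http://localhost:8008"    # assumption: local homeserver
ADMIN_TOKEN = "<admin_access_token>"  # assumption: a server admin's token
ROOM_ID = "!badroom:example.com"      # hypothetical room, possibly unknown to the server

HEADERS = {"Authorization": f"Bearer {ADMIN_TOKEN}"}

# Block the room; users will now be prevented from joining it.
resp = requests.put(
    f"{BASE_URL}/_synapse/admin/v1/rooms/{quote(ROOM_ID, safe='')}/block",
    headers=HEADERS,
    json={"block": True},
)
resp.raise_for_status()

# Read the status back; `user_id` identifies the admin who blocked the room.
status = requests.get(
    f"{BASE_URL}/_synapse/admin/v1/rooms/{quote(ROOM_ID, safe='')}/block",
    headers=HEADERS,
).json()
print(status["block"], status.get("user_id"))
```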
-Depending on the amount of history being purged a call to the API may take +Depending on the amount of history being purged, a call to the API may take several minutes or longer. The local server will only have the power to move local user and room aliases to the new room. Users on other servers will be unaffected. +To use it, you will need to authenticate by providing an ``access_token`` for a +server admin: see [Admin API](../usage/administration/admin_api). + +## Version 1 (old version) + +This version works synchronously. That means you only get the response once the server has +finished the action, which may take a long time. If you request the same action +a second time, and the server has not finished the first one, the second request will block. +This is fixed in version 2 of this API. The parameters are the same in both APIs. +This API will become deprecated in the future. + The API is: ``` @@ -480,9 +581,6 @@ with a body of: } ``` -To use it, you will need to authenticate by providing an ``access_token`` for a -server admin: see [Admin API](../usage/administration/admin_api). - A response body like the following is returned: ```json @@ -499,6 +597,44 @@ A response body like the following is returned: } ``` +The parameters and response values have the same format as +[version 2](#version-2-new-version) of the API. + +## Version 2 (new version) + +**Note**: This API is new, experimental and "subject to change". + +This version works asynchronously, meaning you get the response from the server immediately +while the server works on the task in the background. You can then request the status of the action +to check if it has completed. + +The API is: + +``` +DELETE /_synapse/admin/v2/rooms/<room_id> +``` + +with a body of: + +```json +{ + "new_room_user_id": "@someuser:example.com", + "room_name": "Content Violation Notification", + "message": "Bad Room has been shutdown due to content violations on this server. Please review our Terms of Service.", + "block": true, + "purge": true +} +``` + +The API starts the shutdown and purge running, and returns immediately with a JSON body containing +a purge id: + +```json +{ + "delete_id": "<delete_id>" +} +``` + **Parameters** The following parameters should be set in the URL: @@ -518,8 +654,10 @@ The following JSON body parameters are available: `new_room_user_id` in the new room. Ideally this will clearly convey why the original room was shut down. Defaults to `Sharing illegal content on this server is not permitted and rooms in violation will be blocked.` -* `block` - Optional. If set to `true`, this room will be added to a blocking list, preventing - future attempts to join the room. Defaults to `false`. +* `block` - Optional. If set to `true`, this room will be added to a blocking list, + preventing future attempts to join the room. Rooms can be blocked + even if they're not yet known to the homeserver (only with + [Version 1](#version-1-old-version) of the API). Defaults to `false`. * `purge` - Optional. If set to `true`, it will remove all traces of the room from your database. Defaults to `true`. * `force_purge` - Optional, and ignored unless `purge` is `true`. If set to `true`, it @@ -529,53 +667,163 @@ The following JSON body parameters are available: The JSON body must not be empty. The body must be at least `{}`. -**Response** +## Status of deleting rooms -The following fields are returned in the JSON response body: +**Note**: This API is new, experimental and "subject to change". + +It is possible to query the status of the background task for deleting rooms. 
+The status can be queried up to 24 hours after completion of the task, +or until Synapse is restarted (whichever happens first). + +### Query by `room_id` + +With this API you can get the status of all active deletion tasks, and all those completed in the last 24h, +for the given `room_id`. + +The API is: + +``` +GET /_synapse/admin/v2/rooms/<room_id>/delete_status +``` + +A response body like the following is returned: + +```json +{ + "results": [ + { + "delete_id": "delete_id1", + "status": "failed", + "error": "error message", + "shutdown_room": { + "kicked_users": [], + "failed_to_kick_users": [], + "local_aliases": [], + "new_room_id": null + } + }, { + "delete_id": "delete_id2", + "status": "purging", + "shutdown_room": { + "kicked_users": [ + "@foobar:example.com" + ], + "failed_to_kick_users": [], + "local_aliases": [ + "#badroom:example.com", + "#evilsaloon:example.com" + ], + "new_room_id": "!newroomid:example.com" + } + } + ] +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +* `room_id` - The ID of the room. + +### Query by `delete_id` + +With this API you can get the status of one specific task by `delete_id`. + +The API is: + +``` +GET /_synapse/admin/v2/rooms/delete_status/<delete_id> +``` + +A response body like the following is returned: + +```json +{ + "status": "purging", + "shutdown_room": { + "kicked_users": [ + "@foobar:example.com" + ], + "failed_to_kick_users": [], + "local_aliases": [ + "#badroom:example.com", + "#evilsaloon:example.com" + ], + "new_room_id": "!newroomid:example.com" + } +} +``` -* `kicked_users` - An array of users (`user_id`) that were kicked. -* `failed_to_kick_users` - An array of users (`user_id`) that that were not kicked. -* `local_aliases` - An array of strings representing the local aliases that were migrated from - the old room to the new. -* `new_room_id` - A string representing the room ID of the new room. +**Parameters** +The following parameters should be set in the URL: -## Undoing room shutdowns +* `delete_id` - The ID for this delete. -*Note*: This guide may be outdated by the time you read it. By nature of room shutdowns being performed at the database level, +### Response + +The following fields are returned in the JSON response body: + +- `results` - An array of objects, each containing information about one task. + This field is omitted from the result when you query by `delete_id`. + Task objects contain the following fields: + - `delete_id` - The ID for this purge if you query by `room_id`. + - `status` - The status will be one of: + - `shutting_down` - The process is removing users from the room. + - `purging` - The process is purging the room and event data from the database. + - `complete` - The process has completed successfully. + - `failed` - The process was aborted; an error has occurred. + - `error` - A string that shows an error message if `status` is `failed`. + Otherwise this field is hidden. + - `shutdown_room` - An object containing information about the result of shutting down the room. + *Note:* The result is shown after removing the room members. + The delete process can still be running. Please pay attention to the `status`. + - `kicked_users` - An array of users (`user_id`) that were kicked. + - `failed_to_kick_users` - An array of users (`user_id`) that were not kicked. + - `local_aliases` - An array of strings representing the local aliases that were + migrated from the old room to the new. + - `new_room_id` - A string representing the room ID of the new room, or `null` if + no such room was created. + +## Undoing room deletions + +*Note*: This guide may be outdated by the time you read it. By nature of room deletions being performed at the database level, the structure can and does change without notice. -First, it's important to understand that a room shutdown is very destructive. Undoing a shutdown is not as simple as pretending it +First, it's important to understand that a room deletion is very destructive. Undoing a deletion is not as simple as pretending it never happened - work has to be done to move forward instead of resetting the past. In fact, in some cases it might not be possible to recover at all: * If the room was invite-only, your users will need to be re-invited. * If the room no longer has any members at all, it'll be impossible to rejoin. -* The first user to rejoin will have to do so via an alias on a different server. +* The first user to rejoin will have to do so via an alias on a different + server (or receive an invite from a user on a different server). With all that being said, if you still want to try and recover the room: -1. For safety reasons, shut down Synapse. -2. In the database, run `DELETE FROM blocked_rooms WHERE room_id = '!example:example.org';` - * For caution: it's recommended to run this in a transaction: `BEGIN; DELETE ...;`, verify you got 1 result, then `COMMIT;`. - * The room ID is the same one supplied to the shutdown room API, not the Content Violation room. -3. Restart Synapse. - -You will have to manually handle, if you so choose, the following: +1. If the room was `block`ed, you must unblock it on your server. This can be + accomplished as follows: -* Aliases that would have been redirected to the Content Violation room. -* Users that would have been booted from the room (and will have been force-joined to the Content Violation room). -* Removal of the Content Violation room if desired. + 1. For safety reasons, shut down Synapse. + 2. In the database, run `DELETE FROM blocked_rooms WHERE room_id = '!example:example.org';` + * For caution: it's recommended to run this in a transaction: `BEGIN; DELETE ...;`, verify you got 1 result, then `COMMIT;`. + * The room ID is the same one supplied to the delete room API, not the Content Violation room. + 3. Restart Synapse. -## Deprecated endpoint + This step is unnecessary if `block` was not set. -The previous deprecated API will be removed in a future release, it was: +2. Any room aliases on your server that pointed to the deleted room may have + been deleted, or redirected to the Content Violation room. These will need + to be restored manually. -``` -POST /_synapse/admin/v1/rooms/<room_id>/delete -``` +3. Users on your server that were in the deleted room will have been kicked + from the room. Consider whether you want to update their membership + (possibly via the [Edit Room Membership API](room_membership.md)) or let + them handle rejoining themselves. -It behaves the same way than the current endpoint except the path and the method. +4. If `new_room_user_id` was given, a 'Content Violation' room will have been + created. Consider whether you want to delete that room. # Make Room Admin API @@ -595,7 +843,7 @@ POST /_synapse/admin/v1/rooms/<room_id_or_alias>/make_room_admin # Forward Extremities Admin API Enables querying and deleting forward extremities from rooms. When a lot of forward -extremities accumulate in a room, performance can become degraded. 
For details, see +extremities accumulate in a room, performance can become degraded. For details, see [#1760](https://github.com/matrix-org/synapse/issues/1760). ## Check for forward extremities @@ -624,7 +872,7 @@ A response as follows will be returned: ## Deleting forward extremities -**WARNING**: Please ensure you know what you're doing and have read +**WARNING**: Please ensure you know what you're doing and have read the related issue [#1760](https://github.com/matrix-org/synapse/issues/1760). Under no situations should this API be executed as an automated maintenance task! diff --git a/docs/admin_api/shutdown_room.md b/docs/admin_api/shutdown_room.md deleted file mode 100644 index 856a629487f1..000000000000 --- a/docs/admin_api/shutdown_room.md +++ /dev/null @@ -1,102 +0,0 @@ -# Deprecated: Shutdown room API - -**The old Shutdown room API is deprecated and will be removed in a future release. -See the new [Delete Room API](rooms.md#delete-room-api) for more details.** - -Shuts down a room, preventing new joins and moves local users and room aliases automatically -to a new room. The new room will be created with the user specified by the -`new_room_user_id` parameter as room administrator and will contain a message -explaining what happened. Users invited to the new room will have power level --10 by default, and thus be unable to speak. The old room's power levels will be changed to -disallow any further invites or joins. - -The local server will only have the power to move local user and room aliases to -the new room. Users on other servers will be unaffected. - -## API - -You will need to authenticate with an access token for an admin user. - -### URL - -`POST /_synapse/admin/v1/shutdown_room/{room_id}` - -### URL Parameters - -* `room_id` - The ID of the room (e.g `!someroom:example.com`) - -### JSON Body Parameters - -* `new_room_user_id` - Required. A string representing the user ID of the user that will admin - the new room that all users in the old room will be moved to. -* `room_name` - Optional. A string representing the name of the room that new users will be - invited to. -* `message` - Optional. A string containing the first message that will be sent as - `new_room_user_id` in the new room. Ideally this will clearly convey why the - original room was shut down. - -If not specified, the default value of `room_name` is "Content Violation -Notification". The default value of `message` is "Sharing illegal content on -othis server is not permitted and rooms in violation will be blocked." - -### Response Parameters - -* `kicked_users` - An integer number representing the number of users that - were kicked. -* `failed_to_kick_users` - An integer number representing the number of users - that were not kicked. -* `local_aliases` - An array of strings representing the local aliases that were migrated from - the old room to the new. -* `new_room_id` - A string representing the room ID of the new room. - -## Example - -Request: - -``` -POST /_synapse/admin/v1/shutdown_room/!somebadroom%3Aexample.com - -{ - "new_room_user_id": "@someuser:example.com", - "room_name": "Content Violation Notification", - "message": "Bad Room has been shutdown due to content violations on this server. Please review our Terms of Service." 
-} -``` - -Response: - -``` -{ - "kicked_users": 5, - "failed_to_kick_users": 0, - "local_aliases": ["#badroom:example.com", "#evilsaloon:example.com], - "new_room_id": "!newroomid:example.com", -}, -``` - -## Undoing room shutdowns - -*Note*: This guide may be outdated by the time you read it. By nature of room shutdowns being performed at the database level, -the structure can and does change without notice. - -First, it's important to understand that a room shutdown is very destructive. Undoing a shutdown is not as simple as pretending it -never happened - work has to be done to move forward instead of resetting the past. In fact, in some cases it might not be possible -to recover at all: - -* If the room was invite-only, your users will need to be re-invited. -* If the room no longer has any members at all, it'll be impossible to rejoin. -* The first user to rejoin will have to do so via an alias on a different server. - -With all that being said, if you still want to try and recover the room: - -1. For safety reasons, shut down Synapse. -2. In the database, run `DELETE FROM blocked_rooms WHERE room_id = '!example:example.org';` - * For caution: it's recommended to run this in a transaction: `BEGIN; DELETE ...;`, verify you got 1 result, then `COMMIT;`. - * The room ID is the same one supplied to the shutdown room API, not the Content Violation room. -3. Restart Synapse. - -You will have to manually handle, if you so choose, the following: - -* Aliases that would have been redirected to the Content Violation room. -* Users that would have been booted from the room (and will have been force-joined to the Content Violation room). -* Removal of the Content Violation room if desired. diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 160899754ede..ba574d795fcc 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -21,11 +21,15 @@ It returns a JSON body like the following: "threepids": [ { "medium": "email", - "address": "" + "address": "", + "added_at": 1586458409743, + "validated_at": 1586458409743 }, { "medium": "email", - "address": "" + "address": "", + "added_at": 1586458409743, + "validated_at": 1586458409743 } ], "avatar_url": "", @@ -46,7 +50,8 @@ It returns a JSON body like the following: "auth_provider": "", "external_id": "" } - ] + ], + "user_type": null } ``` @@ -81,38 +86,60 @@ with a body of: "address": "" } ], + "external_ids": [ + { + "auth_provider": "", + "external_id": "" + }, + { + "auth_provider": "", + "external_id": "" + } + ], "avatar_url": "", "admin": false, - "deactivated": false + "deactivated": false, + "user_type": null } ``` To use it, you will need to authenticate by providing an `access_token` for a server admin: [Admin API](../usage/administration/admin_api) +Returns HTTP status code: +- `201` - When a new user object was created. +- `200` - When a user was modified. + URL parameters: - `user_id`: fully-qualified user id: for example, `@user:server.com`. Body parameters: -- `password`, optional. If provided, the user's password is updated and all +- `password` - string, optional. If provided, the user's password is updated and all devices are logged out. - -- `displayname`, optional, defaults to the value of `user_id`. - -- `threepids`, optional, allows setting the third-party IDs (email, msisdn) +- `displayname` - string, optional, defaults to the value of `user_id`. +- `threepids` - array, optional, allows setting the third-party IDs (email, msisdn) + - `medium` - string. 
Kind of third-party ID, either `email` or `msisdn`. + - `address` - string. Value of third-party ID. belonging to a user. - -- `avatar_url`, optional, must be a +- `external_ids` - array, optional. Allow setting the identifier of the external identity + provider for SSO (Single sign-on). Details in + [Sample Configuration File](../usage/configuration/homeserver_sample_config.html) + section `sso` and `oidc_providers`. + - `auth_provider` - string. ID of the external identity provider. Value of `idp_id` + in homeserver configuration. + - `external_id` - string, user ID in the external identity provider. +- `avatar_url` - string, optional, must be a [MXC URI](https://matrix.org/docs/spec/client_server/r0.6.0#matrix-content-mxc-uris). - -- `admin`, optional, defaults to `false`. - -- `deactivated`, optional. If unspecified, deactivation state will be left +- `admin` - bool, optional, defaults to `false`. +- `deactivated` - bool, optional. If unspecified, deactivation state will be left unchanged on existing accounts and set to `false` for new accounts. A user cannot be erased by deactivating with this API. For details on deactivating users see [Deactivate Account](#deactivate-account). +- `user_type` - string or null, optional. If provided, the user type will be + adjusted. If `null` given, the user type will be cleared. Other + allowed options are: `bot` and `support`. If the user already exists then optional parameters default to the current value. @@ -319,6 +346,7 @@ The following actions are performed when deactivating an user: - Remove all 3PIDs from the homeserver - Delete all devices and E2EE keys - Delete all access tokens +- Delete all pushers - Delete the password hash - Removal from all rooms the user is a member of - Remove the user from the user directory @@ -332,6 +360,15 @@ is set to `true`: - Remove the user's avatar URL - Mark the user as erased +The following actions are **NOT** performed. The list may be incomplete. + +- Remove mappings of SSO IDs +- [Delete media uploaded](#delete-media-uploaded-by-a-user) by user (included avatar images) +- Delete sent and received messages +- Delete E2E cross-signing keys +- Remove the user's creation (registration) timestamp +- [Remove rate limit overrides](#override-ratelimiting-for-users) +- Remove from monthly active users ## Reset password @@ -443,8 +480,9 @@ The following fields are returned in the JSON response body: - `joined_rooms` - An array of `room_id`. - `total` - Number of rooms. +## User media -## List media of a user +### List media uploaded by a user Gets a list of all local media that a specific `user_id` has created. By default, the response is ordered by descending creation date and ascending media ID. The newest media is on top. You can change the order with parameters @@ -543,7 +581,6 @@ The following fields are returned in the JSON response body: - `media` - An array of objects, each containing information about a media. Media objects contain the following fields: - - `created_ts` - integer - Timestamp when the content was uploaded in ms. - `last_access_ts` - integer - Timestamp when the content was last accessed in ms. - `media_id` - string - The id used to refer to the media. @@ -551,13 +588,58 @@ The following fields are returned in the JSON response body: - `media_type` - string - The MIME-type of the media. - `quarantined_by` - string - The user ID that initiated the quarantine request for this media. - - `safe_from_quarantine` - bool - Status if this media is safe from quarantining. 
  - `upload_name` - string - The name the media was uploaded with.
-
- `next_token`: integer - Indication for pagination. See above.
- `total` - integer - Total number of media.
 
+### Delete media uploaded by a user
+
+This API deletes the *local* media from the disk of your own server
+that a specific `user_id` has created. This includes any local thumbnails.
+
+This API will not affect media that has been uploaded to external
+media repositories (e.g. https://github.com/turt2live/matrix-media-repo/).
+
+By default, the API deletes media ordered by descending creation date and ascending media ID.
+The newest media is deleted first. You can change the order with parameters
+`order_by` and `dir`. If no `limit` is set the API deletes `100` files per request.
+
+The API is:
+
+```
+DELETE /_synapse/admin/v1/users/<user_id>/media
+```
+
+To use it, you will need to authenticate by providing an `access_token` for a
+server admin: [Admin API](../usage/administration/admin_api)
+
+A response body like the following is returned:
+
+```json
+{
+  "deleted_media": [
+    "abcdefghijklmnopqrstuvwx"
+  ],
+  "total": 1
+}
+```
+
+The following fields are returned in the JSON response body:
+
+* `deleted_media`: an array of strings - List of deleted `media_id`
+* `total`: integer - Total number of deleted `media_id`
+
+**Note**: There is no `next_token`. This is not useful for deleting media, because
+after deleting media the remaining media have a new order.
+
+**Parameters**
+
+This API has the same parameters as
+[List media uploaded by a user](#list-media-uploaded-by-a-user).
+With these parameters you can, for example, limit the number of files deleted at once,
+or delete the largest/smallest or newest/oldest files first.
+
 ## Login as a user
 
 Get an access token that can be used to authenticate as that user. Useful for
@@ -866,7 +948,7 @@ The following fields are returned in the JSON response body:
 
 See also the
 [Client-Server API Spec on pushers](https://matrix.org/docs/spec/client_server/latest#get-matrix-client-r0-pushers).
 
-## Shadow-banning users
+## Controlling whether a user is shadow-banned
 
 Shadow-banning is a useful tool for moderating malicious or egregiously abusive users.
 A shadow-banned user receives successful responses to their client-server API requests,
@@ -879,16 +961,22 @@ or broken behaviour for the client. A shadow-banned user will not receive any
 notification and it is generally more appropriate to ban or kick abusive users.
 A shadow-banned user will be unable to contact anyone on the server.
 
-The API is:
+To shadow-ban a user, the API is:
 
 ```
 POST /_synapse/admin/v1/users/<user_id>/shadow_ban
 ```
 
+To un-shadow-ban a user, the API is:
+
+```
+DELETE /_synapse/admin/v1/users/<user_id>/shadow_ban
+```
+
 To use it, you will need to authenticate by providing an `access_token` for a
 server admin: [Admin API](../usage/administration/admin_api)
 
-An empty JSON dict is returned.
+An empty JSON dict is returned in both cases.
 
 **Parameters**
 
@@ -1013,3 +1101,22 @@ The following parameters should be set in the URL:
 
 - `user_id` - The fully qualified MXID: for example, `@user:server.com`. The user must
   be local.
 
+### Check username availability
+
+Checks to see if a username is available, and valid, for the server. See [the client-server
+API](https://matrix.org/docs/spec/client_server/r0.6.0#get-matrix-client-r0-register-available)
+for more information.
+
+This endpoint will work even if registration is disabled on the server, unlike
+`/_matrix/client/r0/register/available`.
+
+The API is:
+
+```
+GET /_synapse/admin/v1/username_available?username=$localpart
+```
+
+The request and response format is the same as the
+[/_matrix/client/r0/register/available](https://matrix.org/docs/spec/client_server/r0.6.0#get-matrix-client-r0-register-available) API.
+
+To use it, you will need to authenticate by providing an `access_token` for a
+server admin: [Admin API](../usage/administration/admin_api)
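For example, a check for the localpart `bob` might look like this sketch; the host,
port and access token are placeholders for your own deployment:

```sh
curl --header "Authorization: Bearer <access_token>" \
    "http://localhost:8008/_synapse/admin/v1/username_available?username=bob"
```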
diff --git a/docs/ancient_architecture_notes.md b/docs/ancient_architecture_notes.md
index 3ea8976cc72d..07bb199d7afb 100644
--- a/docs/ancient_architecture_notes.md
+++ b/docs/ancient_architecture_notes.md
@@ -7,7 +7,7 @@
 
 ## Server to Server Stack
 
-To use the server to server stack, home servers should only need to
+To use the server to server stack, homeservers should only need to
 interact with the Messaging layer.
 
 The server to server side of things is designed into 4 distinct layers:
@@ -23,7 +23,7 @@ Server with a domain specific API.
 
 1. **Messaging Layer**
 
-   This is what the rest of the Home Server hits to send messages, join rooms,
+   This is what the rest of the homeserver hits to send messages, join rooms,
    etc. It also allows you to register callbacks for when it gets notified by
    lower levels that e.g. a new message has been received.
 
@@ -45,7 +45,7 @@ Server with a domain specific API.
 
    For incoming PDUs, it has to check the PDUs it references to see
    if we have missed any. If we have, go and ask someone (another
-   home server) for it.
+   homeserver) for it.
 
 3. **Transaction Layer**
 
diff --git a/docs/code_style.md b/docs/code_style.md
index 28fb7277c41b..4d8e7c973d05 100644
--- a/docs/code_style.md
+++ b/docs/code_style.md
@@ -10,7 +10,9 @@
 
 The necessary tools are detailed below.
 
 First install them with:
 
-    pip install -e ".[lint,mypy]"
+```sh
+pip install -e ".[lint,mypy]"
+```
 
 - **black**
 
@@ -21,7 +23,9 @@ First install them with:
   Have `black` auto-format your code (it shouldn't change any
   functionality) with:
 
-      black . --exclude="\.tox|build|env"
+  ```sh
+  black . --exclude="\.tox|build|env"
+  ```
 
 - **flake8**
 
@@ -30,7 +34,9 @@ First install them with:
 
   Check all application and test code with:
 
-      flake8 synapse tests
+  ```sh
+  flake8 synapse tests
+  ```
 
 - **isort**
 
@@ -39,7 +45,9 @@ First install them with:
 
   Auto-fix imports with:
 
-      isort -rc synapse tests
+  ```sh
+  isort -rc synapse tests
+  ```
 
   `-rc` means to recursively search the given directories.
 
@@ -66,15 +74,19 @@ save as it takes a while and is very resource intensive.
 
   Example:
 
-      from synapse.types import UserID
-      ...
-      user_id = UserID(local, server)
+  ```python
+  from synapse.types import UserID
+  ...
+  user_id = UserID(local, server)
+  ```
 
   is preferred over:
 
-      from synapse import types
-      ...
-      user_id = types.UserID(local, server)
+  ```python
+  from synapse import types
+  ...
+  user_id = types.UserID(local, server)
+  ```
 
   (or any other variant).
 
@@ -134,28 +146,30 @@ Some guidelines follow:
 
 Example:
 
-    ## Frobnication ##
-
-    # The frobnicator will ensure that all requests are fully frobnicated.
-    # To enable it, uncomment the following.
-    #
-    #frobnicator_enabled: true
-
-    # By default, the frobnicator will frobnicate with the default frobber.
-    # The following will make it use an alternative frobber.
-    #
-    #frobincator_frobber: special_frobber
-
-    # Settings for the frobber
-    #
-    frobber:
-      # frobbing speed. Defaults to 1.
-      #
-      #speed: 10
-
-      # frobbing distance. Defaults to 1000.
-      #
-      #distance: 100
+```yaml
+## Frobnication ##
+
+# The frobnicator will ensure that all requests are fully frobnicated.
+# To enable it, uncomment the following. +# +#frobnicator_enabled: true + +# By default, the frobnicator will frobnicate with the default frobber. +# The following will make it use an alternative frobber. +# +#frobincator_frobber: special_frobber + +# Settings for the frobber +# +frobber: + # frobbing speed. Defaults to 1. + # + #speed: 10 + + # frobbing distance. Defaults to 1000. + # + #distance: 100 +``` Note that the sample configuration is generated from the synapse code and is maintained by a script, `scripts-dev/generate_sample_config`. diff --git a/docs/consent_tracking.md b/docs/consent_tracking.md index 911a1f95dbb4..fb1fec80fe00 100644 --- a/docs/consent_tracking.md +++ b/docs/consent_tracking.md @@ -99,7 +99,7 @@ construct URIs where users can give their consent. see if an unauthenticated user is viewing the page. This is typically wrapped around the form that would be used to actually agree to the document: - ``` + ```html {% if not public_version %}
diff --git a/docs/delegate.md b/docs/delegate.md
index 05cb6350472b..ee9cbb3b1cfc 100644
--- a/docs/delegate.md
+++ b/docs/delegate.md
@@ -1,4 +1,8 @@
-# Delegation
+# Delegation of incoming federation traffic
+
+In the following documentation, we use the term `server_name` to refer to that setting
+in your homeserver configuration file. It appears at the ends of user ids, and tells
+other homeservers where they can find your server.
 
 By default, other homeservers will expect to be able to reach yours via
 your `server_name`, on port 8448. For example, if you set your `server_name`
@@ -12,13 +16,21 @@ to a different server and/or port (e.g. `synapse.example.com:443`).
 
 ## .well-known delegation
 
-To use this method, you need to be able to alter the
-`server_name` 's https server to serve the `/.well-known/matrix/server`
-URL. Having an active server (with a valid TLS certificate) serving your
-`server_name` domain is out of the scope of this documentation.
+To use this method, you need to be able to configure the server at
+`https://<server_name>` to serve a file at
+`https://<server_name>/.well-known/matrix/server`. There are two ways to do this, shown below.
+
+Note that the `.well-known` file is hosted on the default port for `https` (port 443).
+
+### External server
+
+For maximum flexibility, you need to configure an external server such as nginx, Apache
+or HAProxy to serve the `https://<server_name>/.well-known/matrix/server` file. Setting
+up such a server is out of the scope of this documentation, but note that it is often
+possible to configure your [reverse proxy](reverse_proxy.md) for this.
 
-The URL `https://<server_name>/.well-known/matrix/server` should
-return a JSON structure containing the key `m.server` like so:
+The URL `https://<server_name>/.well-known/matrix/server` should be configured to
+return a JSON structure containing the key `m.server` like this:
 
 ```json
 {
@@ -26,8 +38,9 @@ return a JSON structure containing the key `m.server` like so:
 }
 ```
 
-In our example, this would mean that URL `https://example.com/.well-known/matrix/server`
-should return:
+In our example (where we want federation traffic to be routed to
+`https://synapse.example.com`, on port 443), this would mean that
+`https://example.com/.well-known/matrix/server` should return:
 
 ```json
 {
@@ -38,16 +51,29 @@ should return:
 }
 ```
 
 Note, specifying a port is optional. If no port is specified, then it defaults
 to 8448.
 
-With .well-known delegation, federating servers will check for a valid TLS
-certificate for the delegated hostname (in our example: `synapse.example.com`).
+### Serving a `.well-known/matrix/server` file with Synapse
+
+If you are able to set up your domain so that `https://<server_name>` is routed to
+Synapse (i.e., the only change needed is to direct federation traffic to port 443
+instead of port 8448), then it is possible to configure Synapse to serve a suitable
+`.well-known/matrix/server` file. To do so, add the following to your `homeserver.yaml`
+file:
+
+```yaml
+serve_server_wellknown: true
+```
+
+**Note**: this *only* works if `https://<server_name>` is routed to Synapse, so is
+generally not suitable if Synapse is hosted at a subdomain such as
+`https://synapse.example.com`.
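Whichever approach you use, you can sanity-check the delegation from any machine with
`curl` (a sketch using the example values above):

```sh
# Should print the m.server mapping you configured, e.g.
# {"m.server": "synapse.example.com:443"}
curl https://example.com/.well-known/matrix/server
```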
 ## SRV DNS record delegation
 
-It is also possible to do delegation using a SRV DNS record. However, that is
-considered an advanced topic since it's a bit complex to set up, and `.well-known`
-delegation is already enough in most cases.
+It is also possible to do delegation using a SRV DNS record. However, that is generally
+not recommended, as it can be difficult to configure the TLS certificates correctly in
+this case, and it offers little advantage over `.well-known` delegation.
 
-However, if you really need it, you can find some documentation on how such a
+However, if you really need it, you can find some documentation on what such a
 record should look like and how Synapse will use it in [the Matrix
 specification](https://matrix.org/docs/spec/server_server/latest#resolving-server-names).
 
@@ -68,27 +94,9 @@ wouldn't need any delegation set up.
 domain `server_name` points to, you will need to let other servers know how to find it
 using delegation.
 
-### Do you still recommend against using a reverse proxy on the federation port?
-
-We no longer actively recommend against using a reverse proxy. Many admins will
-find it easier to direct federation traffic to a reverse proxy and manage their
-own TLS certificates, and this is a supported configuration.
+### Should I use a reverse proxy for federation traffic?
 
-See [the reverse proxy documentation](reverse_proxy.md) for information on setting up a
+Generally, using a reverse proxy for both the federation and client traffic is a good
+idea, since it saves handling TLS traffic in Synapse. See
+[the reverse proxy documentation](reverse_proxy.md) for information on setting up a
 reverse proxy.
-
-### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy?
-
-This is no longer necessary. If you are using a reverse proxy for all of your
-TLS traffic, then you can set `no_tls: True` in the Synapse config.
-
-In that case, the only reason Synapse needs the certificate is to populate a legacy
-`tls_fingerprints` field in the federation API. This is ignored by Synapse 0.99.0
-and later, and the only time pre-0.99 Synapses will check it is when attempting to
-fetch the server keys - and generally this is delegated via `matrix.org`, which
-is running a modern version of Synapse.
-
-### Do I need the same certificate for the client and federation port?
-
-No. There is nothing stopping you from using different certificates,
-particularly if you are using a reverse proxy.
\ No newline at end of file
diff --git a/docs/deprecation_policy.md b/docs/deprecation_policy.md
index 06ea34055951..359dac07c3dc 100644
--- a/docs/deprecation_policy.md
+++ b/docs/deprecation_policy.md
@@ -14,8 +14,8 @@ i.e. when a version reaches End of Life Synapse will withdraw support for that
 version in future releases.
 
 Details on the upstream support life cycles for Python and PostgreSQL are
-documented at https://endoflife.date/python and
-https://endoflife.date/postgresql.
+documented at [https://endoflife.date/python](https://endoflife.date/python) and
+[https://endoflife.date/postgresql](https://endoflife.date/postgresql).
 
 
 Context
diff --git a/docs/development/cas.md b/docs/development/cas.md
index 592b2d8d4fc0..7c0668e034d9 100644
--- a/docs/development/cas.md
+++ b/docs/development/cas.md
@@ -8,23 +8,23 @@ easy to run CAS implementation built on top of Django.
 
 1. Create a new virtualenv: `python3 -m venv `
 2. Activate your virtualenv: `source /path/to/your/virtualenv/bin/activate`
 3. Install Django and django-mama-cas:
-   ```
+   ```sh
    python -m pip install "django<3" "django-mama-cas==2.4.0"
    ```
 4. Create a Django project in the current directory:
-   ```
+   ```sh
    django-admin startproject cas_test .
    ```
 5. Follow the [install directions](https://django-mama-cas.readthedocs.io/en/latest/installation.html#configuring) for django-mama-cas
6. Set up the SQLite database: `python manage.py migrate`
7. Create a user:
-   ```
+   ```sh
   python manage.py createsuperuser
   ```
   1. Use whatever you want as the username and password.
   2. Leave the other fields blank.
8. Use the built-in Django test server to serve the CAS endpoints on port 8000:
-   ```
+   ```sh
   python manage.py runserver
   ```
diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md
index ddf088712393..abdb8084382b 100644
--- a/docs/development/contributing_guide.md
+++ b/docs/development/contributing_guide.md
@@ -1,7 +1,473 @@
-
 # Contributing
 
-{{#include ../../CONTRIBUTING.md}}
+This document aims to get you started with contributing to Synapse!
+
+# 1. Who can contribute to Synapse?
+
+Everyone is welcome to contribute code to [matrix.org
+projects](https://github.com/matrix-org), provided that they are willing to
+license their contributions under the same license as the project itself. We
+follow a simple 'inbound=outbound' model for contributions: the act of
+submitting an 'inbound' contribution means that the contributor agrees to
+license the code under the same terms as the project's overall 'outbound'
+license - in our case, this is almost always Apache Software License v2 (see
+[LICENSE](https://github.com/matrix-org/synapse/blob/develop/LICENSE)).
+
+# 2. What do I need?
+
+If you are running Windows, the Windows Subsystem for Linux (WSL) is strongly
+recommended for development. More information about WSL can be found at
+<https://docs.microsoft.com/en-us/windows/wsl/install>. Running Synapse natively
+on Windows is not officially supported.
+
+The code of Synapse is written in Python 3. To do pretty much anything, you'll need [a recent version of Python 3](https://wiki.python.org/moin/BeginnersGuide/Download).
+
+The source code of Synapse is hosted on GitHub. You will also need [a recent version of git](https://github.com/git-guides/install-git).
+
+For some tests, you will need [a recent version of Docker](https://docs.docker.com/get-docker/).
+
+
+# 3. Get the source.
+
+The preferred and easiest way to contribute changes is to fork the relevant
+project on GitHub, and then [create a pull request](
+https://help.github.com/articles/using-pull-requests/) to ask us to pull your
+changes into our repo.
+
+Please base your changes on the `develop` branch.
+
+```sh
+git clone git@github.com:YOUR_GITHUB_USER_NAME/synapse.git
+git checkout develop
+```
+
+If you need help getting started with git, that is beyond the scope of this
+document, but you can find many good git tutorials on the web.
+
+# 4. Install the dependencies
+
+Once you have installed Python 3 and cloned the source, please open a terminal and
+set up a *virtualenv*, as follows:
+
+```sh
+cd path/where/you/have/cloned/the/repository
+python3 -m venv ./env
+source ./env/bin/activate
+pip install -e ".[all,dev]"
+pip install tox
+```
+
+This will install the developer dependencies for the project.
+
+
+# 5. Get in touch.
+
+Join our developer community on Matrix: [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org)!
+
+
+# 6. Pick an issue.
+
+Fix your favorite problem or perhaps find a [Good First Issue](https://github.com/matrix-org/synapse/issues?q=is%3Aopen+is%3Aissue+label%3A%22Good+First+Issue%22)
+to work on.
+
+
+# 7. Turn coffee into code and documentation!
+
+There is a growing amount of documentation located in the
+[`docs`](https://github.com/matrix-org/synapse/tree/develop/docs)
+directory, with a rendered version [available online](https://matrix-org.github.io/synapse).
+This documentation is intended primarily for sysadmins running their +own Synapse instance, as well as developers interacting externally with +Synapse. +[`docs/development`](https://github.com/matrix-org/synapse/tree/develop/docs/development) +exists primarily to house documentation for +Synapse developers. +[`docs/admin_api`](https://github.com/matrix-org/synapse/tree/develop/docs/admin_api) houses documentation +regarding Synapse's Admin API, which is used mostly by sysadmins and external +service developers. + +Synapse's code style is documented [here](../code_style.md). Please follow +it, including the conventions for the [sample configuration +file](../code_style.md#configuration-file-format). + +We welcome improvements and additions to our documentation itself! When +writing new pages, please +[build `docs` to a book](https://github.com/matrix-org/synapse/tree/develop/docs#adding-to-the-documentation) +to check that your contributions render correctly. The docs are written in +[GitHub-Flavoured Markdown](https://guides.github.com/features/mastering-markdown/). + +Some documentation also exists in [Synapse's GitHub +Wiki](https://github.com/matrix-org/synapse/wiki), although this is primarily +contributed to by community authors. + + +# 8. Test, test, test! + + +While you're developing and before submitting a patch, you'll +want to test your code. + +## Run the linters. + +The linters look at your code and do two things: + +- ensure that your code follows the coding style adopted by the project; +- catch a number of errors in your code. + +They're pretty fast, don't hesitate! + +```sh +source ./env/bin/activate +./scripts-dev/lint.sh +``` + +Note that this script *will modify your files* to fix styling errors. +Make sure that you have saved all your files. + +If you wish to restrict the linters to only the files changed since the last commit +(much faster!), you can instead run: + +```sh +source ./env/bin/activate +./scripts-dev/lint.sh -d +``` + +Or if you know exactly which files you wish to lint, you can instead run: + +```sh +source ./env/bin/activate +./scripts-dev/lint.sh path/to/file1.py path/to/file2.py path/to/folder +``` + +## Run the unit tests (Twisted trial). + +The unit tests run parts of Synapse, including your changes, to see if anything +was broken. They are slower than the linters but will typically catch more errors. + +```sh +source ./env/bin/activate +trial tests +``` + +If you wish to only run *some* unit tests, you may specify +another module instead of `tests` - or a test class or a method: + +```sh +source ./env/bin/activate +trial tests.rest.admin.test_room tests.handlers.test_admin.ExfiltrateData.test_invite +``` + +If your tests fail, you may wish to look at the logs (the default log level is `ERROR`): + +```sh +less _trial_temp/test.log +``` + +To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`: + +```sh +SYNAPSE_TEST_LOG_LEVEL=DEBUG trial tests +``` + +### Running tests under PostgreSQL + +Invoking `trial` as above will use an in-memory SQLite database. This is great for +quick development and testing. However, we recommend using a PostgreSQL database +in production (and indeed, we have some code paths specific to each database). +This means that we need to run our unit tests against PostgreSQL too. Our CI does +this automatically for pull requests and release candidates, but it's sometimes +useful to reproduce this locally. 
+ +To do so, [configure Postgres](../postgres.md) and run `trial` with the +following environment variables matching your configuration: + +- `SYNAPSE_POSTGRES` to anything nonempty +- `SYNAPSE_POSTGRES_HOST` +- `SYNAPSE_POSTGRES_USER` +- `SYNAPSE_POSTGRES_PASSWORD` + +For example: + +```shell +export SYNAPSE_POSTGRES=1 +export SYNAPSE_POSTGRES_HOST=localhost +export SYNAPSE_POSTGRES_USER=postgres +export SYNAPSE_POSTGRES_PASSWORD=mydevenvpassword +trial +``` + +#### Prebuilt container + +Since configuring PostgreSQL can be fiddly, we can make use of a pre-made +Docker container to set up PostgreSQL and run our tests for us. To do so, run + +```shell +scripts-dev/test_postgresql.sh +``` + +Any extra arguments to the script will be passed to `tox` and then to `trial`, +so we can run a specific test in this container with e.g. + +```shell +scripts-dev/test_postgresql.sh tests.replication.test_sharded_event_persister.EventPersisterShardTestCase +``` + +The container creates a folder in your Synapse checkout called +`.tox-pg-container` and uses this as a tox environment. The output of any +`trial` runs goes into `_trial_temp` in your synapse source directory — the same +as running `trial` directly on your host machine. + +## Run the integration tests ([Sytest](https://github.com/matrix-org/sytest)). + +The integration tests are a more comprehensive suite of tests. They +run a full version of Synapse, including your changes, to check if +anything was broken. They are slower than the unit tests but will +typically catch more errors. + +The following command will let you run the integration test with the most common +configuration: + +```sh +$ docker run --rm -it -v /path/where/you/have/cloned/the/repository\:/src:ro -v /path/to/where/you/want/logs\:/logs matrixdotorg/sytest-synapse:buster +``` + +This configuration should generally cover your needs. For more details about other configurations, see [documentation in the SyTest repo](https://github.com/matrix-org/sytest/blob/develop/docker/README.md). + + +## Run the integration tests ([Complement](https://github.com/matrix-org/complement)). + +[Complement](https://github.com/matrix-org/complement) is a suite of black box tests that can be run on any homeserver implementation. It can also be thought of as end-to-end (e2e) tests. + +It's often nice to develop on Synapse and write Complement tests at the same time. +Here is how to run your local Synapse checkout against your local Complement checkout. + +(checkout [`complement`](https://github.com/matrix-org/complement) alongside your `synapse` checkout) +```sh +COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh +``` + +To run a specific test file, you can pass the test name at the end of the command. The name passed comes from the naming structure in your Complement tests. If you're unsure of the name, you can do a full run and copy it from the test output: + +```sh +COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh TestBackfillingHistory +``` + +To run a specific test, you can specify the whole name structure: + +```sh +COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order +``` + + +### Access database for homeserver after Complement test runs. + +If you're curious what the database looks like after you run some tests, here are some steps to get you going in Synapse: + +1. 
In your Complement test, comment out `defer deployment.Destroy(t)` and replace it with `defer time.Sleep(2 * time.Hour)` to keep the homeserver running after the tests complete
+1. Start the Complement tests
+1. Find the name of the container, `docker ps -f name=complement_` (this will filter for just the Complement related Docker containers)
+1. Access the container, replacing the name with what you found in the previous step: `docker exec -it complement_1_hs_with_application_service.hs1_2 /bin/bash`
+1. Install sqlite (database driver), `apt-get update && apt-get install -y sqlite3`
+1. Then run `sqlite3` and open the database `.open /conf/homeserver.db` (this db path comes from the Synapse homeserver.yaml)
+
+
+# 9. Submit your patch.
+
+Once you're happy with your patch, it's time to prepare a Pull Request.
+
+To prepare a Pull Request, please:
+
+1. verify that [all the tests pass](#test-test-test), including the coding style;
+2. [sign off](#sign-off) your contribution;
+3. `git push` your commit to your fork of Synapse;
+4. on GitHub, [create the Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request);
+5. add a [changelog entry](#changelog) and push it to your Pull Request;
+6. for most contributors, that's all - however, if you are a member of the organization `matrix-org`, on GitHub, please request a review from `matrix.org / Synapse Core`.
+7. if you need to update your PR, please avoid rebasing and just add new commits to your branch.
+
+
+## Changelog
+
+All changes, even minor ones, need a corresponding changelog / newsfragment
+entry. These are managed by [Towncrier](https://github.com/hawkowl/towncrier).
+
+To create a changelog entry, make a new file in the `changelog.d` directory named
+in the format of `PRnumber.type`. The type can be one of the following:
+
+* `feature`
+* `bugfix`
+* `docker` (for updates to the Docker image)
+* `doc` (for updates to the documentation)
+* `removal` (also used for deprecations)
+* `misc` (for internal-only changes)
+
+This file will become part of our [changelog](
+https://github.com/matrix-org/synapse/blob/master/CHANGES.md) at the next
+release, so the content of the file should be a short description of your
+change in the same style as the rest of the changelog. The file can contain Markdown
+formatting, and should end with a full stop (.) or an exclamation mark (!) for
+consistency.
+
+Adding credits to the changelog is encouraged, we value your
+contributions and would like to have you shouted out in the release notes!
+
+For example, a fix in PR #1234 would have its changelog entry in
+`changelog.d/1234.bugfix`, and contain content like:
+
+> The security levels of Florbs are now validated when received
+> via the `/federation/florb` endpoint. Contributed by Jane Matrix.
+
+If there are multiple pull requests involved in a single bugfix/feature/etc,
+then the content for each `changelog.d` file should be the same. Towncrier will
+merge the matching files together into a single changelog entry when we come to
+release.
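To illustrate, creating the entry for the hypothetical PR #1234 above could be as
simple as (a sketch):

```sh
echo 'Validate the security levels of Florbs on the `/federation/florb` endpoint. Contributed by Jane Matrix.' > changelog.d/1234.bugfix
```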
+### How do I know what to call the changelog file before I create the PR?
+
+Obviously, you don't know if you should call your newsfile
+`1234.bugfix` or `5678.bugfix` until you create the PR, which leads to a
+chicken-and-egg problem.
+
+There are two options for solving this:
+
+1. Open the PR without a changelog file, see what number you got, and *then*
+   add the changelog file to your branch (see [Updating your pull
+   request](#updating-your-pull-request)), or:
+
+1. Look at the [list of all
+   issues/PRs](https://github.com/matrix-org/synapse/issues?q=), add one to the
+   highest number you see, and quickly open the PR before somebody else claims
+   your number.
+
+   [This
+   script](https://github.com/richvdh/scripts/blob/master/next_github_number.sh)
+   might be helpful if you find yourself doing this a lot.
+
+Sorry, we know it's a bit fiddly, but it's *really* helpful for us when we come
+to put together a release!
+
+### Debian changelog
+
+Changes which affect the debian packaging files (in `debian`) are an
+exception to the rule that all changes require a `changelog.d` file.
+
+In this case, you will need to add an entry to the debian changelog for the
+next release. For this, run the following command:
+
+```
+dch
+```
+
+This will make up a new version number (if there isn't already an unreleased
+version in flight), and open an editor where you can add a new changelog entry.
+(Our release process will ensure that the version number and maintainer name are
+corrected for the release.)
+
+If your change affects both the debian packaging *and* files outside the debian
+directory, you will need both a regular newsfragment *and* an entry in the
+debian changelog. (Though typically such changes should be submitted as two
+separate pull requests.)
+
+## Sign off
+
+In order to have a concrete record that your contribution is intentional
+and you agree to license it under the same terms as the project's license, we've adopted the
+same lightweight approach that the Linux Kernel
+[submitting patches process](
+https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin),
+[Docker](https://github.com/docker/docker/blob/master/CONTRIBUTING.md), and many other
+projects use: the DCO (Developer Certificate of Origin:
+http://developercertificate.org/). This is a simple declaration that you wrote
+the contribution or otherwise have the right to contribute it to Matrix:
+
+```
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
+660 York Street, Suite 102,
+San Francisco, CA 94110 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
+```
+
+If you agree to this for your contribution, then all that's needed is to
+include the line in your commit or pull request comment:
+
+```
+Signed-off-by: Your Name <your@email.example.org>
+```
+
+We accept contributions under a legally identifiable name, such as
+your name on government documentation or common-law names (names
+claimed by legitimate usage or repute). Unfortunately, we cannot
+accept anonymous contributions at this time.
+
+Git allows you to add this signoff automatically when using the `-s`
+flag to `git commit`, which uses the name and email set in your
+`user.name` and `user.email` git configs.
+
+
+# 10. Turn feedback into better code.
+
+Once the Pull Request is opened, you will see a few things:
+
+1. our automated CI (Continuous Integration) pipeline will run (again) the linters, the unit tests, the integration tests and more;
+2. one or more of the developers will take a look at your Pull Request and offer feedback.
+
+From this point, you should:
+
+1. Look at the results of the CI pipeline.
+   - If there is any error, fix the error.
+2. If a developer has requested changes, make these changes and let us know if it is ready for a developer to review again.
+3. Create a new commit with the changes.
+   - Please do NOT overwrite the history. New commits make the reviewer's life easier.
+   - Push these commits to your Pull Request.
+4. Back to 1.
+
+Once both the CI and the developers are happy, the patch will be merged into Synapse and released shortly!
+
+# 11. Find a new issue.
+
+By now, you know the drill!
+
+# Notes for maintainers on merging PRs etc
+
+There are some notes for those with commit access to the project on how we
+manage git [here](git.md).
+
+# Conclusion
+
+That's it! Matrix is a very open and collaborative project as you might expect
+given our obsession with open communication. If we're going to successfully
+matrix together all the fragmented communication technologies out there we are
+reliant on contributions and collaboration from the community to do so. So
+please get involved - and we hope you have as much fun hacking on Matrix as we
+do!
diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md
index 20740cf5acd9..256a62921059 100644
--- a/docs/development/database_schema.md
+++ b/docs/development/database_schema.md
@@ -89,7 +89,9 @@ To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
 
 Ensure postgres is installed, then run:
 
-    ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
+```sh
+./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
+```
 
 NB at the time of writing, this script predates the split into separate
 `state`/`main` databases so will require updates to handle that correctly.
diff --git a/docs/development/experimental_features.md b/docs/development/experimental_features.md
new file mode 100644
index 000000000000..d6b11496cc4d
--- /dev/null
+++ b/docs/development/experimental_features.md
@@ -0,0 +1,37 @@
+# Implementing experimental features in Synapse
+
+It can be desirable to implement "experimental" features which are disabled by
+default and must be explicitly enabled via the Synapse configuration.
This is +applicable for features which: + +* Are unstable in the Matrix spec (e.g. those defined by an MSC that has not yet been merged). +* Developers are not confident in their use by general Synapse administrators/users + (e.g. a feature is incomplete, buggy, performs poorly, or needs further testing). + +Note that this only really applies to features which are expected to be desirable +to a broad audience. The [module infrastructure](../modules/index.md) should +instead be investigated for non-standard features. + +Guarding experimental features behind configuration flags should help with some +of the following scenarios: + +* Ensure that clients do not assume that unstable features exist (failing + gracefully if they do not). +* Unstable features do not become de-facto standards and can be removed + aggressively (since only those who have opted-in will be affected). +* Ease finding the implementation of unstable features in Synapse (for future + removal or stabilization). +* Ease testing a feature (or removal of feature) due to enabling/disabling without + code changes. It also becomes possible to ask for wider testing, if desired. + +Experimental configuration flags should be disabled by default (requiring Synapse +administrators to explicitly opt-in), although there are situations where it makes +sense (from a product point-of-view) to enable features by default. This is +expected and not an issue. + +It is not a requirement for experimental features to be behind a configuration flag, +but one should be used if unsure. + +New experimental configuration flags should be added under the `experimental` +configuration key (see the `synapse.config.experimental` file) and either explain +(briefly) what is being enabled, or include the MSC number. diff --git a/docs/development/room-dag-concepts.md b/docs/development/room-dag-concepts.md index 5eed72bec662..cbc7cf29491c 100644 --- a/docs/development/room-dag-concepts.md +++ b/docs/development/room-dag-concepts.md @@ -38,16 +38,15 @@ Most-recent-in-time events in the DAG which are not referenced by any other even The forward extremities of a room are used as the `prev_events` when the next event is sent. -## Backwards extremity +## Backward extremity The current marker of where we have backfilled up to and will generally be the -oldest-in-time events we know of in the DAG. +`prev_events` of the oldest-in-time events we have in the DAG. This gives a starting point when +backfilling history. -This is an event where we haven't fetched all of the `prev_events` for. - -Once we have fetched all of its `prev_events`, it's unmarked as a backwards -extremity (although we may have formed new backwards extremities from the prev -events during the backfilling process). +When we persist a non-outlier event, we clear it as a backward extremity and set +all of its `prev_events` as the new backward extremities if they aren't already +persisted in the `events` table. ## Outliers @@ -56,8 +55,7 @@ We mark an event as an `outlier` when we haven't figured out the state for the room at that point in the DAG yet. We won't *necessarily* have the `prev_events` of an `outlier` in the database, -but it's entirely possible that we *might*. The status of whether we have all of -the `prev_events` is marked as a [backwards extremity](#backwards-extremity). +but it's entirely possible that we *might*. 
For example, when we fetch the event auth chain or state for a given event, we mark all of those claimed auth events as outliers because we haven't done the diff --git a/docs/development/saml.md b/docs/development/saml.md index a9bfd2dc05d6..b08bcb741900 100644 --- a/docs/development/saml.md +++ b/docs/development/saml.md @@ -1,10 +1,9 @@ # How to test SAML as a developer without a server -https://capriza.github.io/samling/samling.html (https://github.com/capriza/samling) is a great -resource for being able to tinker with the SAML options within Synapse without needing to -deploy and configure a complicated software stack. +https://fujifish.github.io/samling/samling.html (https://github.com/fujifish/samling) is a great resource for being able to tinker with the +SAML options within Synapse without needing to deploy and configure a complicated software stack. -To make Synapse (and therefore Riot) use it: +To make Synapse (and therefore Element) use it: 1. Use the samling.html URL above or deploy your own and visit the IdP Metadata tab. 2. Copy the XML to your clipboard. @@ -16,7 +15,7 @@ To make Synapse (and therefore Riot) use it: sp_config: allow_unknown_attributes: true # Works around a bug with AVA Hashes: https://github.com/IdentityPython/pysaml2/issues/388 metadata: - local: ["samling.xml"] + local: ["samling.xml"] ``` 5. Ensure that your `homeserver.yaml` has a setting for `public_baseurl`: ```yaml @@ -26,9 +25,9 @@ To make Synapse (and therefore Riot) use it: the dependencies are installed and ready to go. 7. Restart Synapse. -Then in Riot: +Then in Element: -1. Visit the login page with a Riot pointing at your homeserver. +1. Visit the login page and point Element towards your homeserver using the `public_baseurl` above. 2. Click the Single Sign-On button. 3. On the samling page, enter a Name Identifier and add a SAML Attribute for `uid=your_localpart`. The response must also be signed. diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md new file mode 100644 index 000000000000..aff38136091d --- /dev/null +++ b/docs/development/url_previews.md @@ -0,0 +1,56 @@ +URL Previews +============ + +The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API +for URLs which outputs [Open Graph](https://ogp.me/) responses (with some Matrix +specific additions). + +This does have trade-offs compared to other designs: + +* Pros: + * Simple and flexible; can be used by any clients at any point +* Cons: + * If each homeserver provides one of these independently, all the HSes in a + room may needlessly DoS the target URI + * The URL metadata must be stored somewhere, rather than just using Matrix + itself to store the media. + * Matrix cannot be used to distribute the metadata between homeservers. + +When Synapse is asked to preview a URL it does the following: + +1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the + config). +2. Checks the in-memory cache by URLs and returns the result if it exists. (This + is also used to de-duplicate processing of multiple in-flight requests at once.) +3. Kicks off a background process to generate a preview: + 1. Checks the database cache by URL and timestamp and returns the result if it + has not expired and was successful (a 2xx return code). + 2. Checks if the URL matches an [oEmbed](https://oembed.com/) pattern. If it + does, update the URL to download. + 3. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 4. 
If the media is an image: + 1. Generates thumbnails. + 2. Generates an Open Graph response based on image properties. + 5. If the media is HTML: + 1. Decodes the HTML via the stored file. + 2. Generates an Open Graph response from the HTML. + 3. If an image exists in the Open Graph response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 6. If the media is JSON and an oEmbed URL was found: + 1. Convert the oEmbed response to an Open Graph response. + 2. If a thumbnail or image is in the oEmbed response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 7. Stores the result in the database cache. +4. Returns the result. + +The in-memory cache expires after 1 hour. + +Expired entries in the database cache (and their associated media files) are +deleted every 10 seconds. The default expiration time is 1 hour from download. diff --git a/docs/jwt.md b/docs/jwt.md index 5be9fd26e331..32f58cc0cbbf 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -22,8 +22,9 @@ will be removed in a future version of Synapse. The `token` field should include the JSON web token with the following claims: -* The `sub` (subject) claim is required and should encode the local part of the - user ID. +* A claim that encodes the local part of the user ID is required. By default, + the `sub` (subject) claim is used, or a custom claim can be set in the + configuration file. * The expiration time (`exp`), not before time (`nbf`), and issued at (`iat`) claims are optional, but validated if present. * The issuer (`iss`) claim is optional, but required and validated if configured. diff --git a/docs/log_contexts.md b/docs/log_contexts.md index d49dce8830a1..cb15dbe158c0 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -10,7 +10,7 @@ Logcontexts are also used for CPU and database accounting, so that we can track which requests were responsible for high CPU use or database activity. -The `synapse.logging.context` module provides a facilities for managing +The `synapse.logging.context` module provides facilities for managing the current log context (as well as providing the `LoggingContextFilter` class). @@ -351,7 +351,7 @@ and the awaitable chain is now orphaned, and will be garbage-collected at some point. Note that `await_something_interesting` is a coroutine, which Python implements as a generator function. When Python garbage-collects generator functions, it gives them a chance to -clean up by making the `async` (or `yield`) raise a `GeneratorExit` +clean up by making the `await` (or `yield`) raise a `GeneratorExit` exception. In our case, that means that the `__exit__` handler of `PreserveLoggingContext` will carefully restore the request context, but there is now nothing waiting for its return, so the request context is diff --git a/docs/manhole.md b/docs/manhole.md index 37d1d7823c00..715ed840f2dd 100644 --- a/docs/manhole.md +++ b/docs/manhole.md @@ -11,7 +11,7 @@ Note that this will give administrative access to synapse to **all users** with shell access to the server. It should therefore **not** be enabled in environments where untrusted users have shell access. -*** +## Configuring the manhole To enable it, first uncomment the `manhole` listener configuration in `homeserver.yaml`. 
The configuration is slightly different if you're using docker. @@ -52,22 +52,43 @@ listeners: type: manhole ``` -#### Accessing synapse manhole +### Security settings + +The following config options are available: + +- `username` - The username for the manhole (defaults to `matrix`) +- `password` - The password for the manhole (defaults to `rabbithole`) +- `ssh_priv_key` - The path to a private SSH key (defaults to a hardcoded value) +- `ssh_pub_key` - The path to a public SSH key (defaults to a hardcoded value) + +For example: + +```yaml +manhole_settings: + username: manhole + password: mypassword + ssh_priv_key: "/home/synapse/manhole_keys/id_rsa" + ssh_pub_key: "/home/synapse/manhole_keys/id_rsa.pub" +``` + + +## Accessing synapse manhole Then restart synapse, and point an ssh client at port 9000 on localhost, using -the username `matrix`: +the username and password configured in `homeserver.yaml` - with the default +configuration, this would be: ```bash ssh -p9000 matrix@localhost ``` -The password is `rabbithole`. +Then enter the password when prompted (the default is `rabbithole`). This gives a Python REPL in which `hs` gives access to the `synapse.server.HomeServer` object - which in turn gives access to many other parts of the process. -Note that any call which returns a coroutine will need to be wrapped in `ensureDeferred`. +Note that, prior to Synapse 1.41, any call which returns a coroutine will need to be wrapped in `ensureDeferred`. As a simple example, retrieving an event from the database: diff --git a/docs/media_repository.md b/docs/media_repository.md index 1bf8f16f557b..ba17f8a856f1 100644 --- a/docs/media_repository.md +++ b/docs/media_repository.md @@ -2,29 +2,80 @@ *Synapse implementation-specific details for the media repository* -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. +The media repository + * stores avatars, attachments and their thumbnails for media uploaded by local + users. + * caches avatars, attachments and their thumbnails for media uploaded by remote + users. + * caches resources and thumbnails used for + [URL previews](development/url_previews.md). -## Storage +All media in Matrix can be identified by a unique +[MXC URI](https://spec.matrix.org/latest/client-server-api/#matrix-content-mxc-uris), +consisting of a server name and media ID: +``` +mxc:/// +``` -Each item of media is assigned a `media_id` when it is uploaded. -The `media_id` is a randomly chosen, URL safe 24 character string. +## Local Media +Synapse generates 24 character media IDs for content uploaded by local users. +These media IDs consist of upper and lowercase letters and are case-sensitive. +Other homeserver implementations may generate media IDs differently. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by `media_id`. +Local media is recorded in the `local_media_repository` table, which includes +metadata such as MIME types, upload times and file sizes. +Note that this table is shared by the URL cache, which has a different media ID +scheme. -Content is stored on the filesystem under a `"local_content"` directory. 
+### Paths +A file with media ID `aabbcccccccccccccccccccc` and its `128x96` `image/jpeg` +thumbnail, created by scaling, would be stored at: +``` +local_content/aa/bb/cccccccccccccccccccc +local_thumbnails/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` -Thumbnails are stored under a `"local_thumbnails"` directory. +## Remote Media +When media from a remote homeserver is requested from Synapse, it is assigned +a local `filesystem_id`, with the same format as locally-generated media IDs, +as described above. -The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under -`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width -`128` and height `96` and type `"image/jpeg"` is stored under -`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"` +A record of remote media is stored in the `remote_media_cache` table, which +can be used to map remote MXC URIs (server names and media IDs) to local +`filesystem_id`s. -Remote content is cached under `"remote_content"` directory. Each item of -remote content is assigned a local `"filesystem_id"` to ensure that the -directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"` -is appropriate. Thumbnails for remote content are stored under -`"remote_thumbnails/server_name/..."` +### Paths +A file from `matrix.org` with `filesystem_id` `aabbcccccccccccccccccccc` and its +`128x96` `image/jpeg` thumbnail, created by scaling, would be stored at: +``` +remote_content/matrix.org/aa/bb/cccccccccccccccccccc +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` +Older thumbnails may omit the thumbnailing method: +``` +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg +``` + +Note that `remote_thumbnail/` does not have an `s`. + +## URL Previews +See [URL Previews](development/url_previews.md) for documentation on the URL preview +process. + +When generating previews for URLs, Synapse may download and cache various +resources, including images. These resources are assigned temporary media IDs +of the form `yyyy-mm-dd_aaaaaaaaaaaaaaaa`, where `yyyy-mm-dd` is the current +date and `aaaaaaaaaaaaaaaa` is a random sequence of 16 case-sensitive letters. + +The metadata for these cached resources is stored in the +`local_media_repository` and `local_media_repository_url_cache` tables. + +Resources for URL previews are deleted after a few days. + +### Paths +The file with media ID `yyyy-mm-dd_aaaaaaaaaaaaaaaa` and its `128x96` +`image/jpeg` thumbnail, created by scaling, would be stored at: +``` +url_cache/yyyy-mm-dd/aaaaaaaaaaaaaaaa +url_cache_thumbnails/yyyy-mm-dd/aaaaaaaaaaaaaaaa/128-96-image-jpeg-scale +``` diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index ea3d46cc10f3..9214d6d7e98b 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -69,9 +69,9 @@ A default policy can be defined as such, in the `retention` section of the configuration file: ```yaml - default_policy: - min_lifetime: 1d - max_lifetime: 1y +default_policy: + min_lifetime: 1d + max_lifetime: 1y ``` Here, `min_lifetime` and `max_lifetime` have the same meaning and level @@ -95,14 +95,14 @@ depending on an event's room's policy. This can be done by setting the file. 
An example of such configuration could be: ```yaml - purge_jobs: - - longest_max_lifetime: 3d - interval: 12h - - shortest_max_lifetime: 3d - longest_max_lifetime: 1w - interval: 1d - - shortest_max_lifetime: 1w - interval: 2d +purge_jobs: + - longest_max_lifetime: 3d + interval: 12h + - shortest_max_lifetime: 3d + longest_max_lifetime: 1w + interval: 1d + - shortest_max_lifetime: 1w + interval: 2d ``` In this example, we define three jobs: @@ -141,8 +141,8 @@ purging old events in a room. These limits can be defined as such in the `retention` section of the configuration file: ```yaml - allowed_lifetime_min: 1d - allowed_lifetime_max: 1y +allowed_lifetime_min: 1d +allowed_lifetime_max: 1y ``` The limits are considered when running purge jobs. If necessary, the diff --git a/docs/modules.md b/docs/modules.md deleted file mode 100644 index 9a430390a4cb..000000000000 --- a/docs/modules.md +++ /dev/null @@ -1,353 +0,0 @@ -# Modules - -Synapse supports extending its functionality by configuring external modules. - -## Using modules - -To use a module on Synapse, add it to the `modules` section of the configuration file: - -```yaml -modules: - - module: my_super_module.MySuperClass - config: - do_thing: true - - module: my_other_super_module.SomeClass - config: {} -``` - -Each module is defined by a path to a Python class as well as a configuration. This -information for a given module should be available in the module's own documentation. - -**Note**: When using third-party modules, you effectively allow someone else to run -custom code on your Synapse homeserver. Server admins are encouraged to verify the -provenance of the modules they use on their homeserver and make sure the modules aren't -running malicious code on their instance. - -Also note that we are currently in the process of migrating module interfaces to this -system. While some interfaces might be compatible with it, others still require -configuring modules in another part of Synapse's configuration file. Currently, only the -spam checker interface is compatible with this new system. - -## Writing a module - -A module is a Python class that uses Synapse's module API to interact with the -homeserver. It can register callbacks that Synapse will call on specific operations, as -well as web resources to attach to Synapse's web server. - -When instantiated, a module is given its parsed configuration as well as an instance of -the `synapse.module_api.ModuleApi` class. The configuration is a dictionary, and is -either the output of the module's `parse_config` static method (see below), or the -configuration associated with the module in Synapse's configuration file. - -See the documentation for the `ModuleApi` class -[here](https://github.com/matrix-org/synapse/blob/master/synapse/module_api/__init__.py). - -### Handling the module's configuration - -A module can implement the following static method: - -```python -@staticmethod -def parse_config(config: dict) -> dict -``` - -This method is given a dictionary resulting from parsing the YAML configuration for the -module. It may modify it (for example by parsing durations expressed as strings (e.g. -"5d") into milliseconds, etc.), and return the modified dictionary. It may also verify -that the configuration is correct, and raise an instance of -`synapse.module_api.errors.ConfigError` if not. 
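As a minimal sketch of such a method, using the made-up `do_thing` option from the
example configuration above:

```python
from synapse.module_api.errors import ConfigError


class MySuperClass:
    @staticmethod
    def parse_config(config: dict) -> dict:
        # Reject obviously invalid configuration early, as described above.
        if not isinstance(config.get("do_thing", False), bool):
            raise ConfigError("'do_thing' must be a boolean")
        return config
```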
- -### Registering a web resource - -Modules can register web resources onto Synapse's web server using the following module -API method: - -```python -def ModuleApi.register_web_resource(path: str, resource: IResource) -> None -``` - -The path is the full absolute path to register the resource at. For example, if you -register a resource for the path `/_synapse/client/my_super_module/say_hello`, Synapse -will serve it at `http(s)://[HS_URL]/_synapse/client/my_super_module/say_hello`. Note -that Synapse does not allow registering resources for several sub-paths in the `/_matrix` -namespace (such as anything under `/_matrix/client` for example). It is strongly -recommended that modules register their web resources under the `/_synapse/client` -namespace. - -The provided resource is a Python class that implements Twisted's [IResource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.IResource.html) -interface (such as [Resource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.Resource.html)). - -Only one resource can be registered for a given path. If several modules attempt to -register a resource for the same path, the module that appears first in Synapse's -configuration file takes priority. - -Modules **must** register their web resources in their `__init__` method. - -### Registering a callback - -Modules can use Synapse's module API to register callbacks. Callbacks are functions that -Synapse will call when performing specific actions. Callbacks must be asynchronous, and -are split in categories. A single module may implement callbacks from multiple categories, -and is under no obligation to implement all callbacks from the categories it registers -callbacks for. - -Modules can register callbacks using one of the module API's `register_[...]_callbacks` -methods. The callback functions are passed to these methods as keyword arguments, with -the callback name as the argument name and the function as its value. This is demonstrated -in the example below. A `register_[...]_callbacks` method exists for each module type -documented in this section. - -#### Spam checker callbacks - -Spam checker callbacks allow module developers to implement spam mitigation actions for -Synapse instances. Spam checker callbacks can be registered using the module API's -`register_spam_checker_callbacks` method. - -The available spam checker callbacks are: - -```python -async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] -``` - -Called when receiving an event from a client or via federation. The module can return -either a `bool` to indicate whether the event must be rejected because of spam, or a `str` -to indicate the event must be rejected because of spam and to give a rejection reason to -forward to clients. - -```python -async def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool -``` - -Called when processing an invitation. The module must return a `bool` indicating whether -the inviter can invite the invitee to the given room. Both inviter and invitee are -represented by their Matrix user ID (e.g. `@alice:example.com`). - -```python -async def user_may_create_room(user: str) -> bool -``` - -Called when processing a room creation request. The module must return a `bool` indicating -whether the given user (represented by their Matrix user ID) is allowed to create a room. 
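Putting the pieces above together, a minimal sketch of a module registering one of
these callbacks (the class name and the policy are made up for illustration):

```python
class MySpamCheckerModule:
    def __init__(self, config: dict, api):
        # `api` is the synapse.module_api.ModuleApi instance the module is
        # given when instantiated, as described earlier in this document.
        api.register_spam_checker_callbacks(
            user_may_create_room=self.user_may_create_room,
        )

    async def user_may_create_room(self, user: str) -> bool:
        # Hypothetical policy: forbid room creation by accounts whose
        # localpart starts with "temp-".
        return not user.startswith("@temp-")
```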
- -```python -async def user_may_create_room_alias(user: str, room_alias: "synapse.types.RoomAlias") -> bool -``` - -Called when trying to associate an alias with an existing room. The module must return a -`bool` indicating whether the given user (represented by their Matrix user ID) is allowed -to set the given alias. - -```python -async def user_may_publish_room(user: str, room_id: str) -> bool -``` - -Called when trying to publish a room to the homeserver's public rooms directory. The -module must return a `bool` indicating whether the given user (represented by their -Matrix user ID) is allowed to publish the given room. - -```python -async def check_username_for_spam(user_profile: Dict[str, str]) -> bool -``` - -Called when computing search results in the user directory. The module must return a -`bool` indicating whether the given user profile can appear in search results. The profile -is represented as a dictionary with the following keys: - -* `user_id`: The Matrix ID for this user. -* `display_name`: The user's display name. -* `avatar_url`: The `mxc://` URL to the user's avatar. - -The module is given a copy of the original dictionary, so modifying it from within the -module cannot modify a user's profile when included in user directory search results. - -```python -async def check_registration_for_spam( - email_threepid: Optional[dict], - username: Optional[str], - request_info: Collection[Tuple[str, str]], - auth_provider_id: Optional[str] = None, -) -> "synapse.spam_checker_api.RegistrationBehaviour" -``` - -Called when registering a new user. The module must return a `RegistrationBehaviour` -indicating whether the registration can go through or must be denied, or whether the user -may be allowed to register but will be shadow banned. - -The arguments passed to this callback are: - -* `email_threepid`: The email address used for registering, if any. -* `username`: The username the user would like to register. Can be `None`, meaning that - Synapse will generate one later. -* `request_info`: A collection of tuples, which first item is a user agent, and which - second item is an IP address. These user agents and IP addresses are the ones that were - used during the registration process. -* `auth_provider_id`: The identifier of the SSO authentication provider, if any. - -```python -async def check_media_file_for_spam( - file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", - file_info: "synapse.rest.media.v1._base.FileInfo", -) -> bool -``` - -Called when storing a local or remote file. The module must return a boolean indicating -whether the given file can be stored in the homeserver's media store. - -#### Account validity callbacks - -Account validity callbacks allow module developers to add extra steps to verify the -validity on an account, i.e. see if a user can be granted access to their account on the -Synapse instance. Account validity callbacks can be registered using the module API's -`register_account_validity_callbacks` method. - -The available account validity callbacks are: - -```python -async def is_user_expired(user: str) -> Optional[bool] -``` - -Called when processing any authenticated request (except for logout requests). The module -can return a `bool` to indicate whether the user has expired and should be locked out of -their account, or `None` if the module wasn't able to figure it out. The user is -represented by their Matrix user ID (e.g. `@alice:example.com`). 
- -If the module returns `True`, the current request will be denied with the error code -`ORG_MATRIX_EXPIRED_ACCOUNT` and the HTTP status code 403. Note that this doesn't -invalidate the user's access token. - -```python -async def on_user_registration(user: str) -> None -``` - -Called after successfully registering a user, in case the module needs to perform extra -operations to keep track of them. (e.g. add them to a database table). The user is -represented by their Matrix user ID. - -#### Third party rules callbacks - -Third party rules callbacks allow module developers to add extra checks to verify the -validity of incoming events. Third party event rules callbacks can be registered using -the module API's `register_third_party_rules_callbacks` method. - -The available third party rules callbacks are: - -```python -async def check_event_allowed( - event: "synapse.events.EventBase", - state_events: "synapse.types.StateMap", -) -> Tuple[bool, Optional[dict]] -``` - -** -This callback is very experimental and can and will break without notice. Module developers -are encouraged to implement `check_event_for_spam` from the spam checker category instead. -** - -Called when processing any incoming event, with the event and a `StateMap` -representing the current state of the room the event is being sent into. A `StateMap` is -a dictionary that maps tuples containing an event type and a state key to the -corresponding state event. For example retrieving the room's `m.room.create` event from -the `state_events` argument would look like this: `state_events.get(("m.room.create", ""))`. -The module must return a boolean indicating whether the event can be allowed. - -Note that this callback function processes incoming events coming via federation -traffic (on top of client traffic). This means denying an event might cause the local -copy of the room's history to diverge from that of remote servers. This may cause -federation issues in the room. It is strongly recommended to only deny events using this -callback function if the sender is a local user, or in a private federation in which all -servers are using the same module, with the same configuration. - -If the boolean returned by the module is `True`, it may also tell Synapse to replace the -event with new data by returning the new event's data as a dictionary. In order to do -that, it is recommended the module calls `event.get_dict()` to get the current event as a -dictionary, and modify the returned dictionary accordingly. - -Note that replacing the event only works for events sent by local users, not for events -received over federation. - -```python -async def on_create_room( - requester: "synapse.types.Requester", - request_content: dict, - is_requester_admin: bool, -) -> None -``` - -Called when processing a room creation request, with the `Requester` object for the user -performing the request, a dictionary representing the room creation request's JSON body -(see [the spec](https://matrix.org/docs/spec/client_server/latest#post-matrix-client-r0-createroom) -for a list of possible parameters), and a boolean indicating whether the user performing -the request is a server admin. - -Modules can modify the `request_content` (by e.g. adding events to its `initial_state`), -or deny the room's creation by raising a `module_api.errors.SynapseError`. 
- - -### Porting an existing module that uses the old interface - -In order to port a module that uses Synapse's old module interface, its author needs to: - -* ensure the module's callbacks are all asynchronous. -* register their callbacks using one or more of the `register_[...]_callbacks` methods - from the `ModuleApi` class in the module's `__init__` method (see [this section](#registering-a-callback) - for more info). - -Additionally, if the module is packaged with an additional web resource, the module -should register this resource in its `__init__` method using the `register_web_resource` -method from the `ModuleApi` class (see [this section](#registering-a-web-resource) for -more info). - -The module's author should also update any example in the module's configuration to only -use the new `modules` section in Synapse's configuration file (see [this section](#using-modules) -for more info). - -### Example - -The example below is a module that implements the spam checker callback -`user_may_create_room` to deny room creation to user `@evilguy:example.com`, and registers -a web resource to the path `/_synapse/client/demo/hello` that returns a JSON object. - -```python -import json - -from twisted.web.resource import Resource -from twisted.web.server import Request - -from synapse.module_api import ModuleApi - - -class DemoResource(Resource): - def __init__(self, config): - super(DemoResource, self).__init__() - self.config = config - - def render_GET(self, request: Request): - name = request.args.get(b"name")[0] - request.setHeader(b"Content-Type", b"application/json") - return json.dumps({"hello": name}) - - -class DemoModule: - def __init__(self, config: dict, api: ModuleApi): - self.config = config - self.api = api - - self.api.register_web_resource( - path="/_synapse/client/demo/hello", - resource=DemoResource(self.config), - ) - - self.api.register_spam_checker_callbacks( - user_may_create_room=self.user_may_create_room, - ) - - @staticmethod - def parse_config(config): - return config - - async def user_may_create_room(self, user: str) -> bool: - if user == "@evilguy:example.com": - return False - - return True -``` diff --git a/docs/modules/account_validity_callbacks.md b/docs/modules/account_validity_callbacks.md new file mode 100644 index 000000000000..3cd0e7219894 --- /dev/null +++ b/docs/modules/account_validity_callbacks.md @@ -0,0 +1,44 @@ +# Account validity callbacks + +Account validity callbacks allow module developers to add extra steps to verify the +validity on an account, i.e. see if a user can be granted access to their account on the +Synapse instance. Account validity callbacks can be registered using the module API's +`register_account_validity_callbacks` method. + +The available account validity callbacks are: + +### `is_user_expired` + +_First introduced in Synapse v1.39.0_ + +```python +async def is_user_expired(user: str) -> Optional[bool] +``` + +Called when processing any authenticated request (except for logout requests). The module +can return a `bool` to indicate whether the user has expired and should be locked out of +their account, or `None` if the module wasn't able to figure it out. The user is +represented by their Matrix user ID (e.g. `@alice:example.com`). + +If the module returns `True`, the current request will be denied with the error code +`ORG_MATRIX_EXPIRED_ACCOUNT` and the HTTP status code 403. Note that this doesn't +invalidate the user's access token. + +If multiple modules implement this callback, they will be considered in order. 
If a
+callback returns `None`, Synapse falls through to the next one. The value of the first
+callback that does not return `None` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `on_user_registration`
+
+_First introduced in Synapse v1.39.0_
+
+```python
+async def on_user_registration(user: str) -> None
+```
+
+Called after successfully registering a user, in case the module needs to perform extra
+operations to keep track of them (e.g. add them to a database table). The user is
+represented by their Matrix user ID.
+
+If multiple modules implement this callback, Synapse runs them all in order.
diff --git a/docs/modules/background_update_controller_callbacks.md b/docs/modules/background_update_controller_callbacks.md
new file mode 100644
index 000000000000..b3e7c259f4ae
--- /dev/null
+++ b/docs/modules/background_update_controller_callbacks.md
@@ -0,0 +1,71 @@
+# Background update controller callbacks
+
+Background update controller callbacks allow module developers to control (e.g. rate-limit)
+how database background updates are run. A database background update is an operation
+Synapse runs on its database in the background after it starts. It's usually used to run
+database operations that would take too long if they were run at the same time as schema
+updates (which are run on startup) and delay Synapse's startup too much: populating a
+table with a large amount of data, adding an index on a big table, deleting superfluous data,
+etc.
+
+Background update controller callbacks can be registered using the module API's
+`register_background_update_controller_callbacks` method. Only the first module (in order
+of appearance in Synapse's configuration file) calling this method can register background
+update controller callbacks; subsequent calls are ignored.
+
+The available background update controller callbacks are:
+
+### `on_update`
+
+_First introduced in Synapse v1.49.0_
+
+```python
+def on_update(update_name: str, database_name: str, one_shot: bool) -> AsyncContextManager[int]
+```
+
+Called when about to do an iteration of a background update. The module is given the name
+of the update, the name of the database, and a flag to indicate whether the background
+update will happen in one go and may take a long time (e.g. creating indices). If this last
+argument is set to `False`, the update will be run in batches.
+
+The module must return an async context manager. It will be entered before Synapse runs a
+background update; this should return the desired duration of the iteration, in
+milliseconds.
+
+The context manager will be exited when the iteration completes. Note that the duration
+returned by the context manager is a target, and an iteration may take substantially longer
+or shorter. If the `one_shot` flag is set to `True`, the duration returned is ignored.
+
+__Note__: Unlike most module callbacks in Synapse, this one is _synchronous_. This is
+because asynchronous operations are expected to be run by the async context manager.
+
+This callback is required when registering any other background update controller callback.
+
+### `default_batch_size`
+
+_First introduced in Synapse v1.49.0_
+
+```python
+async def default_batch_size(update_name: str, database_name: str) -> int
+```
+
+Called before the first iteration of a background update, with the name of the update and
+of the database. The module must return the number of elements to process in this first
+iteration.
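+
+As a rough illustration, a module might register a controller like the sketch below
+(`populate_big_table` is a made-up update name and the batch sizes are arbitrary):
+
+```python
+from contextlib import asynccontextmanager
+from typing import AsyncIterator
+
+from synapse.module_api import ModuleApi
+
+
+class UpdateController:
+    def __init__(self, config: dict, api: ModuleApi):
+        api.register_background_update_controller_callbacks(
+            on_update=self.on_update,
+            default_batch_size=self.default_batch_size,
+        )
+
+    # Synchronous: returns an async context manager wrapping one iteration.
+    def on_update(self, update_name: str, database_name: str, one_shot: bool):
+        @asynccontextmanager
+        async def ctx() -> AsyncIterator[int]:
+            # Ask Synapse to aim for roughly 500ms per iteration; the value is
+            # ignored when one_shot is True.
+            yield 500
+
+        return ctx()
+
+    async def default_batch_size(self, update_name: str, database_name: str) -> int:
+        # Start a hypothetical heavy update with smaller batches.
+        return 50 if update_name == "populate_big_table" else 100
+```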
+ +If this callback is not defined, Synapse will use a default value of 100. + +### `min_batch_size` + +_First introduced in Synapse v1.49.0_ + +```python +async def min_batch_size(update_name: str, database_name: str) -> int +``` + +Called before running a new batch for a background update, with the name of the update and +of the database. The module must return an integer representing the minimum number of +elements to process in this iteration. This number must be at least 1, and is used to +ensure that progress is always made. + +If this callback is not defined, Synapse will use a default value of 100. diff --git a/docs/modules/index.md b/docs/modules/index.md new file mode 100644 index 000000000000..0a868b309f2f --- /dev/null +++ b/docs/modules/index.md @@ -0,0 +1,53 @@ +# Modules + +Synapse supports extending its functionality by configuring external modules. + +**Note**: When using third-party modules, you effectively allow someone else to run +custom code on your Synapse homeserver. Server admins are encouraged to verify the +provenance of the modules they use on their homeserver and make sure the modules aren't +running malicious code on their instance. + +## Using modules + +To use a module on Synapse, add it to the `modules` section of the configuration file: + +```yaml +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} +``` + +Each module is defined by a path to a Python class as well as a configuration. This +information for a given module should be available in the module's own documentation. + +## Using multiple modules + +The order in which modules are listed in this section is important. When processing an +action that can be handled by several modules, Synapse will always prioritise the module +that appears first (i.e. is the highest in the list). This means: + +* If several modules register the same callback, the callback registered by the module + that appears first is used. +* If several modules try to register a handler for the same HTTP path, only the handler + registered by the module that appears first is used. Handlers registered by the other + module(s) are ignored and Synapse will log a warning message about them. + +Note that Synapse doesn't allow multiple modules implementing authentication checkers via +the password auth provider feature for the same login type with different fields. If this +happens, Synapse will refuse to start. + +## Current status + +We are currently in the process of migrating module interfaces to this system. While some +interfaces might be compatible with it, others still require configuring modules in +another part of Synapse's configuration file. + +Currently, only the following pre-existing interfaces are compatible with this new system: + +* spam checker +* third-party rules +* presence router +* password auth providers diff --git a/docs/modules/password_auth_provider_callbacks.md b/docs/modules/password_auth_provider_callbacks.md new file mode 100644 index 000000000000..e53abf640989 --- /dev/null +++ b/docs/modules/password_auth_provider_callbacks.md @@ -0,0 +1,176 @@ +# Password auth provider callbacks + +Password auth providers offer a way for server administrators to integrate +their Synapse installation with an external authentication system. The callbacks can be +registered by using the Module API's `register_password_auth_provider_callbacks` method. 
+
+## Callbacks
+
+### `auth_checkers`
+
+_First introduced in Synapse v1.46.0_
+
+```python
+auth_checkers: Dict[Tuple[str, Tuple[str, ...]], Callable]
+```
+
+A dict mapping from tuples of a login type identifier (such as `m.login.password`) and a
+tuple of field names (such as `("password", "secret_thing")`) to authentication checking
+callbacks, which should be of the following form:
+
+```python
+async def check_auth(
+    user: str,
+    login_type: str,
+    login_dict: "synapse.module_api.JsonDict",
+) -> Optional[
+    Tuple[
+        str,
+        Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]]
+    ]
+]
+```
+
+The login type and field names should be provided by the user in the
+request to the `/login` API. [The Matrix specification](https://matrix.org/docs/spec/client_server/latest#authentication-types)
+defines some types, however user-defined ones are also allowed.
+
+The callback is passed the `user` field provided by the client (which might not be in
+`@username:server` form), the login type, and a dictionary of login secrets passed by
+the client.
+
+If the authentication is successful, the module must return the user's Matrix ID (e.g.
+`@alice:example.com`) and optionally a callback to be called with the response to the
+`/login` request. If the module doesn't wish to return a callback, it must return `None`
+instead.
+
+If the authentication is unsuccessful, the module must return `None`.
+
+If multiple modules register an auth checker for the same login type but with different
+fields, Synapse will refuse to start.
+
+If multiple modules register an auth checker for the same login type with the same fields,
+then the callbacks will be executed in order, until one returns a Matrix user ID (and
+optionally a callback). In that case, the return value of that callback will be accepted
+and subsequent callbacks will not be fired. If every callback returns `None`, then the
+authentication fails.
+
+### `check_3pid_auth`
+
+_First introduced in Synapse v1.46.0_
+
+```python
+async def check_3pid_auth(
+    medium: str,
+    address: str,
+    password: str,
+) -> Optional[
+    Tuple[
+        str,
+        Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]]
+    ]
+]
+```
+
+Called when a user attempts to register or log in with a third-party identifier,
+such as email. It is passed the medium (e.g. `email`), an address (e.g. `jdoe@example.com`)
+and the user's password.
+
+If the authentication is successful, the module must return the user's Matrix ID (e.g.
+`@alice:example.com`) and optionally a callback to be called with the response to the `/login` request.
+If the module doesn't wish to return a callback, it must return `None` instead.
+
+If the authentication is unsuccessful, the module must return `None`.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `None`, Synapse falls through to the next one. The value of the first
+callback that does not return `None` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback. If every callback returns `None`,
+the authentication is denied.
+
+### `on_logged_out`
+
+_First introduced in Synapse v1.46.0_
+
+```python
+async def on_logged_out(
+    user_id: str,
+    device_id: Optional[str],
+    access_token: str
+) -> None
+```
+Called during a logout request for a user.
It is passed the qualified user ID, the ID of the +deactivated device (if any: access tokens are occasionally created without an associated +device ID), and the (now deactivated) access token. + +If multiple modules implement this callback, Synapse runs them all in order. + +## Example + +The example module below implements authentication checkers for two different login types: +- `my.login.type` + - Expects a `my_field` field to be sent to `/login` + - Is checked by the method: `self.check_my_login` +- `m.login.password` (defined in [the spec](https://matrix.org/docs/spec/client_server/latest#password-based)) + - Expects a `password` field to be sent to `/login` + - Is checked by the method: `self.check_pass` + + +```python +from typing import Awaitable, Callable, Optional, Tuple + +import synapse +from synapse import module_api + + +class MyAuthProvider: + def __init__(self, config: dict, api: module_api): + + self.api = api + + self.credentials = { + "bob": "building", + "@scoop:matrix.org": "digging", + } + + api.register_password_auth_provider_callbacks( + auth_checkers={ + ("my.login_type", ("my_field",)): self.check_my_login, + ("m.login.password", ("password",)): self.check_pass, + }, + ) + + async def check_my_login( + self, + username: str, + login_type: str, + login_dict: "synapse.module_api.JsonDict", + ) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]], + ] + ]: + if login_type != "my.login_type": + return None + + if self.credentials.get(username) == login_dict.get("my_field"): + return self.api.get_qualified_user_id(username) + + async def check_pass( + self, + username: str, + login_type: str, + login_dict: "synapse.module_api.JsonDict", + ) -> Optional[ + Tuple[ + str, + Optional[Callable[["synapse.module_api.LoginResponse"], Awaitable[None]]], + ] + ]: + if login_type != "m.login.password": + return None + + if self.credentials.get(username) == login_dict.get("password"): + return self.api.get_qualified_user_id(username) +``` diff --git a/docs/modules/porting_legacy_module.md b/docs/modules/porting_legacy_module.md new file mode 100644 index 000000000000..89084eb7b32b --- /dev/null +++ b/docs/modules/porting_legacy_module.md @@ -0,0 +1,20 @@ +# Porting an existing module that uses the old interface + +In order to port a module that uses Synapse's old module interface, its author needs to: + +* ensure the module's callbacks are all asynchronous. +* register their callbacks using one or more of the `register_[...]_callbacks` methods + from the `ModuleApi` class in the module's `__init__` method (see [this section](writing_a_module.html#registering-a-callback) + for more info). + +Additionally, if the module is packaged with an additional web resource, the module +should register this resource in its `__init__` method using the `register_web_resource` +method from the `ModuleApi` class (see [this section](writing_a_module.html#registering-a-web-resource) for +more info). + +There is no longer a `get_db_schema_files` callback provided for password auth provider modules. Any +changes to the database should now be made by the module using the module API class. + +The module's author should also update any example in the module's configuration to only +use the new `modules` section in Synapse's configuration file (see [this section](index.html#using-modules) +for more info). 
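+
+As an illustration, a legacy spam checker that implemented `user_may_invite` might be
+ported with a sketch like the following (the class name and the `banned_inviters` config
+key are invented for the example):
+
+```python
+from synapse.module_api import ModuleApi
+
+
+class PortedSpamChecker:
+    def __init__(self, config: dict, api: ModuleApi):
+        self.config = config
+        self.api = api
+
+        # Under the old interface Synapse looked this method up by name; the new
+        # interface requires registering it explicitly.
+        api.register_spam_checker_callbacks(
+            user_may_invite=self.user_may_invite,
+        )
+
+    # Callbacks must now be asynchronous.
+    async def user_may_invite(self, inviter: str, invitee: str, room_id: str) -> bool:
+        return inviter not in self.config.get("banned_inviters", [])
+```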
diff --git a/docs/modules/presence_router_callbacks.md b/docs/modules/presence_router_callbacks.md
new file mode 100644
index 000000000000..d3da25cef413
--- /dev/null
+++ b/docs/modules/presence_router_callbacks.md
@@ -0,0 +1,104 @@
+# Presence router callbacks
+
+Presence router callbacks allow module developers to specify additional users (local or remote)
+to receive certain presence updates from local users. Presence router callbacks can be
+registered using the module API's `register_presence_router_callbacks` method.
+
+## Callbacks
+
+The available presence router callbacks are:
+
+### `get_users_for_states`
+
+_First introduced in Synapse v1.42.0_
+
+```python
+async def get_users_for_states(
+    state_updates: Iterable["synapse.api.UserPresenceState"],
+) -> Dict[str, Set["synapse.api.UserPresenceState"]]
+```
+**Requires** `get_interested_users` to also be registered
+
+Called when processing updates to the presence state of one or more users. This callback can
+be used to instruct the server to forward that presence state to specific users. The module
+must return a dictionary that maps from Matrix user IDs (which can be local or remote) to the
+`UserPresenceState` changes that should be forwarded to them.
+
+Synapse will then attempt to send the specified presence updates to each user when possible.
+
+If multiple modules implement this callback, Synapse merges all the dictionaries returned
+by the callbacks. If multiple callbacks return a dictionary containing the same key,
+Synapse concatenates the sets associated with this key from each dictionary.
+
+### `get_interested_users`
+
+_First introduced in Synapse v1.42.0_
+
+```python
+async def get_interested_users(
+    user_id: str
+) -> Union[Set[str], "synapse.module_api.PRESENCE_ALL_USERS"]
+```
+**Requires** `get_users_for_states` to also be registered
+
+Called when determining which users someone should be able to see the presence state of. This
+callback should return complementary results to `get_users_for_states` or the presence information
+may not be properly forwarded.
+
+The callback is given the Matrix user ID for a local user that is requesting presence data and
+should return the Matrix user IDs of the users whose presence state they are allowed to
+query. The returned users can be local or remote.
+
+Alternatively the callback can return `synapse.module_api.PRESENCE_ALL_USERS`
+to indicate that the user should receive updates from all known users.
+
+If multiple modules implement this callback, they will be considered in order. Synapse
+calls each callback one by one, and uses a concatenation of all the `set`s returned by the
+callbacks. If one callback returns `synapse.module_api.PRESENCE_ALL_USERS`, Synapse uses
+this value instead. If this happens, Synapse does not call any of the subsequent
+implementations of this callback.
+
+## Example
+
+The example below is a module that implements both presence router callbacks, and ensures
+that `@alice:example.com` receives all presence updates from `@bob:example.com` and
+`@charlie:somewhere.org`, regardless of whether Alice shares a room with any of them.
+ +```python +from typing import Dict, Iterable, Set, Union + +from synapse.module_api import ModuleApi + + +class CustomPresenceRouter: + def __init__(self, config: dict, api: ModuleApi): + self.api = api + + self.api.register_presence_router_callbacks( + get_users_for_states=self.get_users_for_states, + get_interested_users=self.get_interested_users, + ) + + async def get_users_for_states( + self, + state_updates: Iterable["synapse.api.UserPresenceState"], + ) -> Dict[str, Set["synapse.api.UserPresenceState"]]: + res = {} + for update in state_updates: + if ( + update.user_id == "@bob:example.com" + or update.user_id == "@charlie:somewhere.org" + ): + res.setdefault("@alice:example.com", set()).add(update) + + return res + + async def get_interested_users( + self, + user_id: str, + ) -> Union[Set[str], "synapse.module_api.PRESENCE_ALL_USERS"]: + if user_id == "@alice:example.com": + return {"@bob:example.com", "@charlie:somewhere.org"} + + return set() +``` diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md new file mode 100644 index 000000000000..2eb9032f4136 --- /dev/null +++ b/docs/modules/spam_checker_callbacks.md @@ -0,0 +1,281 @@ +# Spam checker callbacks + +Spam checker callbacks allow module developers to implement spam mitigation actions for +Synapse instances. Spam checker callbacks can be registered using the module API's +`register_spam_checker_callbacks` method. + +## Callbacks + +The available spam checker callbacks are: + +### `check_event_for_spam` + +_First introduced in Synapse v1.37.0_ + +```python +async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +``` + +Called when receiving an event from a client or via federation. The module can return +either a `bool` to indicate whether the event must be rejected because of spam, or a `str` +to indicate the event must be rejected because of spam and to give a rejection reason to +forward to clients. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + +### `user_may_join_room` + +_First introduced in Synapse v1.37.0_ + +```python +async def user_may_join_room(user: str, room: str, is_invited: bool) -> bool +``` + +Called when a user is trying to join a room. The module must return a `bool` to indicate +whether the user can join the room. The user is represented by their Matrix user ID (e.g. +`@alice:example.com`) and the room is represented by its Matrix ID (e.g. +`!room:example.com`). The module is also given a boolean to indicate whether the user +currently has a pending invite in the room. + +This callback isn't called if the join is performed by a server administrator, or in the +context of a room creation. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + +### `user_may_invite` + +_First introduced in Synapse v1.37.0_ + +```python +async def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool +``` + +Called when processing an invitation. 
The module must return a `bool` indicating whether
+the inviter can invite the invitee to the given room. Both inviter and invitee are
+represented by their Matrix user ID (e.g. `@alice:example.com`).
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `user_may_send_3pid_invite`
+
+_First introduced in Synapse v1.45.0_
+
+```python
+async def user_may_send_3pid_invite(
+    inviter: str,
+    medium: str,
+    address: str,
+    room_id: str,
+) -> bool
+```
+
+Called when processing an invitation using a third-party identifier (also called a 3PID,
+e.g. an email address or a phone number). The module must return a `bool` indicating
+whether the inviter can invite the invitee to the given room.
+
+The inviter is represented by their Matrix user ID (e.g. `@alice:example.com`), and the
+invitee is represented by their medium (e.g. "email") and their address
+(e.g. `alice@example.com`). See [the Matrix specification](https://matrix.org/docs/spec/appendices#pid-types)
+for more information regarding third-party identifiers.
+
+For example, a call to this callback to send an invitation to the email address
+`alice@example.com` would look like this:
+
+```python
+await user_may_send_3pid_invite(
+    "@bob:example.com",  # The inviter's user ID
+    "email",  # The medium of the 3PID to invite
+    "alice@example.com",  # The address of the 3PID to invite
+    "!some_room:example.com",  # The ID of the room to send the invite into
+)
+```
+
+**Note**: If the third-party identifier is already associated with a Matrix user ID,
+[`user_may_invite`](#user_may_invite) will be used instead.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `user_may_create_room`
+
+_First introduced in Synapse v1.37.0_
+
+```python
+async def user_may_create_room(user: str) -> bool
+```
+
+Called when processing a room creation request. The module must return a `bool` indicating
+whether the given user (represented by their Matrix user ID) is allowed to create a room.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `user_may_create_room_alias`
+
+_First introduced in Synapse v1.37.0_
+
+```python
+async def user_may_create_room_alias(user: str, room_alias: "synapse.types.RoomAlias") -> bool
+```
+
+Called when trying to associate an alias with an existing room. The module must return a
+`bool` indicating whether the given user (represented by their Matrix user ID) is allowed
+to set the given alias.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
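+
+For instance, a module restricting room creation and alias creation to an allow-list might
+look like this sketch (the class name and the `allowed_creators` config key are invented
+for the example):
+
+```python
+from synapse.module_api import ModuleApi
+
+
+class RoomCreationLimiter:
+    def __init__(self, config: dict, api: ModuleApi):
+        self.allowed_creators = set(config.get("allowed_creators", []))
+        api.register_spam_checker_callbacks(
+            user_may_create_room=self.user_may_create_room,
+            user_may_create_room_alias=self.user_may_create_room_alias,
+        )
+
+    async def user_may_create_room(self, user: str) -> bool:
+        return user in self.allowed_creators
+
+    async def user_may_create_room_alias(
+        self, user: str, room_alias: "synapse.types.RoomAlias"
+    ) -> bool:
+        return user in self.allowed_creators
+```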
+
+### `user_may_publish_room`
+
+_First introduced in Synapse v1.37.0_
+
+```python
+async def user_may_publish_room(user: str, room_id: str) -> bool
+```
+
+Called when trying to publish a room to the homeserver's public rooms directory. The
+module must return a `bool` indicating whether the given user (represented by their
+Matrix user ID) is allowed to publish the given room.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `check_username_for_spam`
+
+_First introduced in Synapse v1.37.0_
+
+```python
+async def check_username_for_spam(user_profile: Dict[str, str]) -> bool
+```
+
+Called when computing search results in the user directory. The module must return a
+`bool` indicating whether the given user profile can appear in search results. The profile
+is represented as a dictionary with the following keys:
+
+* `user_id`: The Matrix ID for this user.
+* `display_name`: The user's display name.
+* `avatar_url`: The `mxc://` URL to the user's avatar.
+
+The module is given a copy of the original dictionary, so modifying it from within the
+module cannot modify a user's profile when included in user directory search results.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `False`, Synapse falls through to the next one. The value of the first
+callback that does not return `False` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `check_registration_for_spam`
+
+_First introduced in Synapse v1.37.0_
+
+```python
+async def check_registration_for_spam(
+    email_threepid: Optional[dict],
+    username: Optional[str],
+    request_info: Collection[Tuple[str, str]],
+    auth_provider_id: Optional[str] = None,
+) -> "synapse.spam_checker_api.RegistrationBehaviour"
+```
+
+Called when registering a new user. The module must return a `RegistrationBehaviour`
+indicating whether the registration can go through or must be denied, or whether the user
+may be allowed to register but will be shadow banned.
+
+The arguments passed to this callback are:
+
+* `email_threepid`: The email address used for registering, if any.
+* `username`: The username the user would like to register. Can be `None`, meaning that
+  Synapse will generate one later.
+* `request_info`: A collection of tuples, whose first item is a user agent and whose
+  second item is an IP address. These user agents and IP addresses are the ones that were
+  used during the registration process.
+* `auth_provider_id`: The identifier of the SSO authentication provider, if any.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `RegistrationBehaviour.ALLOW`, Synapse falls through to the next one.
+The value of the first callback that does not return `RegistrationBehaviour.ALLOW` will
+be used. If this happens, Synapse will not call any of the subsequent implementations of
+this callback.
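+
+As an illustration, the sketch below shadow-bans registrations coming from a hypothetical
+bad IP range (the range and class name are made up for the example):
+
+```python
+from typing import Collection, Optional, Tuple
+
+from synapse.module_api import ModuleApi
+from synapse.spam_checker_api import RegistrationBehaviour
+
+
+class RegistrationGate:
+    def __init__(self, config: dict, api: ModuleApi):
+        api.register_spam_checker_callbacks(
+            check_registration_for_spam=self.check_registration_for_spam,
+        )
+
+    async def check_registration_for_spam(
+        self,
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> RegistrationBehaviour:
+        # Each tuple in request_info is (user agent, IP address).
+        if any(ip.startswith("203.0.113.") for _, ip in request_info):
+            return RegistrationBehaviour.SHADOW_BAN
+        return RegistrationBehaviour.ALLOW
+```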
+ +### `check_media_file_for_spam` + +_First introduced in Synapse v1.37.0_ + +```python +async def check_media_file_for_spam( + file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", + file_info: "synapse.rest.media.v1._base.FileInfo", +) -> bool +``` + +Called when storing a local or remote file. The module must return a boolean indicating +whether the given file can be stored in the homeserver's media store. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + +## Example + +The example below is a module that implements the spam checker callback +`check_event_for_spam` to deny any message sent by users whose Matrix user IDs are +mentioned in a configured list, and registers a web resource to the path +`/_synapse/client/list_spam_checker/is_evil` that returns a JSON object indicating +whether the provided user appears in that list. + +```python +import json +from typing import Union + +from twisted.web.resource import Resource +from twisted.web.server import Request + +from synapse.module_api import ModuleApi + + +class IsUserEvilResource(Resource): + def __init__(self, config): + super(IsUserEvilResource, self).__init__() + self.evil_users = config.get("evil_users") or [] + + def render_GET(self, request: Request): + user = request.args.get(b"user")[0].decode() + request.setHeader(b"Content-Type", b"application/json") + return json.dumps({"evil": user in self.evil_users}).encode() + + +class ListSpamChecker: + def __init__(self, config: dict, api: ModuleApi): + self.api = api + self.evil_users = config.get("evil_users") or [] + + self.api.register_spam_checker_callbacks( + check_event_for_spam=self.check_event_for_spam, + ) + + self.api.register_web_resource( + path="/_synapse/client/list_spam_checker/is_evil", + resource=IsUserEvilResource(config), + ) + + async def check_event_for_spam(self, event: "synapse.events.EventBase") -> Union[bool, str]: + return event.sender not in self.evil_users +``` diff --git a/docs/modules/third_party_rules_callbacks.md b/docs/modules/third_party_rules_callbacks.md new file mode 100644 index 000000000000..a3a17096a8f5 --- /dev/null +++ b/docs/modules/third_party_rules_callbacks.md @@ -0,0 +1,183 @@ +# Third party rules callbacks + +Third party rules callbacks allow module developers to add extra checks to verify the +validity of incoming events. Third party event rules callbacks can be registered using +the module API's `register_third_party_rules_callbacks` method. + +## Callbacks + +The available third party rules callbacks are: + +### `check_event_allowed` + +_First introduced in Synapse v1.39.0_ + +```python +async def check_event_allowed( + event: "synapse.events.EventBase", + state_events: "synapse.types.StateMap", +) -> Tuple[bool, Optional[dict]] +``` + +** +This callback is very experimental and can and will break without notice. Module developers +are encouraged to implement `check_event_for_spam` from the spam checker category instead. +** + +Called when processing any incoming event, with the event and a `StateMap` +representing the current state of the room the event is being sent into. A `StateMap` is +a dictionary that maps tuples containing an event type and a state key to the +corresponding state event. 
For example retrieving the room's `m.room.create` event from +the `state_events` argument would look like this: `state_events.get(("m.room.create", ""))`. +The module must return a boolean indicating whether the event can be allowed. + +Note that this callback function processes incoming events coming via federation +traffic (on top of client traffic). This means denying an event might cause the local +copy of the room's history to diverge from that of remote servers. This may cause +federation issues in the room. It is strongly recommended to only deny events using this +callback function if the sender is a local user, or in a private federation in which all +servers are using the same module, with the same configuration. + +If the boolean returned by the module is `True`, it may also tell Synapse to replace the +event with new data by returning the new event's data as a dictionary. In order to do +that, it is recommended the module calls `event.get_dict()` to get the current event as a +dictionary, and modify the returned dictionary accordingly. + +If `check_event_allowed` raises an exception, the module is assumed to have failed. +The event will not be accepted but is not treated as explicitly rejected, either. +An HTTP request causing the module check will likely result in a 500 Internal +Server Error. + +When the boolean returned by the module is `False`, the event is rejected. +(Module developers should not use exceptions for rejection.) + +Note that replacing the event only works for events sent by local users, not for events +received over federation. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `True`, Synapse falls through to the next one. The value of the first +callback that does not return `True` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + +### `on_create_room` + +_First introduced in Synapse v1.39.0_ + +```python +async def on_create_room( + requester: "synapse.types.Requester", + request_content: dict, + is_requester_admin: bool, +) -> None +``` + +Called when processing a room creation request, with the `Requester` object for the user +performing the request, a dictionary representing the room creation request's JSON body +(see [the spec](https://matrix.org/docs/spec/client_server/latest#post-matrix-client-r0-createroom) +for a list of possible parameters), and a boolean indicating whether the user performing +the request is a server admin. + +Modules can modify the `request_content` (by e.g. adding events to its `initial_state`), +or deny the room's creation by raising a `module_api.errors.SynapseError`. + +If multiple modules implement this callback, they will be considered in order. If a +callback returns without raising an exception, Synapse falls through to the next one. The +room creation will be forbidden as soon as one of the callbacks raises an exception. If +this happens, Synapse will not call any of the subsequent implementations of this +callback. + +### `check_threepid_can_be_invited` + +_First introduced in Synapse v1.39.0_ + +```python +async def check_threepid_can_be_invited( + medium: str, + address: str, + state_events: "synapse.types.StateMap", +) -> bool: +``` + +Called when processing an invite via a third-party identifier (i.e. email or phone number). +The module must return a boolean indicating whether the invite can go through. + +If multiple modules implement this callback, they will be considered in order. 
If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `check_visibility_can_be_modified`
+
+_First introduced in Synapse v1.39.0_
+
+```python
+async def check_visibility_can_be_modified(
+    room_id: str,
+    state_events: "synapse.types.StateMap",
+    new_visibility: str,
+) -> bool:
+```
+
+Called when changing the visibility of a room in the local public room directory. The
+visibility is a string that's either "public" or "private". The module must return a
+boolean indicating whether the change can go through.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `True`, Synapse falls through to the next one. The value of the first
+callback that does not return `True` will be used. If this happens, Synapse will not call
+any of the subsequent implementations of this callback.
+
+### `on_new_event`
+
+_First introduced in Synapse v1.47.0_
+
+```python
+async def on_new_event(
+    event: "synapse.events.EventBase",
+    state_events: "synapse.types.StateMap",
+) -> None:
+```
+
+Called after sending an event into a room. The module is passed the event, as well
+as the state of the room _after_ the event. This means that if the event is a state event,
+it will be included in this state.
+
+Note that this callback is called when the event has already been processed and stored
+into the room, which means this callback cannot be used to deny persisting the event. To
+deny an incoming event, see [`check_event_for_spam`](spam_checker_callbacks.md#check_event_for_spam) instead.
+
+If multiple modules implement this callback, Synapse runs them all in order.
+
+## Example
+
+The example below is a module that implements the third-party rules callback
+`check_event_allowed` to censor incoming messages as dictated by a third-party service.
+
+```python
+from typing import Optional, Tuple
+
+from synapse.module_api import ModuleApi
+
+_DEFAULT_CENSOR_ENDPOINT = "https://my-internal-service.local/censor-event"
+
+class EventCensorer:
+    def __init__(self, config: dict, api: ModuleApi):
+        self.api = api
+        self._endpoint = config.get("endpoint", _DEFAULT_CENSOR_ENDPOINT)
+
+        self.api.register_third_party_rules_callbacks(
+            check_event_allowed=self.check_event_allowed,
+        )
+
+    async def check_event_allowed(
+        self,
+        event: "synapse.events.EventBase",
+        state_events: "synapse.types.StateMap",
+    ) -> Tuple[bool, Optional[dict]]:
+        event_dict = event.get_dict()
+        new_event_content = await self.api.http_client.post_json_get_json(
+            uri=self._endpoint, post_json=event_dict,
+        )
+        event_dict["content"] = new_event_content
+        # Allow the event, replacing its content with the censored version.
+        return True, event_dict
+```
diff --git a/docs/modules/writing_a_module.md b/docs/modules/writing_a_module.md
new file mode 100644
index 000000000000..e7c0ffad58bf
--- /dev/null
+++ b/docs/modules/writing_a_module.md
@@ -0,0 +1,85 @@
+# Writing a module
+
+A module is a Python class that uses Synapse's module API to interact with the
+homeserver. It can register callbacks that Synapse will call on specific operations, as
+well as web resources to attach to Synapse's web server.
+
+When instantiated, a module is given its parsed configuration as well as an instance of
+the `synapse.module_api.ModuleApi` class.
The configuration is a dictionary, and is +either the output of the module's `parse_config` static method (see below), or the +configuration associated with the module in Synapse's configuration file. + +See the documentation for the `ModuleApi` class +[here](https://github.com/matrix-org/synapse/blob/master/synapse/module_api/__init__.py). + +## When Synapse runs with several modules configured + +If Synapse is running with other modules configured, the order each module appears in +within the `modules` section of the Synapse configuration file might restrict what it can +or cannot register. See [this section](index.html#using-multiple-modules) for more +information. + +On top of the rules listed in the link above, if a callback returns a value that should +cause the current operation to fail (e.g. if a callback checking an event returns with a +value that should cause the event to be denied), Synapse will fail the operation and +ignore any subsequent callbacks that should have been run after this one. + +The documentation for each callback mentions how Synapse behaves when +multiple modules implement it. + +## Handling the module's configuration + +A module can implement the following static method: + +```python +@staticmethod +def parse_config(config: dict) -> dict +``` + +This method is given a dictionary resulting from parsing the YAML configuration for the +module. It may modify it (for example by parsing durations expressed as strings (e.g. +"5d") into milliseconds, etc.), and return the modified dictionary. It may also verify +that the configuration is correct, and raise an instance of +`synapse.module_api.errors.ConfigError` if not. + +## Registering a web resource + +Modules can register web resources onto Synapse's web server using the following module +API method: + +```python +def ModuleApi.register_web_resource(path: str, resource: IResource) -> None +``` + +The path is the full absolute path to register the resource at. For example, if you +register a resource for the path `/_synapse/client/my_super_module/say_hello`, Synapse +will serve it at `http(s)://[HS_URL]/_synapse/client/my_super_module/say_hello`. Note +that Synapse does not allow registering resources for several sub-paths in the `/_matrix` +namespace (such as anything under `/_matrix/client` for example). It is strongly +recommended that modules register their web resources under the `/_synapse/client` +namespace. + +The provided resource is a Python class that implements Twisted's [IResource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.IResource.html) +interface (such as [Resource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.Resource.html)). + +Only one resource can be registered for a given path. If several modules attempt to +register a resource for the same path, the module that appears first in Synapse's +configuration file takes priority. + +Modules **must** register their web resources in their `__init__` method. + +## Registering a callback + +Modules can use Synapse's module API to register callbacks. Callbacks are functions that +Synapse will call when performing specific actions. Callbacks must be asynchronous (unless +specified otherwise), and are split in categories. A single module may implement callbacks +from multiple categories, and is under no obligation to implement all callbacks from the +categories it registers callbacks for. + +Modules can register callbacks using one of the module API's `register_[...]_callbacks` +methods. 
The callback functions are passed to these methods as keyword arguments, with
+the callback name as the argument name and the function as its value. A
+`register_[...]_callbacks` method exists for each category.
+
+Callbacks for each category can be found on their respective page of the
+[Synapse documentation website](https://matrix-org.github.io/synapse).
\ No newline at end of file
diff --git a/docs/openid.md b/docs/openid.md
index f685fd551acc..ff9de9d5b8bf 100644
--- a/docs/openid.md
+++ b/docs/openid.md
@@ -21,6 +21,8 @@ such as [Github][github-idp].
 [google-idp]: https://developers.google.com/identity/protocols/oauth2/openid-connect
 [auth0]: https://auth0.com/
+[authentik]: https://goauthentik.io/
+[lemonldap]: https://lemonldap-ng.org/
 [okta]: https://www.okta.com/
 [dex-idp]: https://github.com/dexidp/dex
 [keycloak-idp]: https://www.keycloak.org/docs/latest/server_admin/#sso-protocols
@@ -79,9 +81,9 @@ oidc_providers:
         display_name_template: "{{ user.name }}"
 ```
 
-### [Dex][dex-idp]
+### Dex
 
-[Dex][dex-idp] is a simple, open-source, certified OpenID Connect Provider.
+[Dex][dex-idp] is a simple, open-source OpenID Connect Provider.
 Although it is designed to help building a full-blown provider with an external
 database, it can be configured with static passwords in a config file.
@@ -117,7 +119,7 @@ oidc_providers:
         localpart_template: "{{ user.name }}"
         display_name_template: "{{ user.name|capitalize }}"
 ```
-### [Keycloak][keycloak-idp]
+### Keycloak
 
 [Keycloak][keycloak-idp] is an opensource IdP maintained by Red Hat.
@@ -166,7 +168,9 @@ oidc_providers:
         localpart_template: "{{ user.preferred_username }}"
         display_name_template: "{{ user.name }}"
 ```
-### [Auth0][auth0]
+### Auth0
+
+[Auth0][auth0] is a hosted SaaS IdP solution.
 
 1. Create a regular web application for Synapse
 2. Set the Allowed Callback URLs to `[synapse public baseurl]/_synapse/client/oidc/callback`
@@ -207,9 +211,79 @@ oidc_providers:
         display_name_template: "{{ user.name }}"
 ```
 
+### Authentik
+
+[Authentik][authentik] is an open-source IdP solution.
+
+1. Create a provider in Authentik, with type OAuth2/OpenID.
+2. The parameters are:
+- Client Type: Confidential
+- JWT Algorithm: RS256
+- Scopes: OpenID, Email and Profile
+- RSA Key: Select any available key
+- Redirect URIs: `[synapse public baseurl]/_synapse/client/oidc/callback`
+3. Create an application for Synapse in Authentik and link it to the provider.
+4. Note the slug of your application, Client ID and Client Secret.
+
+Synapse config:
+```yaml
+oidc_providers:
+  - idp_id: authentik
+    idp_name: authentik
+    discover: true
+    issuer: "https://your.authentik.example.org/application/o/your-app-slug/" # TO BE FILLED: domain and slug
+    client_id: "your client id" # TO BE FILLED
+    client_secret: "your client secret" # TO BE FILLED
+    scopes:
+      - "openid"
+      - "profile"
+      - "email"
+    user_mapping_provider:
+      config:
+        localpart_template: "{{ user.preferred_username }}"
+        display_name_template: "{{ user.preferred_username|capitalize }}" # TO BE FILLED: If your users have names in Authentik and you want those in Synapse, this should be replaced with user.name|capitalize.
+```
+
+### LemonLDAP
+
+[LemonLDAP::NG][lemonldap] is an open-source IdP solution.
+
+1. Create an OpenID Connect Relying Party in LemonLDAP::NG
+2. The parameters are:
+- Client ID under the basic menu of the new Relying Party (`Options > Basic >
+  Client ID`)
+- Client secret (`Options > Basic > Client secret`)
+- JWT Algorithm: RS256 within the security menu of the new Relying Party
+  (`Options > Security > ID Token signature algorithm` and `Options > Security >
+  Access Token signature algorithm`)
+- Scopes: OpenID, Email and Profile
+- Allowed redirection addresses for login (`Options > Basic > Allowed
+  redirection addresses for login`):
+  `[synapse public baseurl]/_synapse/client/oidc/callback`
+
+Synapse config:
+```yaml
+oidc_providers:
+  - idp_id: lemonldap
+    idp_name: lemonldap
+    discover: true
+    issuer: "https://auth.example.org/" # TO BE FILLED: replace with your domain
+    client_id: "your client id" # TO BE FILLED
+    client_secret: "your client secret" # TO BE FILLED
+    scopes:
+      - "openid"
+      - "profile"
+      - "email"
+    user_mapping_provider:
+      config:
+        localpart_template: "{{ user.preferred_username }}"
+        # TO BE FILLED: If your users have names in LemonLDAP::NG and you want those in Synapse, this should be replaced with user.name|capitalize or any valid filter.
+        display_name_template: "{{ user.preferred_username|capitalize }}"
+```
+
 ### GitHub
 
-GitHub is a bit special as it is not an OpenID Connect compliant provider, but
+[GitHub][github-idp] is a bit special as it is not an OpenID Connect compliant provider, but
 just a regular OAuth2 provider.
 
 The [`/user` API endpoint](https://developer.github.com/v3/users/#get-the-authenticated-user)
@@ -242,11 +316,13 @@ oidc_providers:
         display_name_template: "{{ user.name }}"
 ```
 
-### [Google][google-idp]
+### Google
+
+[Google][google-idp] is an OpenID certified authentication and authorisation provider.
 
 1. Set up a project in the Google API Console (see
    https://developers.google.com/identity/protocols/oauth2/openid-connect#appsetup).
-2. add an "OAuth Client ID" for a Web Application under "Credentials".
+2. Add an "OAuth Client ID" for a Web Application under "Credentials".
 3. Copy the Client ID and Client Secret, and add the following to your synapse config:
 ```yaml
 oidc_providers:
@@ -446,3 +522,51 @@ The synapse config will look like this:
       config:
         email_template: "{{ user.email }}"
 ```
+
+### Django OAuth Toolkit
+
+[django-oauth-toolkit](https://github.com/jazzband/django-oauth-toolkit) is a
+Django application providing out of the box all the endpoints, data and logic
+needed to add OAuth2 capabilities to your Django projects. It supports
+[OpenID Connect too](https://django-oauth-toolkit.readthedocs.io/en/latest/oidc.html).
+
+Configuration on Django's side:
+
+1. Add an application: https://example.com/admin/oauth2_provider/application/add/ and choose parameters like this:
+* `Redirect uris`: https://synapse.example.com/_synapse/client/oidc/callback
+* `Client type`: `Confidential`
+* `Authorization grant type`: `Authorization code`
+* `Algorithm`: `HMAC with SHA-2 256`
+2. You can [customize the claims](https://django-oauth-toolkit.readthedocs.io/en/latest/oidc.html#customizing-the-oidc-responses) Django gives to Synapse (optional):
+  <details>
+   Code sample
+
+   ```python
+   # Import the base validator shipped with django-oauth-toolkit.
+   from oauth2_provider.oauth2_validators import OAuth2Validator
+
+   class CustomOAuth2Validator(OAuth2Validator):
+
+       def get_additional_claims(self, request):
+           # Expose extra user fields as OIDC claims for Synapse to map.
+           return {
+               "sub": request.user.email,
+               "email": request.user.email,
+               "first_name": request.user.first_name,
+               "last_name": request.user.last_name,
+           }
+   ```
+
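+   To have django-oauth-toolkit pick up a custom validator like the one above,
+   it must be registered in the Django settings. A minimal sketch, assuming the
+   class lives in a hypothetical `yourapp/validators.py` (the module path is a
+   placeholder, not part of the toolkit's documentation):
+
+   ```python
+   # settings.py (sketch) - point django-oauth-toolkit at the custom validator.
+   # "yourapp.validators" is a placeholder module path; adjust to your project.
+   OAUTH2_PROVIDER = {
+       "OAUTH2_VALIDATOR_CLASS": "yourapp.validators.CustomOAuth2Validator",
+       "OIDC_ENABLED": True,
+   }
+   ```
+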
+Your synapse config is then:
+
+```yaml
+oidc_providers:
+  - idp_id: django_example
+    idp_name: "Django Example"
+    issuer: "https://example.com/o/"
+    client_id: "your-client-id" # CHANGE ME
+    client_secret: "your-client-secret" # CHANGE ME
+    scopes: ["openid"]
+    user_profile_method: "userinfo_endpoint" # needed because oauth-toolkit does not include user information in the authorization response
+    user_mapping_provider:
+      config:
+        localpart_template: "{{ user.email.split('@')[0] }}"
+        display_name_template: "{{ user.first_name }} {{ user.last_name }}"
+        email_template: "{{ user.email }}"
+```
diff --git a/docs/other/running_synapse_on_single_board_computers.md b/docs/other/running_synapse_on_single_board_computers.md
new file mode 100644
index 000000000000..ea14afa8b2df
--- /dev/null
+++ b/docs/other/running_synapse_on_single_board_computers.md
@@ -0,0 +1,74 @@
+## Summary of performance impact of running on resource constrained devices such as SBCs
+
+I've been running my homeserver on a cubietruck at home now for some time and am often replying to statements like "you need loads of ram to join large rooms" with "it works fine for me". I thought it might be useful to curate a summary of the issues you're likely to run into to help as a scaling-down guide, maybe highlight these for development work or end up as documentation. It seems that once you get up to about 4x1.5GHz arm64 4GiB these issues are no longer a problem.
+
+- **Platform**: 2x1GHz armhf 2GiB ram [Single-board computers](https://wiki.debian.org/CheapServerBoxHardware), SSD, postgres.
+
+### Presence
+
+This is the main reason people have a poor matrix experience on resource constrained homeservers. Element web will frequently be saying the server is offline while the python process will be pegged at 100% cpu. This feature is used to tell when other users are active (have a client app in the foreground) and therefore more likely to respond, but requires a lot of network activity to maintain even when nobody is talking in a room.
+
+![Screenshot_2020-10-01_19-29-46](https://user-images.githubusercontent.com/71895/94848963-a47a3580-041c-11eb-8b6e-acb772b4259e.png)
+
+While synapse does have some performance issues with presence [#3971](https://github.com/matrix-org/synapse/issues/3971), the fundamental problem is that this is an easy feature to implement for a centralised service at nearly no overhead, but federation makes it combinatorial [#8055](https://github.com/matrix-org/synapse/issues/8055). There is also a client-side config option which disables the UI and idle tracking [enable_presence_by_hs_url] to blacklist the largest instances, but I didn't notice much difference, so I recommend disabling the feature entirely at the server level as well.
+
+[enable_presence_by_hs_url]: https://github.com/vector-im/element-web/blob/v1.7.8/config.sample.json#L45
+
+### Joining
+
+Joining a "large", federated room will initially fail with the below message in Element web, but waiting a while (10-60 mins) and trying again will succeed without any issue. What counts as "large" is not message history, user count, connections to homeservers, or even a simple count of the state events; it is instead how long the state resolution algorithm takes. However, each of those numbers is a reasonable proxy, so we can use them as estimates since user count is one of the few things you see before joining.
+ +![Screenshot_2020-10-02_17-15-06](https://user-images.githubusercontent.com/71895/94945781-18771500-04d3-11eb-8419-83c2da73a341.png) + +This is [#1211](https://github.com/matrix-org/synapse/issues/1211) and will also hopefully be mitigated by peeking [matrix-org/matrix-doc#2753](https://github.com/matrix-org/matrix-doc/pull/2753) so at least you don't need to wait for a join to complete before finding out if it's the kind of room you want. Note that you should first disable presence, otherwise it'll just make the situation worse [#3120](https://github.com/matrix-org/synapse/issues/3120). There is a lot of database interaction too, so make sure you've [migrated your data](../postgres.md) from the default sqlite to postgresql. Personally, I recommend patience - once the initial join is complete there's rarely any issues with actually interacting with the room, but if you like you can just block "large" rooms entirely. + +### Sessions + +Anything that requires modifying the device list [#7721](https://github.com/matrix-org/synapse/issues/7721) will take a while to propagate, again taking the client "Offline" until it's complete. This includes signing in and out, editing the public name and verifying e2ee. The main mitigation I recommend is to keep long-running sessions open e.g. by using Firefox SSB "Use this site in App mode" or Chromium PWA "Install Element". + +### Recommended configuration + +Put the below in a new file at /etc/matrix-synapse/conf.d/sbc.yaml to override the defaults in homeserver.yaml. + +``` +# Set to false to disable presence tracking on this homeserver. +use_presence: false + +# When this is enabled, the room "complexity" will be checked before a user +# joins a new remote room. If it is above the complexity limit, the server will +# disallow joining, or will instantly leave. +limit_remote_rooms: + # Uncomment to enable room complexity checking. + #enabled: true + complexity: 3.0 + +# Database configuration +database: + name: psycopg2 + args: + user: matrix-synapse + # Generate a long, secure one with a password manager + password: hunter2 + database: matrix-synapse + host: localhost + cp_min: 5 + cp_max: 10 +``` + +Currently the complexity is measured by [current_state_events / 500](https://github.com/matrix-org/synapse/blob/v1.20.1/synapse/storage/databases/main/events_worker.py#L986). 
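+To make the arithmetic concrete, here is a rough sketch of the check described
+above (not Synapse's actual code, just the documented formula combined with the
+3.0 limit from the config):
+
+```python
+# Rough sketch of the room-complexity arithmetic described above.
+COMPLEXITY_DIVISOR = 500  # complexity = current_state_events / 500
+
+def room_complexity(current_state_events: int) -> float:
+    return current_state_events / COMPLEXITY_DIVISOR
+
+# With the limit of 3.0 from the config above, rooms with more than
+# 1500 current state events would be refused:
+assert room_complexity(1501) > 3.0
+assert room_complexity(1200) <= 3.0
+```
+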
You can find join times and your most complex rooms like this: + +``` +admin@homeserver:~$ zgrep '/client/r0/join/' /var/log/matrix-synapse/homeserver.log* | awk '{print $18, $25}' | sort --human-numeric-sort +29.922sec/-0.002sec /_matrix/client/r0/join/%23debian-fasttrack%3Apoddery.com +182.088sec/0.003sec /_matrix/client/r0/join/%23decentralizedweb-general%3Amatrix.org +911.625sec/-570.847sec /_matrix/client/r0/join/%23synapse%3Amatrix.org + +admin@homeserver:~$ sudo --user postgres psql matrix-synapse --command 'select canonical_alias, joined_members, current_state_events from room_stats_state natural join room_stats_current where canonical_alias is not null order by current_state_events desc fetch first 5 rows only' + canonical_alias | joined_members | current_state_events +-------------------------------+----------------+---------------------- + #_oftc_#debian:matrix.org | 871 | 52355 + #matrix:matrix.org | 6379 | 10684 + #irc:matrix.org | 461 | 3751 + #decentralizedweb-general:matrix.org | 997 | 1509 + #whatsapp:maunium.net | 554 | 854 +``` \ No newline at end of file diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md index d2cdb9b2f4a3..dc0dfffa21ab 100644 --- a/docs/password_auth_providers.md +++ b/docs/password_auth_providers.md @@ -1,3 +1,9 @@ +

+This page of the Synapse documentation is now deprecated. For up-to-date
+documentation on setting up or writing a password auth provider module, please see
+the [modules documentation](modules.md).
+

+ # Password auth provider modules Password auth providers offer a way for server administrators to diff --git a/docs/postgres.md b/docs/postgres.md index 2c0a5b803a8b..e4861c1f127f 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -29,16 +29,20 @@ connect to a postgres database. Assuming your PostgreSQL database user is called `postgres`, first authenticate as the database user with: - su - postgres - # Or, if your system uses sudo to get administrative rights - sudo -u postgres bash +```sh +su - postgres +# Or, if your system uses sudo to get administrative rights +sudo -u postgres bash +``` Then, create a postgres user and a database with: - # this will prompt for a password for the new user - createuser --pwprompt synapse_user +```sh +# this will prompt for a password for the new user +createuser --pwprompt synapse_user - createdb --encoding=UTF8 --locale=C --template=template0 --owner=synapse_user synapse +createdb --encoding=UTF8 --locale=C --template=template0 --owner=synapse_user synapse +``` The above will create a user called `synapse_user`, and a database called `synapse`. @@ -114,6 +118,9 @@ performance: Note that the appropriate values for those fields depend on the amount of free memory the database host has available. +Additionally, admins of large deployments might want to consider using huge pages +to help manage memory, especially when using large values of `shared_buffers`. You +can read more about that [here](https://www.postgresql.org/docs/10/kernel-resources.html#LINUX-HUGE-PAGES). ## Porting from SQLite @@ -145,20 +152,26 @@ Firstly, shut down the currently running synapse server and copy its database file (typically `homeserver.db`) to another location. Once the copy is complete, restart synapse. For instance: - ./synctl stop - cp homeserver.db homeserver.db.snapshot - ./synctl start +```sh +./synctl stop +cp homeserver.db homeserver.db.snapshot +./synctl start +``` Copy the old config file into a new config file: - cp homeserver.yaml homeserver-postgres.yaml +```sh +cp homeserver.yaml homeserver-postgres.yaml +``` Edit the database section as described in the section *Synapse config* above and with the SQLite snapshot located at `homeserver.db.snapshot` simply run: - synapse_port_db --sqlite-database homeserver.db.snapshot \ - --postgres-config homeserver-postgres.yaml +```sh +synapse_port_db --sqlite-database homeserver.db.snapshot \ + --postgres-config homeserver-postgres.yaml +``` The flag `--curses` displays a coloured curses progress UI. @@ -170,16 +183,20 @@ To complete the conversion shut down the synapse server and run the port script one last time, e.g. if the SQLite database is at `homeserver.db` run: - synapse_port_db --sqlite-database homeserver.db \ - --postgres-config homeserver-postgres.yaml +```sh +synapse_port_db --sqlite-database homeserver.db \ + --postgres-config homeserver-postgres.yaml +``` Once that has completed, change the synapse config to point at the PostgreSQL database configuration file `homeserver-postgres.yaml`: - ./synctl stop - mv homeserver.yaml homeserver-old-sqlite.yaml - mv homeserver-postgres.yaml homeserver.yaml - ./synctl start +```sh +./synctl stop +mv homeserver.yaml homeserver-old-sqlite.yaml +mv homeserver-postgres.yaml homeserver.yaml +./synctl start +``` Synapse should now be running against PostgreSQL. diff --git a/docs/presence_router_module.md b/docs/presence_router_module.md index 4a3e720240c5..face54fe2bb3 100644 --- a/docs/presence_router_module.md +++ b/docs/presence_router_module.md @@ -1,3 +1,9 @@ +

+This page of the Synapse documentation is now deprecated. For up-to-date
+documentation on setting up or writing a presence router module, please see
+the [modules documentation](modules.md).
+

+
 # Presence Router Module

 Synapse supports configuring a module that can specify additional users
diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
index 76bb45aff2e1..f3b3aea732c7 100644
--- a/docs/reverse_proxy.md
+++ b/docs/reverse_proxy.md
@@ -33,13 +33,26 @@ Let's assume that we expect clients to connect to our server at
 `https://example.com:8448`. The following sections detail the configuration of
 the reverse proxy and the homeserver.

+
+## Homeserver Configuration
+
+The HTTP configuration will need to be updated for Synapse to correctly record
+client IP addresses and generate redirect URLs while behind a reverse proxy.
+
+In `homeserver.yaml` set `x_forwarded: true` in the port 8008 section and
+consider setting `bind_addresses: ['127.0.0.1']` so that the server only
+listens to traffic on localhost. (Do not change `bind_addresses` to `127.0.0.1`
+when using a containerized Synapse, as that will prevent it from responding
+to proxied traffic.)
+
+
 ## Reverse-proxy configuration examples

 **NOTE**: You only need one of these.

 ### nginx

-```
+```nginx
 server {
     listen 443 ssl http2;
     listen [::]:443 ssl http2;
@@ -51,6 +64,9 @@ server {
     server_name matrix.example.com;

     location ~* ^(\/_matrix|\/_synapse\/client) {
+        # note: do not add a path (even a single /) after the port in `proxy_pass`,
+        # otherwise nginx will canonicalise the URI and cause signature verification
+        # errors.
         proxy_pass http://localhost:8008;
         proxy_set_header X-Forwarded-For $remote_addr;
         proxy_set_header X-Forwarded-Proto $scheme;
@@ -63,10 +79,7 @@ server {
 }
 ```

-**NOTE**: Do not add a path after the port in `proxy_pass`, otherwise nginx will
-canonicalise/normalise the URI.
-
-### Caddy 1
+### Caddy v1

 ```
 matrix.example.com {
@@ -86,7 +99,7 @@ example.com:8448 {
 }
 ```

-### Caddy 2
+### Caddy v2

 ```
 matrix.example.com {
@@ -128,7 +141,7 @@ matrix.example.com {

 ### Apache

-```
+```apache
 <VirtualHost *:443>
     SSLEngine on
     ServerName matrix.example.com
@@ -157,7 +170,7 @@ matrix.example.com {
 **NOTE 2**: It appears that Synapse is currently incompatible with the ModSecurity
 module for Apache (`mod_security2`). If you need it enabled for other services on
 your web server, you can disable it for Synapse's two VirtualHosts by including the
 following lines before each of the two `</VirtualHost>` above:

-```
+```apache
 <IfModule security2_module>
     SecRuleEngine off
 </IfModule>
@@ -175,7 +188,7 @@ frontend https
     http-request set-header X-Forwarded-For %[src]

     # Matrix client traffic
-    acl matrix-host hdr(host) -i matrix.example.com
+    acl matrix-host hdr(host) -i matrix.example.com matrix.example.com:443
     acl matrix-path path_beg /_matrix
     acl matrix-path path_beg /_synapse/client

@@ -239,16 +252,6 @@ relay "matrix_federation" {
 }
 ```

-## Homeserver Configuration
-
-You will also want to set `bind_addresses: ['127.0.0.1']` and
-`x_forwarded: true` for port 8008 in `homeserver.yaml` to ensure that
-client IP addresses are recorded correctly.
-
-Having done so, you can then use `https://matrix.example.com` (instead
-of `https://matrix.example.com:8448`) as the "Custom server" when
-connecting to Synapse from a client.
-
 ## Health check endpoint

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 1a217f35dba9..6696ed5d1ef9 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -91,8 +91,28 @@ pid_file: DATADIR/homeserver.pid
 # Otherwise, it should be the URL to reach Synapse's client HTTP listener (see
 # 'listeners' below).
 #
+# Defaults to 'https://<server_name>/'.
+#
 #public_baseurl: https://example.com/

+# Uncomment the following to tell other servers to send federation traffic on
+# port 443.
+#
+# By default, other servers will try to reach our server on port 8448, which can
+# be inconvenient in some environments.
+#
+# Provided 'https://<server_name>/' on port 443 is routed to Synapse, this
+# option configures Synapse to serve a file at
+# 'https://<server_name>/.well-known/matrix/server'. This will tell other
+# servers to send traffic to port 443 instead.
+#
+# See https://matrix-org.github.io/synapse/latest/delegate.html for more
+# information.
+#
+# Defaults to 'false'.
+#
+#serve_server_wellknown: true
+
 # Set the soft limit on the number of file descriptors synapse can use
 # Zero is used to indicate synapse should set the soft limit to the
 # hard limit.
@@ -108,20 +128,6 @@ presence:
   #
   #enabled: false

-  # Presence routers are third-party modules that can specify additional logic
-  # to where presence updates from users are routed.
-  #
-  presence_router:
-    # The custom module's class. Uncomment to use a custom presence router module.
-    #
-    #module: "my_custom_router.PresenceRouter"
-
-    # Configuration options of the custom module. Refer to your module's
-    # documentation for available options.
-    #
-    #config:
-    #  example_option: 'something'
-
 # Whether to require authentication to retrieve profile data (avatars,
 # display names) of other users through the client API. Defaults to
 # 'false'. Note that profile data is also available via the federation
@@ -210,6 +216,8 @@ presence:
 #
 # This option replaces federation_ip_range_blacklist in Synapse v1.25.0.
 #
+# Note: The value is ignored when an HTTP proxy is in use
+#
 #ip_range_blacklist:
 #  - '127.0.0.0/8'
 #  - '10.0.0.0/8'
@@ -347,6 +355,24 @@ listeners:
 #  bind_addresses: ['::1', '127.0.0.1']
 #  type: manhole

+# Connection settings for the manhole
+#
+manhole_settings:
+  # The username for the manhole. This defaults to 'matrix'.
+  #
+  #username: manhole
+
+  # The password for the manhole. This defaults to 'rabbithole'.
+  #
+  #password: mypassword
+
+  # The private and public SSH key pair used to encrypt the manhole traffic.
+  # If these are left unset, then hardcoded and non-secret keys are used,
+  # which could allow traffic to be intercepted if sent over a public network.
+  #
+  #ssh_priv_key_path: CONFDIR/id_rsa
+  #ssh_pub_key_path: CONFDIR/id_rsa.pub
+
 # Forward extremities can build up in a room due to networking delays between
 # homeservers. Once this happens in a large room, calculation of the state of
 # that room can become quite expensive. To mitigate this, once the number of
@@ -466,6 +492,48 @@ limit_remote_rooms:
 #
 #user_ips_max_age: 14d

+# Inhibits the /requestToken endpoints from returning an error that might leak
+# information about whether an e-mail address is in use or not on this
+# homeserver.
+# Note that for some endpoints the error situation is the e-mail already being
+# used, and for others the error is entering the e-mail being unused.
+# If this option is enabled, instead of returning an error, these endpoints will
+# act as if no error happened and return a fake session ID ('sid') to clients.
+#
+#request_token_inhibit_3pid_errors: true
+
+# A list of domains that the domain portion of 'next_link' parameters
+# must match.
+#
+# This parameter is optionally provided by clients while requesting
+# validation of an email or phone number, and maps to a link that
+# users will be automatically redirected to after validation
+# succeeds.
Clients can make use this parameter to aid the validation +# process. +# +# The whitelist is applied whether the homeserver or an +# identity server is handling validation. +# +# The default value is no whitelist functionality; all domains are +# allowed. Setting this value to an empty list will instead disallow +# all domains. +# +#next_link_domain_whitelist: ["matrix.org"] + +# Templates to use when generating email or HTML page contents. +# +templates: + # Directory in which Synapse will try to find template files to use to generate + # email or HTML page contents. + # If not set, or a file is not found within the template directory, a default + # template from within the Synapse package will be used. + # + # See https://matrix-org.github.io/synapse/latest/templates.html for more + # information about using custom templates. + # + #custom_template_directory: /path/to/custom/templates/ + + # Message retention policy at the server level. # # Room admins and mods can define a retention period for their rooms using the @@ -535,34 +603,6 @@ retention: # - shortest_max_lifetime: 3d # interval: 1d -# Inhibits the /requestToken endpoints from returning an error that might leak -# information about whether an e-mail address is in use or not on this -# homeserver. -# Note that for some endpoints the error situation is the e-mail already being -# used, and for others the error is entering the e-mail being unused. -# If this option is enabled, instead of returning an error, these endpoints will -# act as if no error happened and return a fake session ID ('sid') to clients. -# -#request_token_inhibit_3pid_errors: true - -# A list of domains that the domain portion of 'next_link' parameters -# must match. -# -# This parameter is optionally provided by clients while requesting -# validation of an email or phone number, and maps to a link that -# users will be automatically redirected to after validation -# succeeds. Clients can make use this parameter to aid the validation -# process. -# -# The whitelist is applied whether the homeserver or an -# identity server is handling validation. -# -# The default value is no whitelist functionality; all domains are -# allowed. Setting this value to an empty list will instead disallow -# all domains. -# -#next_link_domain_whitelist: ["matrix.org"] - ## TLS ## @@ -607,8 +647,8 @@ retention: # #federation_certificate_verification_whitelist: # - lon.example.com -# - *.domain.com -# - *.onion +# - "*.domain.com" +# - "*.onion" # List of custom certificate authorities for federation traffic. # @@ -711,6 +751,15 @@ caches: # #expiry_time: 30m + # Controls how long the results of a /sync request are cached for after + # a successful response is returned. A higher duration can help clients with + # intermittent connections, at the cost of higher memory usage. + # + # By default, this is zero, which means that sync responses are not cached + # at all. + # + #sync_response_cache_duration: 2m + ## Database ## @@ -783,6 +832,8 @@ log_config: "CONFDIR/SERVERNAME.log.config" # is using # - one for registration that ratelimits registration requests based on the # client's IP address. +# - one for checking the validity of registration tokens that ratelimits +# requests based on the client's IP address. # - one for login that ratelimits login requests based on the client's IP # address. 
# - one for login that ratelimits login requests based on the account the @@ -811,6 +862,10 @@ log_config: "CONFDIR/SERVERNAME.log.config" # per_second: 0.17 # burst_count: 3 # +#rc_registration_token_validity: +# per_second: 0.1 +# burst_count: 5 +# #rc_login: # address: # per_second: 0.17 @@ -963,6 +1018,8 @@ media_store_path: "DATADIR/media_store" # This must be specified if url_preview_enabled is set. It is recommended that # you uncomment the following list as a starting point. # +# Note: The value is ignored when an HTTP proxy is in use +# #url_preview_ip_range_blacklist: # - '127.0.0.0/8' # - '10.0.0.0/8' @@ -1057,6 +1114,27 @@ url_preview_accept_language: # - en +# oEmbed allows for easier embedding content from a website. It can be +# used for generating URLs previews of services which support it. +# +oembed: + # A default list of oEmbed providers is included with Synapse. + # + # Uncomment the following to disable using these default oEmbed URLs. + # Defaults to 'false'. + # + #disable_default_providers: true + + # Additional files with oEmbed configuration (each should be in the + # form of providers.json). + # + # By default, this list is empty (so only the default providers.json + # is used). + # + #additional_providers: + # - oembed/my_providers.json + + ## Captcha ## # See docs/CAPTCHA_SETUP.md for full details of configuring this. @@ -1131,6 +1209,44 @@ url_preview_accept_language: # #session_lifetime: 24h +# Time that an access token remains valid for, if the session is +# using refresh tokens. +# For more information about refresh tokens, please see the manual. +# Note that this only applies to clients which advertise support for +# refresh tokens. +# +# Note also that this is calculated at login time and refresh time: +# changes are not applied to existing sessions until they are refreshed. +# +# By default, this is 5 minutes. +# +#refreshable_access_token_lifetime: 5m + +# Time that a refresh token remains valid for (provided that it is not +# exchanged for another one first). +# This option can be used to automatically log-out inactive sessions. +# Please see the manual for more information. +# +# Note also that this is calculated at login time and refresh time: +# changes are not applied to existing sessions until they are refreshed. +# +# By default, this is infinite. +# +#refresh_token_lifetime: 24h + +# Time that an access token remains valid for, if the session is NOT +# using refresh tokens. +# Please note that not all clients support refresh tokens, so setting +# this to a short value may be inconvenient for some users who will +# then be logged out frequently. +# +# Note also that this is calculated at login time: changes are not applied +# retrospectively to existing sessions for users that have already logged in. +# +# By default, this is infinite. +# +#nonrefreshable_access_token_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: @@ -1157,6 +1273,15 @@ url_preview_accept_language: # #enable_3pid_lookup: true +# Require users to submit a token during registration. +# Tokens can be managed using the admin API: +# https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/registration_tokens.html +# Note that `enable_registration` must be set to `true`. +# Disabling this option will not delete any tokens previously generated. +# Defaults to false. 
Uncomment the following to require tokens: +# +#registration_requires_token: true + # If set, allows registration of standard or admin accounts by anyone who # has the shared secret, even if registration is otherwise disabled. # @@ -1180,7 +1305,7 @@ url_preview_accept_language: # in on this server. # # (By default, no suggestion is made, so it is left up to the client. -# This setting is ignored unless public_baseurl is also set.) +# This setting is ignored unless public_baseurl is also explicitly set.) # #default_identity_server: https://matrix.org @@ -1205,8 +1330,6 @@ url_preview_accept_language: # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # -# If a delegate is specified, the config option public_baseurl must also be filled out. -# account_threepid_delegates: #email: https://example.com # Delegate email sending to example.com #msisdn: http://localhost:8090 # Delegate SMS sending to this local process @@ -1882,6 +2005,9 @@ cas_config: # Additional settings to use with single-sign on systems such as OpenID Connect, # SAML2 and CAS. # +# Server admins can configure custom templates for pages related to SSO. See +# https://matrix-org.github.io/synapse/latest/templates.html for more information. +# sso: # A list of client URLs which are whitelisted so that the user does not # have to confirm giving access to their account to the URL. Any client @@ -1893,11 +2019,10 @@ sso: # phishing attacks from evil.site. To avoid this, include a slash after the # hostname: "https://my.client/". # - # If public_baseurl is set, then the login fallback page (used by clients - # that don't natively support the required login flows) is whitelisted in - # addition to any URLs in this list. + # The login fallback page (used by clients that don't natively support the + # required login flows) is whitelisted in addition to any URLs in this list. # - # By default, this list is empty. + # By default, this list contains only the login fallback page. # #client_whitelist: # - https://riot.im/develop @@ -1914,169 +2039,6 @@ sso: # #update_profile_information: true - # Directory in which Synapse will try to find the template files below. - # If not set, or the files named below are not found within the template - # directory, default templates from within the Synapse package will be used. - # - # Synapse will look for the following templates in this directory: - # - # * HTML page to prompt the user to choose an Identity Provider during - # login: 'sso_login_idp_picker.html'. - # - # This is only used if multiple SSO Identity Providers are configured. - # - # When rendering, this template is given the following variables: - # * redirect_url: the URL that the user will be redirected to after - # login. - # - # * server_name: the homeserver's name. - # - # * providers: a list of available Identity Providers. Each element is - # an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # The rendered HTML page should contain a form which submits its results - # back as a GET request, with the following query parameters: - # - # * redirectUrl: the client redirect URI (ie, the `redirect_url` passed - # to the template) - # - # * idp: the 'idp_id' of the chosen IDP. 
- # - # * HTML page to prompt new users to enter a userid and confirm other - # details: 'sso_auth_account_details.html'. This is only shown if the - # SSO implementation (with any user_mapping_provider) does not return - # a localpart. - # - # When rendering, this template is given the following variables: - # - # * server_name: the homeserver's name. - # - # * idp: details of the SSO Identity Provider that the user logged in - # with: an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # * user_attributes: an object containing details about the user that - # we received from the IdP. May have the following attributes: - # - # * display_name: the user's display_name - # * emails: a list of email addresses - # - # The template should render a form which submits the following fields: - # - # * username: the localpart of the user's chosen user id - # - # * HTML page allowing the user to consent to the server's terms and - # conditions. This is only shown for new users, and only if - # `user_consent.require_at_registration` is set. - # - # When rendering, this template is given the following variables: - # - # * server_name: the homeserver's name. - # - # * user_id: the user's matrix proposed ID. - # - # * user_profile.display_name: the user's proposed display name, if any. - # - # * consent_version: the version of the terms that the user will be - # shown - # - # * terms_url: a link to the page showing the terms. - # - # The template should render a form which submits the following fields: - # - # * accepted_version: the version of the terms accepted by the user - # (ie, 'consent_version' from the input variables). - # - # * HTML page for a confirmation step before redirecting back to the client - # with the login token: 'sso_redirect_confirm.html'. - # - # When rendering, this template is given the following variables: - # - # * redirect_url: the URL the user is about to be redirected to. - # - # * display_url: the same as `redirect_url`, but with the query - # parameters stripped. The intention is to have a - # human-readable URL to show to users, not to use it as - # the final address to redirect to. - # - # * server_name: the homeserver's name. - # - # * new_user: a boolean indicating whether this is the user's first time - # logging in. - # - # * user_id: the user's matrix ID. - # - # * user_profile.avatar_url: an MXC URI for the user's avatar, if any. - # None if the user has not set an avatar. - # - # * user_profile.display_name: the user's display name. None if the user - # has not set a display name. - # - # * HTML page which notifies the user that they are authenticating to confirm - # an operation on their account during the user interactive authentication - # process: 'sso_auth_confirm.html'. - # - # When rendering, this template is given the following variables: - # * redirect_url: the URL the user is about to be redirected to. 
- # - # * description: the operation which the user is being asked to confirm - # - # * idp: details of the Identity Provider that we will use to confirm - # the user's identity: an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # * HTML page shown after a successful user interactive authentication session: - # 'sso_auth_success.html'. - # - # Note that this page must include the JavaScript which notifies of a successful authentication - # (see https://matrix.org/docs/spec/client_server/r0.6.0#fallback). - # - # This template has no additional variables. - # - # * HTML page shown after a user-interactive authentication session which - # does not map correctly onto the expected user: 'sso_auth_bad_user.html'. - # - # When rendering, this template is given the following variables: - # * server_name: the homeserver's name. - # * user_id_to_verify: the MXID of the user that we are trying to - # validate. - # - # * HTML page shown during single sign-on if a deactivated user (according to Synapse's database) - # attempts to login: 'sso_account_deactivated.html'. - # - # This template has no additional variables. - # - # * HTML page to display to users if something goes wrong during the - # OpenID Connect authentication process: 'sso_error.html'. - # - # When rendering, this template is given two variables: - # * error: the technical name of the error - # * error_description: a human-readable message for the error - # - # You can see the default templates at: - # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates - # - #template_dir: "res/templates" - # JSON web token integration. The following settings can be used to make # Synapse JSON web tokens for authentication, instead of its internal @@ -2115,6 +2077,12 @@ sso: # #algorithm: "provided-by-your-issuer" + # Name of the claim containing a unique identifier for the user. + # + # Optional, defaults to `sub`. + # + #subject_claim: "sub" + # The issuer to validate the "iss" claim against. # # Optional, if provided the "iss" claim will be required and @@ -2180,7 +2148,7 @@ password_config: # #require_lowercase: true - # Whether a password must contain at least one lowercase letter. + # Whether a password must contain at least one uppercase letter. # Defaults to 'false'. # #require_uppercase: true @@ -2207,6 +2175,9 @@ ui_auth: # Configuration for sending emails from Synapse. # +# Server admins can configure custom templates for email content. See +# https://matrix-org.github.io/synapse/latest/templates.html for more information. +# email: # The hostname of the outgoing SMTP server to use. Defaults to 'localhost'. # @@ -2229,6 +2200,14 @@ email: # #require_transport_security: true + # Uncomment the following to disable TLS for SMTP. + # + # By default, if the server supports TLS, it will be used, and the server + # must present a certificate that is valid for 'smtp_host'. If this option + # is set to false, TLS will not be used. + # + #enable_tls: false + # notif_from defines the "From" address to use when sending emails. # It must be set if email sending is enabled. # @@ -2275,49 +2254,6 @@ email: # #invite_client_location: https://app.element.io - # Directory in which Synapse will try to find the template files below. 
- # If not set, or the files named below are not found within the template - # directory, default templates from within the Synapse package will be used. - # - # Synapse will look for the following templates in this directory: - # - # * The contents of email notifications of missed events: 'notif_mail.html' and - # 'notif_mail.txt'. - # - # * The contents of account expiry notice emails: 'notice_expiry.html' and - # 'notice_expiry.txt'. - # - # * The contents of password reset emails sent by the homeserver: - # 'password_reset.html' and 'password_reset.txt' - # - # * An HTML page that a user will see when they follow the link in the password - # reset email. The user will be asked to confirm the action before their - # password is reset: 'password_reset_confirmation.html' - # - # * HTML pages for success and failure that a user will see when they confirm - # the password reset flow using the page above: 'password_reset_success.html' - # and 'password_reset_failure.html' - # - # * The contents of address verification emails sent during registration: - # 'registration.html' and 'registration.txt' - # - # * HTML pages for success and failure that a user will see when they follow - # the link in an address verification email sent during registration: - # 'registration_success.html' and 'registration_failure.html' - # - # * The contents of address verification emails sent when an address is added - # to a Matrix account: 'add_threepid.html' and 'add_threepid.txt' - # - # * HTML pages for success and failure that a user will see when they follow - # the link in an address verification email sent when an address is added - # to a Matrix account: 'add_threepid_success.html' and - # 'add_threepid_failure.html' - # - # You can see the default templates at: - # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates - # - #template_dir: "res/templates" - # Subjects to use when sending emails from Synapse. # # The placeholder '%(app)s' will be replaced with the value of the 'app_name' @@ -2386,34 +2322,6 @@ email: #email_validation: "[%(server_name)s] Validate your email" -# Password providers allow homeserver administrators to integrate -# their Synapse installation with existing authentication methods -# ex. LDAP, external tokens, etc. -# -# For more information and known implementations, please see -# https://matrix-org.github.io/synapse/latest/password_auth_providers.html -# -# Note: instances wishing to use SAML or CAS authentication should -# instead use the `saml2_config` or `cas_config` options, -# respectively. -# -password_providers: -# # Example config for an LDAP auth provider -# - module: "ldap_auth_provider.LdapAuthProvider" -# config: -# enabled: true -# uri: "ldap://ldap.example.com:389" -# start_tls: true -# base: "ou=users,dc=example,dc=com" -# attributes: -# uid: "cn" -# mail: "email" -# name: "givenName" -# #bind_dn: -# #bind_password: -# #filter: "(objectClass=posixAccount)" - - ## Push ## @@ -2488,12 +2396,16 @@ user_directory: #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. - # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. + # + # NB. 
If you set this to true, and the last time the user_directory search
+  #      indexes were (re)built was before Synapse 1.44, you'll have to
+  #      rebuild the indexes in order to search through all known users.
+  #      These indexes are built the first time Synapse starts; admins can
+  #      manually trigger a rebuild via API following the instructions at
+  #      https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/background_updates.html#run
   #
   # Uncomment to return search results containing all known users, even if that
   # user does not share a room with the requester.
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index 669e60008113..2485ad25edfc 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -24,18 +24,31 @@ handlers:
         backupCount: 3  # Does not include the current log file.
         encoding: utf8

-    # Default to buffering writes to log file for efficiency. This means that
-    # will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR
-    # logs will still be flushed immediately.
+    # Default to buffering writes to log file for efficiency.
+    # WARNING/ERROR logs will still be flushed immediately, but there will be a
+    # delay (of up to `period` seconds, or until the buffer is full with
+    # `capacity` messages) before INFO/DEBUG logs get written.
     buffer:
-        class: logging.handlers.MemoryHandler
+        class: synapse.logging.handlers.PeriodicallyFlushingMemoryHandler
         target: file
-        # The capacity is the number of log lines that are buffered before
-        # being written to disk. Increasing this will lead to better
+
+        # The capacity is the maximum number of log lines that are buffered
+        # before being written to disk. Increasing this will lead to better
         # performance, at the expense of it taking longer for log lines to
         # be written to disk.
+        # This parameter is required.
         capacity: 10
-        flushLevel: 30  # Flush for WARNING logs as well
+
+        # Logs with a level at or above the flush level will cause the buffer to
+        # be flushed immediately.
+        # Default value: 40 (ERROR)
+        # Other values: 50 (CRITICAL), 30 (WARNING), 20 (INFO), 10 (DEBUG)
+        flushLevel: 30  # Flush immediately for WARNING logs and higher
+
+        # The period of time, in seconds, between forced flushes.
+        # Messages will not be delayed for longer than this time.
+        # Default value: 5 seconds
+        period: 5

 # A handler that writes logs to stderr. Unused by default, but can be used
 # instead of "buffer" and "file" in the logger handlers.
diff --git a/docs/setup/forward_proxy.md b/docs/setup/forward_proxy.md
new file mode 100644
index 000000000000..494c14893b26
--- /dev/null
+++ b/docs/setup/forward_proxy.md
@@ -0,0 +1,74 @@
+# Using a forward proxy with Synapse
+
+You can use Synapse with a forward or outbound proxy. An example of when
+this is necessary is in corporate environments behind a DMZ (demilitarized zone).
+Synapse supports routing outbound HTTP(S) requests via a proxy. Only HTTP(S)
+proxies are supported, not SOCKS proxies or anything else.
+
+## Configure
+
+The `http_proxy`, `https_proxy`, `no_proxy` environment variables are used to
+specify proxy settings. These environment variables are not case sensitive.
+- `http_proxy`: Proxy server to use for HTTP requests.
+- `https_proxy`: Proxy server to use for HTTPS requests.
+- `no_proxy`: Comma-separated list of hosts, IP addresses, or IP ranges in CIDR
+  format which should not use the proxy. Synapse will directly connect to these hosts.
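+For example, the variables can be injected into Synapse's environment from a
+small wrapper before the server starts. A minimal sketch in Python - the proxy
+URL and `no_proxy` list are placeholder values, not part of the documentation
+above:
+
+```python
+#!/usr/bin/env python
+# Hypothetical launcher that starts Synapse with proxy settings in its
+# environment; adjust the example proxy URL and no_proxy list as needed.
+import os
+import subprocess
+
+env = dict(
+    os.environ,
+    https_proxy="http://proxy.example.com:8080",
+    no_proxy="localhost,127.0.0.1,10.0.0.0/8",
+)
+subprocess.run(
+    ["python", "-m", "synapse.app.homeserver", "-c", "homeserver.yaml"],
+    env=env,
+    check=True,
+)
+```
+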
+
+The `http_proxy` and `https_proxy` environment variables have the form: `[scheme://][<username>:<password>@]<host>[:<port>]`
+- Supported schemes are `http://` and `https://`. The default scheme is `http://`
+  for compatibility reasons; it is recommended to set a scheme. If the scheme is set
+  to `https://`, the connection uses TLS between Synapse and the proxy.
+
+  **NOTE**: Synapse validates the certificates. If the certificate is not
+  valid, then the connection is dropped.
+- The default port, if not given, is `1080`.
+- Username and password are optional and will be used to authenticate against
+  the proxy.
+
+**Examples**
+- HTTP_PROXY=http://USERNAME:PASSWORD@10.0.1.1:8080/
+- HTTPS_PROXY=http://USERNAME:PASSWORD@proxy.example.com:8080/
+- NO_PROXY=master.hostname.example.com,10.1.0.0/16,172.30.0.0/16
+
+**NOTE**:
+Synapse does not apply the IP blacklist to connections through the proxy (since
+the DNS resolution is done by the proxy). It is expected that the proxy or firewall
+will apply blacklisting of IP addresses.
+
+## Connection types
+
+The proxy will be **used** for:
+
+- push
+- url previews
+- phone-home stats
+- recaptcha validation
+- CAS auth validation
+- OpenID Connect
+- Outbound federation
+- Federation (checking public key revocation)
+- Fetching public keys of other servers
+- Downloading remote media
+
+It will **not be used** for:
+
+- Application Services
+- Identity servers
+- In worker configurations
+  - connections between workers
+  - connections from workers to Redis
+
+## Troubleshooting
+
+If a proxy server is used with TLS (HTTPS) and no connections are established,
+it is most likely due to the proxy's certificates. To test this, the validation
+in Synapse can be deactivated.
+
+**NOTE**: This has an impact on security and is for testing purposes only!
+
+To deactivate the certificate validation, the following setting must be made in
+[homeserver.yaml](../usage/configuration/homeserver_sample_config.md).
+
+```yaml
+use_insecure_ssl_client_just_for_testing_do_not_use: true
+```
diff --git a/docs/setup/installation.md b/docs/setup/installation.md
index 8540a7b0c10d..16562be95388 100644
--- a/docs/setup/installation.md
+++ b/docs/setup/installation.md
@@ -1,44 +1,5 @@
 # Installation Instructions

-There are 3 steps to follow under **Installation Instructions**.
-
-- [Installation Instructions](#installation-instructions)
-  - [Choosing your server name](#choosing-your-server-name)
-  - [Installing Synapse](#installing-synapse)
-    - [Installing from source](#installing-from-source)
-      - [Platform-specific prerequisites](#platform-specific-prerequisites)
-        - [Debian/Ubuntu/Raspbian](#debianubunturaspbian)
-        - [ArchLinux](#archlinux)
-        - [CentOS/Fedora](#centosfedora)
-        - [macOS](#macos)
-        - [OpenSUSE](#opensuse)
-        - [OpenBSD](#openbsd)
-        - [Windows](#windows)
-    - [Prebuilt packages](#prebuilt-packages)
-      - [Docker images and Ansible playbooks](#docker-images-and-ansible-playbooks)
-      - [Debian/Ubuntu](#debianubuntu)
-        - [Matrix.org packages](#matrixorg-packages)
-        - [Downstream Debian packages](#downstream-debian-packages)
-        - [Downstream Ubuntu packages](#downstream-ubuntu-packages)
-      - [Fedora](#fedora)
-      - [OpenSUSE](#opensuse-1)
-      - [SUSE Linux Enterprise Server](#suse-linux-enterprise-server)
-      - [ArchLinux](#archlinux-1)
-      - [Void Linux](#void-linux)
-      - [FreeBSD](#freebsd)
-      - [OpenBSD](#openbsd-1)
-      - [NixOS](#nixos)
-  - [Setting up Synapse](#setting-up-synapse)
-    - [Using PostgreSQL](#using-postgresql)
-    - [TLS certificates](#tls-certificates)
-    - [Client Well-Known URI](#client-well-known-uri)
-    - [Email](#email)
-    - [Registering a user](#registering-a-user)
-    - [Setting up a TURN server](#setting-up-a-turn-server)
-    - [URL previews](#url-previews)
-  - [Troubleshooting Installation](#troubleshooting-installation)
-
 ## Choosing your server name

 It is important to choose the name for your server before you install Synapse,
@@ -57,19 +18,185 @@ that your email address is probably `user@example.com` rather than

 ## Installing Synapse

-### Installing from source
+### Prebuilt packages
+
+Prebuilt packages are available for a number of platforms. These are recommended
+for most users.
+
+#### Docker images and Ansible playbooks
+
+There is an official synapse image available at
+<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with
+the docker-compose file available at
+[contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker).
+Further information on this including configuration options is available in the README
+on hub.docker.com.
+
+Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a
+Dockerfile to automate a synapse server in a single Docker image, at
+<https://hub.docker.com/r/avhost/docker-matrix/tags/>
+
+Slavi Pantaleev has created an Ansible playbook,
+which installs the official Docker image of Matrix Synapse
+along with many other Matrix-related services (Postgres database, Element, coturn,
+ma1sd, SSL support, etc.).
+For more details, see
+<https://github.com/spantaleev/matrix-docker-ansible-deploy>
+
+#### Debian/Ubuntu
+
+##### Matrix.org packages
+
+Matrix.org provides Debian/Ubuntu packages of Synapse, for the amd64
+architecture via <https://packages.matrix.org/debian/>.
+
+To install the latest release:
+
+```sh
+sudo apt install -y lsb-release wget apt-transport-https
+sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" |
+    sudo tee /etc/apt/sources.list.d/matrix-org.list
+sudo apt update
+sudo apt install matrix-synapse-py3
+```
+
+Packages are also published for release candidates. To enable the prerelease
+channel, add `prerelease` to the `sources.list` line.
For example:
+
+```sh
+sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main prerelease" |
+    sudo tee /etc/apt/sources.list.d/matrix-org.list
+sudo apt update
+sudo apt install matrix-synapse-py3
+```
+
+The fingerprint of the repository signing key (as shown by `gpg
+/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is
+`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`.
+
+When installing with Debian packages, you might prefer to place files in
+`/etc/matrix-synapse/conf.d/` to override your configuration without editing
+the main configuration file at `/etc/matrix-synapse/homeserver.yaml`.
+By doing that, you won't be asked if you want to replace your configuration
+file when you upgrade the Debian package to a later version.
+
+##### Downstream Debian packages
+
+We do not recommend using the packages from the default Debian `buster`
+repository at this time, as they are old and suffer from known security
+vulnerabilities. You can install the latest version of Synapse from
+[our repository](#matrixorg-packages) or from `buster-backports`. Please
+see the [Debian documentation](https://backports.debian.org/Instructions/)
+for information on how to use backports.
+
+If you are using Debian `sid` or testing, Synapse is available in the default
+repositories and it should be possible to install it simply with:
+
+```sh
+sudo apt install matrix-synapse
+```
+
+##### Downstream Ubuntu packages
+
+We do not recommend using the packages in the default Ubuntu repository
+at this time, as they are old and suffer from known security vulnerabilities.
+The latest version of Synapse can be installed from [our repository](#matrixorg-packages).
+
+#### Fedora
+
+Synapse is in the Fedora repositories as `matrix-synapse`:
+
+```sh
+sudo dnf install matrix-synapse
+```
+
+Oleg Girko provides Fedora RPMs at
+<https://obs.infoserver.lv/project/monitor/matrix-synapse>
+
+#### OpenSUSE
+
+Synapse is in the OpenSUSE repositories as `matrix-synapse`:
+
+```sh
+sudo zypper install matrix-synapse
+```
+
+#### SUSE Linux Enterprise Server
+
+Unofficial packages are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at
+
+#### ArchLinux
+
+The quickest way to get up and running with ArchLinux is probably with the community package
+<https://www.archlinux.org/packages/community/any/matrix-synapse/>, which should pull in most of
+the necessary dependencies.
+
+pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1):
+
+```sh
+sudo pip install --upgrade pip
+```
+
+If you encounter an error with lib bcrypt causing a "Wrong ELF Class:
+ELFCLASS32" error (on x64 systems), you may need to reinstall py-bcrypt to correctly
+compile it under the right architecture. (This should not be needed if
+installing under virtualenv):
+
+```sh
+sudo pip uninstall py-bcrypt
+sudo pip install py-bcrypt
+```
+
+#### Void Linux
+
+Synapse can be found in the void repositories as 'synapse':
+
+```sh
+xbps-install -Su
+xbps-install -S synapse
+```
+
+#### FreeBSD
+
+Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from:
+
+- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean`
+- Packages: `pkg install py37-matrix-synapse`
+
+#### OpenBSD
+
+As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem
+underlying the homeserver directory (defaults to `/var/synapse`) has to be
+mounted with `wxallowed` (cf.
`mount(8)`), so creating a separate filesystem +and mounting it to `/var/synapse` should be taken into consideration. + +Installing Synapse: + +```sh +doas pkg_add synapse +``` + +#### NixOS + +Robin Lambertz has packaged Synapse for NixOS at: + -(Prebuilt packages are available for some platforms - see [Prebuilt packages](#prebuilt-packages).) -When installing from source please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed. +### Installing as a Python module from PyPI + +It's also possible to install Synapse as a Python module from PyPI. + +When following this route please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed. System requirements: - POSIX-compliant system (tested on Linux & OS X) -- Python 3.5.2 or later, up to Python 3.9. +- Python 3.6 or later, up to Python 3.9. - At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org - To install the Synapse homeserver run: ```sh @@ -235,170 +362,14 @@ make install ##### Windows -If you wish to run or develop Synapse on Windows, the Windows Subsystem For -Linux provides a Linux environment on Windows 10 which is capable of using the -Debian, Fedora, or source installation methods. More information about WSL can -be found at for -Windows 10 and -for Windows Server. - -### Prebuilt packages - -As an alternative to installing from source, prebuilt packages are available -for a number of platforms. - -#### Docker images and Ansible playbooks - -There is an official synapse image available at - which can be used with -the docker-compose file available at -[contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker). -Further information on this including configuration options is available in the README -on hub.docker.com. - -Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a -Dockerfile to automate a synapse server in a single Docker image, at - - -Slavi Pantaleev has created an Ansible playbook, -which installs the offical Docker image of Matrix Synapse -along with many other Matrix-related services (Postgres database, Element, coturn, -ma1sd, SSL support, etc.). -For more details, see - - -#### Debian/Ubuntu - -##### Matrix.org packages - -Matrix.org provides Debian/Ubuntu packages of Synapse via -. To install the latest release: - -```sh -sudo apt install -y lsb-release wget apt-transport-https -sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg -echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" | - sudo tee /etc/apt/sources.list.d/matrix-org.list -sudo apt update -sudo apt install matrix-synapse-py3 -``` - -Packages are also published for release candidates. To enable the prerelease -channel, add `prerelease` to the `sources.list` line. 
For example: - -```sh -sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg -echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main prerelease" | - sudo tee /etc/apt/sources.list.d/matrix-org.list -sudo apt update -sudo apt install matrix-synapse-py3 -``` - -The fingerprint of the repository signing key (as shown by `gpg -/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is -`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`. - -##### Downstream Debian packages - -We do not recommend using the packages from the default Debian `buster` -repository at this time, as they are old and suffer from known security -vulnerabilities. You can install the latest version of Synapse from -[our repository](#matrixorg-packages) or from `buster-backports`. Please -see the [Debian documentation](https://backports.debian.org/Instructions/) -for information on how to use backports. - -If you are using Debian `sid` or testing, Synapse is available in the default -repositories and it should be possible to install it simply with: - -```sh -sudo apt install matrix-synapse -``` - -##### Downstream Ubuntu packages - -We do not recommend using the packages in the default Ubuntu repository -at this time, as they are old and suffer from known security vulnerabilities. -The latest version of Synapse can be installed from [our repository](#matrixorg-packages). - -#### Fedora - -Synapse is in the Fedora repositories as `matrix-synapse`: - -```sh -sudo dnf install matrix-synapse -``` - -Oleg Girko provides Fedora RPMs at - - -#### OpenSUSE - -Synapse is in the OpenSUSE repositories as `matrix-synapse`: - -```sh -sudo zypper install matrix-synapse -``` - -#### SUSE Linux Enterprise Server - -Unofficial package are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at - - -#### ArchLinux - -The quickest way to get up and running with ArchLinux is probably with the community package -, which should pull in most of -the necessary dependencies. - -pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ): - -```sh -sudo pip install --upgrade pip -``` - -If you encounter an error with lib bcrypt causing an Wrong ELF Class: -ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly -compile it under the right architecture. (This should not be needed if -installing under virtualenv): - -```sh -sudo pip uninstall py-bcrypt -sudo pip install py-bcrypt -``` - -#### Void Linux - -Synapse can be found in the void repositories as 'synapse': +Running Synapse natively on Windows is not officially supported. -```sh -xbps-install -Su -xbps-install -S synapse -``` - -#### FreeBSD - -Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from: - -- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean` -- Packages: `pkg install py37-matrix-synapse` - -#### OpenBSD - -As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem -underlying the homeserver directory (defaults to `/var/synapse`) has to be -mounted with `wxallowed` (cf. `mount(8)`), so creating a separate filesystem -and mounting it to `/var/synapse` should be taken into consideration. 
- -Installing Synapse: - -```sh -doas pkg_add synapse -``` - -#### NixOS - -Robin Lambertz has packaged Synapse for NixOS at: - +If you wish to run or develop Synapse on Windows, the Windows Subsystem for +Linux provides a Linux environment which is capable of using the Debian, Fedora, +or source installation methods. More information about WSL can be found at + for Windows 10/11 and + for +Windows Server. ## Setting up Synapse diff --git a/docs/synctl_workers.md b/docs/synctl_workers.md index 8da4a31852ec..15e37f608d17 100644 --- a/docs/synctl_workers.md +++ b/docs/synctl_workers.md @@ -20,7 +20,9 @@ Finally, to actually run your worker-based synapse, you must pass synctl the `-a commandline option to tell it to operate on all the worker configurations found in the given directory, e.g.: - synctl -a $CONFIG/workers start +```sh +synctl -a $CONFIG/workers start +``` Currently one should always restart all workers when restarting or upgrading synapse, unless you explicitly know it's safe not to. For instance, restarting @@ -29,4 +31,6 @@ notifications. To manipulate a specific worker, you pass the -w option to synctl: - synctl -w $CONFIG/workers/worker1.yaml restart +```sh +synctl -w $CONFIG/workers/worker1.yaml restart +``` diff --git a/docs/systemd-with-workers/system/matrix-synapse-worker@.service b/docs/systemd-with-workers/system/matrix-synapse-worker@.service index d164e8ce1f88..8f5c44c9d4ef 100644 --- a/docs/systemd-with-workers/system/matrix-synapse-worker@.service +++ b/docs/systemd-with-workers/system/matrix-synapse-worker@.service @@ -15,7 +15,7 @@ Type=notify NotifyAccess=main User=matrix-synapse WorkingDirectory=/var/lib/matrix-synapse -EnvironmentFile=/etc/default/matrix-synapse +EnvironmentFile=-/etc/default/matrix-synapse ExecStart=/opt/venvs/matrix-synapse/bin/python -m synapse.app.generic_worker --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --config-path=/etc/matrix-synapse/workers/%i.yaml ExecReload=/bin/kill -HUP $MAINPID Restart=always diff --git a/docs/systemd-with-workers/system/matrix-synapse.service b/docs/systemd-with-workers/system/matrix-synapse.service index f6b6dfd3ce8e..0c73fb55fb57 100644 --- a/docs/systemd-with-workers/system/matrix-synapse.service +++ b/docs/systemd-with-workers/system/matrix-synapse.service @@ -10,7 +10,7 @@ Type=notify NotifyAccess=main User=matrix-synapse WorkingDirectory=/var/lib/matrix-synapse -EnvironmentFile=/etc/default/matrix-synapse +EnvironmentFile=-/etc/default/matrix-synapse ExecStartPre=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --generate-keys ExecStart=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ ExecReload=/bin/kill -HUP $MAINPID diff --git a/docs/templates.md b/docs/templates.md new file mode 100644 index 000000000000..2b66e9d86294 --- /dev/null +++ b/docs/templates.md @@ -0,0 +1,244 @@ +# Templates + +Synapse uses parametrised templates to generate the content of emails it sends and +webpages it shows to users. + +By default, Synapse will use the templates listed [here](https://github.com/matrix-org/synapse/tree/master/synapse/res/templates). 
+Server admins can configure an additional directory for Synapse to look for templates
+in, allowing them to specify custom templates:
+
+```yaml
+templates:
+  custom_templates_directory: /path/to/custom/templates/
+```
+
+If this setting is not set, or the files named below are not found within the directory,
+default templates from within the Synapse package will be used.
+
+Templates that are given variables when being rendered are rendered using [Jinja 2](https://jinja.palletsprojects.com/en/2.11.x/).
+Templates rendered by Jinja 2 can also access two functions on top of the functions
+already available as part of Jinja 2:
+
+```python
+format_ts(value: int, format: str) -> str
+```
+
+Formats a timestamp in milliseconds.
+
+Example: `reason.last_sent_ts|format_ts("%c")`
+
+```python
+mxc_to_http(value: str, width: int, height: int, resize_method: str = "crop") -> str
+```
+
+Turns a `mxc://` URL for media content into an HTTP(S) one using the homeserver's
+`public_baseurl` configuration setting as the URL's base.
+
+Example: `message.sender_avatar_url|mxc_to_http(32,32)`
+
+
+## Email templates
+
+Below are the templates Synapse will look for when generating the content of an email:
+
+* `notif_mail.html` and `notif_mail.txt`: The contents of email notifications of missed
+  events.
+  When rendering, this template is given the following variables:
+  * `user_display_name`: the display name for the user receiving the notification
+  * `unsubscribe_link`: the link users can click to unsubscribe from email notifications
+  * `summary_text`: a summary of the notification(s). The text used can be customised
+    by configuring the various settings in the `email.subjects` section of the
+    configuration file.
+  * `rooms`: a list of rooms containing events to include in the email. Each element is
+    an object with the following attributes:
+    * `title`: a human-readable name for the room
+    * `hash`: a hash of the ID of the room
+    * `invite`: a boolean, which is `True` if the room is an invite the user hasn't
+      accepted yet, `False` otherwise
+    * `notifs`: a list of events, or an empty list if `invite` is `True`. Each element
+      is an object with the following attributes:
+      * `link`: a `matrix.to` link to the event
+      * `ts`: the time in milliseconds at which the event was received
+      * `messages`: a list of messages containing one message before the event, the
+        message in the event, and one message after the event. Each element is an
+        object with the following attributes:
+        * `event_type`: the type of the event
+        * `is_historical`: a boolean, which is `False` if the message is the one
+          that triggered the notification, `True` otherwise
+        * `id`: the ID of the event
+        * `ts`: the time in milliseconds at which the event was sent
+        * `sender_name`: the display name for the event's sender
+        * `sender_avatar_url`: the avatar URL (as a `mxc://` URL) for the event's
+          sender
+        * `sender_hash`: a hash of the user ID of the sender
+        * `msgtype`: the type of the message
+        * `body_text_html`: HTML representation of the message
+        * `body_text_plain`: plaintext representation of the message
+        * `image_url`: the `mxc://` URL of an image, when `msgtype` is `m.image`
+    * `link`: a `matrix.to` link to the room
+    * `avator_url`: the URL of the room's avatar
+  * `reason`: information on the event that triggered the email to be sent. It's an
+    object with the following attributes:
+    * `room_id`: the ID of the room the event was sent in
+    * `room_name`: a human-readable name for the room the event was sent in
+    * `now`: the current time in milliseconds
+    * `received_at`: the time in milliseconds at which the event was received
+    * `delay_before_mail_ms`: the amount of time in milliseconds Synapse always waits
+      before ever emailing about a notification (to give the user a chance to respond
+      to other push or notice the window)
+    * `last_sent_ts`: the time in milliseconds at which a notification was last sent
+      for an event in this room
+    * `throttle_ms`: the minimum amount of time in milliseconds that must elapse
+      between two notifications being sent for this room
+* `password_reset.html` and `password_reset.txt`: The contents of password reset emails
+  sent by the homeserver.
+  When rendering, these templates are given a `link` variable which contains the link the
+  user must click in order to reset their password.
+* `registration.html` and `registration.txt`: The contents of address verification emails
+  sent during registration.
+  When rendering, these templates are given a `link` variable which contains the link the
+  user must click in order to validate their email address.
+* `add_threepid.html` and `add_threepid.txt`: The contents of address verification emails
+  sent when an address is added to a Matrix account.
+  When rendering, these templates are given a `link` variable which contains the link the
+  user must click in order to validate their email address.
+
+
+## HTML page templates for registration and password reset
+
+Below are the templates Synapse will look for when generating pages related to
+registration and password reset:
+
+* `password_reset_confirmation.html`: An HTML page that a user will see when they follow
+  the link in the password reset email. The user will be asked to confirm the action
+  before their password is reset.
+  When rendering, this template is given the following variables:
+  * `sid`: the session ID for the password reset
+  * `token`: the token for the password reset
+  * `client_secret`: the client secret for the password reset
+* `password_reset_success.html` and `password_reset_failure.html`: HTML pages for success
+  and failure that a user will see when they confirm the password reset flow using the
+  page above.
+  When rendering, `password_reset_success.html` is given no variable, and
+  `password_reset_failure.html` is given a `failure_reason`, which contains the reason
+  for the password reset failure.
+* `registration_success.html` and `registration_failure.html`: HTML pages for success and
+  failure that a user will see when they follow the link in an address verification email
+  sent during registration.
+  When rendering, `registration_success.html` is given no variable, and
+  `registration_failure.html` is given a `failure_reason`, which contains the reason
+  for the registration failure.
+* `add_threepid_success.html` and `add_threepid_failure.html`: HTML pages for success and
+  failure that a user will see when they follow the link in an address verification email
+  sent when an address is added to a Matrix account.
+  When rendering, `add_threepid_success.html` is given no variable, and
+  `add_threepid_failure.html` is given a `failure_reason`, which contains the reason
+  the address could not be added.
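+
+As an illustrative sketch (the paths below are assumptions, not defaults), a custom
+version of any of the templates above can be tried out by copying the packaged
+default into the configured custom templates directory and editing the copy; any
+template not present in that directory falls back to the packaged default:
+
+```sh
+# Assumes custom_templates_directory is set to /path/to/custom/templates/
+# and that the default templates are available from a Synapse source checkout.
+mkdir -p /path/to/custom/templates/
+cp synapse/res/templates/password_reset.html /path/to/custom/templates/
+# Edit the copy, then restart Synapse to pick up the change.
+```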
+
+
+## HTML page templates for Single Sign-On (SSO)
+
+Below are the templates Synapse will look for when generating pages related to SSO:
+
+* `sso_login_idp_picker.html`: HTML page to prompt the user to choose an
+  Identity Provider during login.
+  This is only used if multiple SSO Identity Providers are configured.
+  When rendering, this template is given the following variables:
+  * `redirect_url`: the URL that the user will be redirected to after
+    login.
+  * `server_name`: the homeserver's name.
+  * `providers`: a list of available Identity Providers. Each element is
+    an object with the following attributes:
+    * `idp_id`: unique identifier for the IdP
+    * `idp_name`: user-facing name for the IdP
+    * `idp_icon`: if specified in the IdP config, an MXC URI for an icon
+      for the IdP
+    * `idp_brand`: if specified in the IdP config, a textual identifier
+      for the brand of the IdP
+  The rendered HTML page should contain a form which submits its results
+  back as a GET request, with the following query parameters:
+  * `redirectUrl`: the client redirect URI (ie, the `redirect_url` passed
+    to the template)
+  * `idp`: the `idp_id` of the chosen IdP.
+* `sso_auth_account_details.html`: HTML page to prompt new users to enter a
+  userid and confirm other details. This is only shown if the
+  SSO implementation (with any `user_mapping_provider`) does not return
+  a localpart.
+  When rendering, this template is given the following variables:
+  * `server_name`: the homeserver's name.
+  * `idp`: details of the SSO Identity Provider that the user logged in
+    with: an object with the following attributes:
+    * `idp_id`: unique identifier for the IdP
+    * `idp_name`: user-facing name for the IdP
+    * `idp_icon`: if specified in the IdP config, an MXC URI for an icon
+      for the IdP
+    * `idp_brand`: if specified in the IdP config, a textual identifier
+      for the brand of the IdP
+  * `user_attributes`: an object containing details about the user that
+    we received from the IdP. May have the following attributes:
+    * `display_name`: the user's display name
+    * `emails`: a list of email addresses
+  The template should render a form which submits the following fields:
+  * `username`: the localpart of the user's chosen user id
+* `sso_new_user_consent.html`: HTML page allowing the user to consent to the
+  server's terms and conditions. This is only shown for new users, and only if
+  `user_consent.require_at_registration` is set.
+  When rendering, this template is given the following variables:
+  * `server_name`: the homeserver's name.
+  * `user_id`: the user's proposed Matrix ID.
+  * `user_profile.display_name`: the user's proposed display name, if any.
+  * `consent_version`: the version of the terms that the user will be
+    shown
+  * `terms_url`: a link to the page showing the terms.
+  The template should render a form which submits the following fields:
+  * `accepted_version`: the version of the terms accepted by the user
+    (ie, `consent_version` from the input variables).
+* `sso_redirect_confirm.html`: HTML page for a confirmation step before redirecting back
+  to the client with the login token.
+  When rendering, this template is given the following variables:
+  * `redirect_url`: the URL the user is about to be redirected to.
+  * `display_url`: the same as `redirect_url`, but with the query
+    parameters stripped. The intention is to have a
+    human-readable URL to show to users, not to use it as
+    the final address to redirect to.
+  * `server_name`: the homeserver's name.
+ * `new_user`: a boolean indicating whether this is the user's first time + logging in. + * `user_id`: the user's matrix ID. + * `user_profile.avatar_url`: an MXC URI for the user's avatar, if any. + `None` if the user has not set an avatar. + * `user_profile.display_name`: the user's display name. `None` if the user + has not set a display name. +* `sso_auth_confirm.html`: HTML page which notifies the user that they are authenticating + to confirm an operation on their account during the user interactive authentication + process. + When rendering, this template is given the following variables: + * `redirect_url`: the URL the user is about to be redirected to. + * `description`: the operation which the user is being asked to confirm + * `idp`: details of the Identity Provider that we will use to confirm + the user's identity: an object with the following attributes: + * `idp_id`: unique identifier for the IdP + * `idp_name`: user-facing name for the IdP + * `idp_icon`: if specified in the IdP config, an MXC URI for an icon + for the IdP + * `idp_brand`: if specified in the IdP config, a textual identifier + for the brand of the IdP +* `sso_auth_success.html`: HTML page shown after a successful user interactive + authentication session. + Note that this page must include the JavaScript which notifies of a successful + authentication (see https://matrix.org/docs/spec/client_server/r0.6.0#fallback). + This template has no additional variables. +* `sso_auth_bad_user.html`: HTML page shown after a user-interactive authentication + session which does not map correctly onto the expected user. + When rendering, this template is given the following variables: + * `server_name`: the homeserver's name. + * `user_id_to_verify`: the MXID of the user that we are trying to + validate. +* `sso_account_deactivated.html`: HTML page shown during single sign-on if a deactivated + user (according to Synapse's database) attempts to login. + This template has no additional variables. +* `sso_error.html`: HTML page to display to users if something goes wrong during the + OpenID Connect authentication process. + When rendering, this template is given two variables: + * `error`: the technical name of the error + * `error_description`: a human-readable message for the error diff --git a/docs/turn-howto.md b/docs/turn-howto.md index 6433446c2af4..e6812de69e6b 100644 --- a/docs/turn-howto.md +++ b/docs/turn-howto.md @@ -1,12 +1,12 @@ # Overview -This document explains how to enable VoIP relaying on your Home Server with +This document explains how to enable VoIP relaying on your homeserver with TURN. -The synapse Matrix Home Server supports integration with TURN server via the +The synapse Matrix homeserver supports integration with TURN server via the [TURN server REST API](). This -allows the Home Server to generate credentials that are valid for use on the -TURN server through the use of a secret shared between the Home Server and the +allows the homeserver to generate credentials that are valid for use on the +TURN server through the use of a secret shared between the homeserver and the TURN server. The following sections describe how to install [coturn]() (which implements the TURN REST API) and integrate it with synapse. @@ -40,7 +40,9 @@ This will install and start a systemd service called `coturn`. 1. Configure it: - ./configure + ```sh + ./configure + ``` You may need to install `libevent2`: if so, you should do so in the way recommended by your operating system. 
You can ignore @@ -49,22 +51,28 @@ This will install and start a systemd service called `coturn`. 1. Build and install it: - make - make install + ```sh + make + make install + ``` ### Configuration 1. Create or edit the config file in `/etc/turnserver.conf`. The relevant lines, with example values, are: - use-auth-secret - static-auth-secret=[your secret key here] - realm=turn.myserver.org + ``` + use-auth-secret + static-auth-secret=[your secret key here] + realm=turn.myserver.org + ``` See `turnserver.conf` for explanations of the options. One way to generate the `static-auth-secret` is with `pwgen`: - pwgen -s 64 1 + ```sh + pwgen -s 64 1 + ``` A `realm` must be specified, but its value is somewhat arbitrary. (It is sent to clients as part of the authentication flow.) It is conventional to @@ -73,7 +81,9 @@ This will install and start a systemd service called `coturn`. 1. You will most likely want to configure coturn to write logs somewhere. The easiest way is normally to send them to the syslog: - syslog + ```sh + syslog + ``` (in which case, the logs will be available via `journalctl -u coturn` on a systemd system). Alternatively, coturn can be configured to write to a @@ -83,31 +93,35 @@ This will install and start a systemd service called `coturn`. connect to arbitrary IP addresses and ports. The following configuration is suggested as a minimum starting point: - # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. - no-tcp-relay + ``` + # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. + no-tcp-relay - # don't let the relay ever try to connect to private IP address ranges within your network (if any) - # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. - denied-peer-ip=10.0.0.0-10.255.255.255 - denied-peer-ip=192.168.0.0-192.168.255.255 - denied-peer-ip=172.16.0.0-172.31.255.255 + # don't let the relay ever try to connect to private IP address ranges within your network (if any) + # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. + denied-peer-ip=10.0.0.0-10.255.255.255 + denied-peer-ip=192.168.0.0-192.168.255.255 + denied-peer-ip=172.16.0.0-172.31.255.255 - # special case the turn server itself so that client->TURN->TURN->client flows work - allowed-peer-ip=10.0.0.1 + # special case the turn server itself so that client->TURN->TURN->client flows work + allowed-peer-ip=10.0.0.1 - # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. - user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. - total-quota=1200 + # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. + user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. + total-quota=1200 + ``` 1. Also consider supporting TLS/DTLS. To do this, add the following settings to `turnserver.conf`: - # TLS certificates, including intermediate certs. - # For Let's Encrypt certificates, use `fullchain.pem` here. - cert=/path/to/fullchain.pem + ``` + # TLS certificates, including intermediate certs. + # For Let's Encrypt certificates, use `fullchain.pem` here. 
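+    # (An assumption for illustration: with certbot, these files typically
+    # live under /etc/letsencrypt/live/<your.domain>/; adjust the paths to
+    # wherever your certificates are actually issued.)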
+    cert=/path/to/fullchain.pem
 
-    # TLS private key file
-    pkey=/path/to/privkey.pem
+    # TLS private key file
+    pkey=/path/to/privkey.pem
+    ```
 
    In this case, replace the `turn:` schemes in the `turn_uri` settings below
    with `turns:`.
@@ -126,7 +140,9 @@ This will install and start a systemd service called `coturn`.
    If you want to try it anyway, you will at least need to tell coturn its
    external IP address:
 
-        external-ip=192.88.99.1
+    ```
+    external-ip=192.88.99.1
+    ```
 
    ... and your NAT gateway must forward all of the relayed ports directly
    (eg, port 56789 on the external IP must be always be forwarded to port
@@ -149,18 +165,18 @@ This will install and start a systemd service called `coturn`.
 
 ## Synapse setup
 
-Your home server configuration file needs the following extra keys:
+Your homeserver configuration file needs the following extra keys:
 
 1.  "`turn_uris`": This needs to be a yaml list of public-facing URIs
     for your TURN server to be given out to your clients. Add separate
     entries for each transport your TURN server supports.
 2.  "`turn_shared_secret`": This is the secret shared between your
-    Home server and your TURN server, so you should set it to the same
+    homeserver and your TURN server, so you should set it to the same
     string you used in turnserver.conf.
 3.  "`turn_user_lifetime`": This is the amount of time credentials
-    generated by your Home Server are valid for (in milliseconds).
+    generated by your homeserver are valid for (in milliseconds).
     Shorter times offer less potential for abuse at the expense of
-    increased traffic between web clients and your home server to
+    increased traffic between web clients and your homeserver to
     refresh credentials. The TURN REST API specification recommends
     one day (86400000).
 4.  "`turn_allow_guests`": Whether to allow guest users to use the
@@ -186,7 +202,7 @@ After updating the homeserver configuration, you must restart synapse:
   ./synctl restart
   ```
 * If you use systemd:
-  ```
+  ```sh
   systemctl restart matrix-synapse.service
   ```
 ... and then reload any clients (or wait an hour for them to refresh their
@@ -204,7 +220,7 @@ Here are a few things to try:
   anyone who has successfully set this up.
 
 * Check that you have opened your firewall to allow TCP and UDP traffic to the
-  TURN ports (normally 3478 and 5479).
+  TURN ports (normally 3478 and 5349).
 
 * Check that you have opened your firewall to allow UDP traffic to the UDP
   relay ports (49152-65535 by default).
diff --git a/docs/upgrade.md b/docs/upgrade.md
index ce9167e6de13..136c806c417a 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -85,6 +85,124 @@ process, for example:
 dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
 ```
 
+# Upgrading to v1.47.0
+
+## Removal of old Room Admin API
+
+The following admin APIs were deprecated in [Synapse 1.34](https://github.com/matrix-org/synapse/blob/v1.34.0/CHANGES.md#deprecations-and-removals)
+(released on 2021-05-17) and have now been removed:
+
+- `POST /_synapse/admin/v1/<room_id>/delete`
+
+Any scripts still using the above APIs should be converted to use the
+[Delete Room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api).
+
+## Deprecation of the `user_may_create_room_with_invites` module callback
+
+The `user_may_create_room_with_invites` callback is deprecated and will be removed in a future
+version of Synapse.
+Modules implementing this callback can instead implement
+[`user_may_invite`](https://matrix-org.github.io/synapse/latest/modules/spam_checker_callbacks.html#user_may_invite)
+and use the [`get_room_state`](https://github.com/matrix-org/synapse/blob/872f23b95fa980a61b0866c1475e84491991fa20/synapse/module_api/__init__.py#L869-L876)
+module API method to infer whether the invite is happening in the context of creating a
+room.
+
+We plan to remove this callback in January 2022.
+
+# Upgrading to v1.45.0
+
+## Changes required to media storage provider modules when reading from the Synapse configuration object
+
+Media storage provider modules that read from the Synapse configuration object (i.e. that
+read the value of `hs.config.[...]`) now need to specify the configuration section they're
+reading from. This means that if a module reads the value of e.g. `hs.config.media_store_path`,
+it needs to replace it with `hs.config.media.media_store_path`.
+
+# Upgrading to v1.44.0
+
+## The URL preview cache is no longer mirrored to storage providers
+The `url_cache/` and `url_cache_thumbnails/` directories in the media store are
+no longer mirrored to storage providers. These two directories can be safely
+deleted from any configured storage providers to reclaim space.
+
+# Upgrading to v1.43.0
+
+## The spaces summary APIs can now be handled by workers
+
+The [available worker applications documentation](https://matrix-org.github.io/synapse/latest/workers.html#available-worker-applications)
+has been updated to reflect that calls to the `/spaces`, `/hierarchy`, and
+`/summary` endpoints can now be routed to workers for both client API and
+federation requests.
+
+# Upgrading to v1.42.0
+
+## Removal of old Room Admin API
+
+The following admin APIs were deprecated in [Synapse 1.25](https://github.com/matrix-org/synapse/blob/v1.25.0/CHANGES.md#removal-warning)
+(released on 2021-01-13) and have now been removed:
+
+- `POST /_synapse/admin/v1/purge_room`
+- `POST /_synapse/admin/v1/shutdown_room/<room_id>`
+
+Any scripts still using the above APIs should be converted to use the
+[Delete Room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api).
+
+## User-interactive authentication fallback templates can now display errors
+
+This may affect you if you make use of custom HTML templates for the
+[reCAPTCHA](../synapse/res/templates/recaptcha.html) or
+[terms](../synapse/res/templates/terms.html) fallback pages.
+
+The template is now provided an `error` variable if the authentication
+process failed. See the default templates linked above for an example.
+
+## Removal of out-of-date email pushers
+
+Users will stop receiving message updates via email for addresses that were
+once, but are no longer, linked to their account.
+
+# Upgrading to v1.41.0
+
+## Add support for routing outbound HTTP requests via a proxy for federation
+
+Since Synapse 1.6.0 (2019-11-26) you can set a proxy for outbound HTTP requests via
+http_proxy/https_proxy environment variables. This proxy was set for:
+- push
+- url previews
+- phone-home stats
+- recaptcha validation
+- CAS auth validation
+- OpenID Connect
+- Federation (checking public key revocation)
+
+In this version we have added support for outbound requests for:
+- Outbound federation
+- Downloading remote media
+- Fetching public keys of other servers
+
+These requests use the same proxy configuration. If you have a proxy configuration we
+recommend verifying the configuration. It may be necessary to adjust the `no_proxy`
+environment variable.
+
+See [using a forward proxy with Synapse documentation](setup/forward_proxy.md) for
+details.
+
+## Deprecation of `template_dir`
+
+The `template_dir` settings in the `sso`, `account_validity` and `email` sections of the
+configuration file are now deprecated. Server admins should use the new
+`templates.custom_template_directory` setting in the configuration file and use one single
+custom template directory for all aforementioned features. Template file names remain
+unchanged. See [the related documentation](https://matrix-org.github.io/synapse/latest/templates.html)
+for more information and examples.
+
+We plan to remove support for these settings in October 2021.
+
+## `/_synapse/admin/v1/users/{userId}/media` must be handled by media workers
+
+The [media repository worker documentation](https://matrix-org.github.io/synapse/latest/workers.html#synapseappmedia_repository)
+has been updated to reflect that calls to `/_synapse/admin/v1/users/{userId}/media`
+must now be handled by media repository workers. This is due to the new `DELETE` method
+of this endpoint modifying the media store.
 
 # Upgrading to v1.39.0
 
@@ -92,8 +210,8 @@ process, for example:
 
 ## Deprecation of the current third-party rules module interface
 
 The current third-party rules module interface is deprecated in favour of the
 new generic modules system introduced in Synapse v1.37.0. Authors of third-party rules modules can refer
-to [this documentation](modules.md#porting-an-existing-module-that-uses-the-old-interface)
-to update their modules. Synapse administrators can refer to [this documentation](modules.md#using-modules)
+to [this documentation](modules/porting_legacy_module.md)
+to update their modules. Synapse administrators can refer to [this documentation](modules/index.md)
 to update their configuration once the modules they are using have been updated.
 
 We plan to remove support for the current third-party rules interface in September 2021.
@@ -142,9 +260,9 @@ SQLite databases are unaffected by this change.
 
 The current spam checker interface is deprecated in favour of a new generic modules
 system. Authors of spam checker modules can refer to [this
-documentation](modules.md#porting-an-existing-module-that-uses-the-old-interface)
+documentation](modules/porting_legacy_module.md)
 to update their modules. Synapse administrators can refer to [this
-documentation](modules.md#using-modules)
+documentation](modules/index.md)
 to update their configuration once the modules they are using have been updated.
 
 We plan to remove support for the current spam checker interface in August 2021.
@@ -253,24 +371,24 @@ Please ensure your Application Services are up to date.
 
 ## Requirement for X-Forwarded-Proto header
 
 When using Synapse with a reverse proxy (in particular, when using the
-[x_forwarded]{.title-ref} option on an HTTP listener), Synapse now
-expects to receive an [X-Forwarded-Proto]{.title-ref} header on incoming
+`x_forwarded` option on an HTTP listener), Synapse now
+expects to receive an `X-Forwarded-Proto` header on incoming
 HTTP requests. If it is not set, Synapse will log a warning on each
 received request.
 
 To avoid the warning, administrators using a reverse proxy should ensure
-that the reverse proxy sets [X-Forwarded-Proto]{.title-ref} header to
-[https]{.title-ref} or [http]{.title-ref} to indicate the protocol used
+that the reverse proxy sets the `X-Forwarded-Proto` header to
+`https` or `http` to indicate the protocol used
+by the client.
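+
+As an illustrative sketch only (the listening port and location are assumptions;
+see the reverse proxy documentation linked below for complete, supported
+examples), an nginx deployment would set the header like so:
+
+```
+location /_matrix {
+    proxy_pass http://localhost:8008;
+    # Tell Synapse which protocol the client used to connect.
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_set_header Host $host;
+}
+```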
-Synapse also requires the [Host]{.title-ref} header to be preserved.
+Synapse also requires the `Host` header to be preserved.
 
 See the [reverse proxy documentation](reverse_proxy.md), where the
 example configurations have been updated to show how to set these
 headers.
 
 (Users of [Caddy](https://caddyserver.com/) are unaffected, since we
-believe it sets [X-Forwarded-Proto]{.title-ref} by default.)
+believe it sets `X-Forwarded-Proto` by default.)
 
 # Upgrading to v1.27.0
 
@@ -434,13 +552,13 @@ mapping provider to specify different algorithms, instead of the
 way]().
 
 If your Synapse configuration uses a custom mapping provider
-([oidc_config.user_mapping_provider.module]{.title-ref} is specified and
+(`oidc_config.user_mapping_provider.module` is specified and
 not equal to
-[synapse.handlers.oidc_handler.JinjaOidcMappingProvider]{.title-ref})
-then you *must* ensure that [map_user_attributes]{.title-ref} of the
+`synapse.handlers.oidc_handler.JinjaOidcMappingProvider`)
+then you *must* ensure that `map_user_attributes` of the
 mapping provider performs some normalisation of the
-[localpart]{.title-ref} returned. To match previous behaviour you can
-use the [map_username_to_mxid_localpart]{.title-ref} function provided
+`localpart` returned. To match previous behaviour you can
+use the `map_username_to_mxid_localpart` function provided
 by Synapse. An example is shown below:
 
 ```python
@@ -469,7 +587,7 @@ v1.24.0.
 
 The Admin API is now only accessible under:
 
 -   `/_synapse/admin/v1`
 
-The only exception is the [/admin/whois]{.title-ref} endpoint, which is
+The only exception is the `/admin/whois` endpoint, which is
 [also available via the client-server
 API](https://matrix.org/docs/spec/client_server/r0.6.1#get-matrix-client-r0-admin-whois-userid).
 
@@ -544,7 +662,7 @@ This page will appear to the user after clicking a password
 reset link that has been emailed to them.
 
 To complete password reset, the page must include a way to make a
-[POST]{.title-ref} request to
+`POST` request to
 `/_synapse/client/password_reset/{medium}/submit_token` with the query
 parameters from the original link, presented as a URL-encoded form. See
 the file itself for more details.
 
@@ -565,18 +683,18 @@ but the parameters are slightly different:
 
 # Upgrading to v1.18.0
 
-## Docker [-py3]{.title-ref} suffix will be removed in future versions
+## Docker `-py3` suffix will be removed in future versions
 
 From 10th August 2020, we will no longer publish Docker images with the
-[-py3]{.title-ref} tag suffix. The images tagged with the
-[-py3]{.title-ref} suffix have been identical to the non-suffixed tags
+`-py3` tag suffix. The images tagged with the
+`-py3` suffix have been identical to the non-suffixed tags
 since release 0.99.0, and the suffix is obsolete.
 
-On 10th August, we will remove the [latest-py3]{.title-ref} tag.
-Existing per-release tags (such as [v1.18.0-py3]{.title-ref}) will not
-be removed, but no new [-py3]{.title-ref} tags will be added.
+On 10th August, we will remove the `latest-py3` tag.
+Existing per-release tags (such as `v1.18.0-py3`) will not
+be removed, but no new `-py3` tags will be added.
 
-Scripts relying on the [-py3]{.title-ref} suffix will need to be
+Scripts relying on the `-py3` suffix will need to be
 updated.
 
 ## Redis replication is now recommended in lieu of TCP replication
@@ -610,8 +728,8 @@ This will *not* be a problem for Synapse installations which were:
 
 If completeness of the room directory is a concern, installations which
 are affected can be repaired as follows:
 
-1.
Run the following sql from a [psql]{.title-ref} or - [sqlite3]{.title-ref} console: +1. Run the following sql from a `psql` or + `sqlite3` console: ```sql INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES @@ -675,8 +793,8 @@ participating in many rooms. of any problems. 1. As an initial check to see if you will be affected, you can try - running the following query from the [psql]{.title-ref} or - [sqlite3]{.title-ref} console. It is safe to run it while Synapse is + running the following query from the `psql` or + `sqlite3` console. It is safe to run it while Synapse is still running. ```sql @@ -1068,16 +1186,20 @@ For more information on configuring TLS certificates see the For users who have installed Synapse into a virtualenv, we recommend doing this by creating a new virtualenv. For example: - virtualenv -p python3 ~/synapse/env3 - source ~/synapse/env3/bin/activate - pip install matrix-synapse + ```sh + virtualenv -p python3 ~/synapse/env3 + source ~/synapse/env3/bin/activate + pip install matrix-synapse + ``` You can then start synapse as normal, having activated the new virtualenv: - cd ~/synapse - source env3/bin/activate - synctl start + ```sh + cd ~/synapse + source env3/bin/activate + synctl start + ``` Users who have installed from distribution packages should see the relevant package documentation. See below for notes on Debian @@ -1089,34 +1211,38 @@ For more information on configuring TLS certificates see the `.log.config` file. For example, if your `log.config` file contains: - handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: precise - filename: homeserver.log - maxBytes: 104857600 - backupCount: 10 - filters: [context] - console: - class: logging.StreamHandler - formatter: precise - filters: [context] + ```yaml + handlers: + file: + class: logging.handlers.RotatingFileHandler + formatter: precise + filename: homeserver.log + maxBytes: 104857600 + backupCount: 10 + filters: [context] + console: + class: logging.StreamHandler + formatter: precise + filters: [context] + ``` Then you should update this to be: - handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: precise - filename: homeserver.log - maxBytes: 104857600 - backupCount: 10 - filters: [context] - encoding: utf8 - console: - class: logging.StreamHandler - formatter: precise - filters: [context] + ```yaml + handlers: + file: + class: logging.handlers.RotatingFileHandler + formatter: precise + filename: homeserver.log + maxBytes: 104857600 + backupCount: 10 + filters: [context] + encoding: utf8 + console: + class: logging.StreamHandler + formatter: precise + filters: [context] + ``` There is no need to revert this change if downgrading to Python 2. @@ -1202,24 +1328,28 @@ with the HS remotely has been removed. It has been replaced by specifying a list of application service registrations in `homeserver.yaml`: - app_service_config_files: ["registration-01.yaml", "registration-02.yaml"] +```yaml +app_service_config_files: ["registration-01.yaml", "registration-02.yaml"] +``` Where `registration-01.yaml` looks like: - url: # e.g. "https://my.application.service.com" - as_token: - hs_token: - sender_localpart: # This is a new field which denotes the user_id localpart when using the AS token - namespaces: - users: - - exclusive: - regex: # e.g. "@prefix_.*" - aliases: - - exclusive: - regex: - rooms: - - exclusive: - regex: +```yaml +url: # e.g. 
"https://my.application.service.com" +as_token: +hs_token: +sender_localpart: # This is a new field which denotes the user_id localpart when using the AS token +namespaces: + users: + - exclusive: + regex: # e.g. "@prefix_.*" + aliases: + - exclusive: + regex: + rooms: + - exclusive: + regex: +``` # Upgrading to v0.8.0 @@ -1258,9 +1388,9 @@ first need to upgrade the database by running: python scripts/upgrade_db_to_v0.6.0.py -Where []{.title-ref} is the location of the database, -[]{.title-ref} is the server name as specified in the -synapse configuration, and []{.title-ref} is the location +Where `` is the location of the database, +`` is the server name as specified in the +synapse configuration, and `` is the location of the signing key as specified in the synapse configuration. This may take some time to complete. Failures of signatures and content diff --git a/docs/url_previews.md b/docs/url_previews.md deleted file mode 100644 index 665554e165ae..000000000000 --- a/docs/url_previews.md +++ /dev/null @@ -1,76 +0,0 @@ -URL Previews -============ - -Design notes on a URL previewing service for Matrix: - -Options are: - - 1. Have an AS which listens for URLs, downloads them, and inserts an event that describes their metadata. - * Pros: - * Decouples the implementation entirely from Synapse. - * Uses existing Matrix events & content repo to store the metadata. - * Cons: - * Which AS should provide this service for a room, and why should you trust it? - * Doesn't work well with E2E; you'd have to cut the AS into every room - * the AS would end up subscribing to every room anyway. - - 2. Have a generic preview API (nothing to do with Matrix) that provides a previewing service: - * Pros: - * Simple and flexible; can be used by any clients at any point - * Cons: - * If each HS provides one of these independently, all the HSes in a room may needlessly DoS the target URI - * We need somewhere to store the URL metadata rather than just using Matrix itself - * We can't piggyback on matrix to distribute the metadata between HSes. - - 3. Make the synapse of the sending user responsible for spidering the URL and inserting an event asynchronously which describes the metadata. - * Pros: - * Works transparently for all clients - * Piggy-backs nicely on using Matrix for distributing the metadata. - * No confusion as to which AS - * Cons: - * Doesn't work with E2E - * We might want to decouple the implementation of the spider from the HS, given spider behaviour can be quite complicated and evolve much more rapidly than the HS. It's more like a bot than a core part of the server. - - 4. Make the sending client use the preview API and insert the event itself when successful. - * Pros: - * Works well with E2E - * No custom server functionality - * Lets the client customise the preview that they send (like on FB) - * Cons: - * Entirely specific to the sending client, whereas it'd be nice if /any/ URL was correctly previewed if clients support it. - - 5. Have the option of specifying a shared (centralised) previewing service used by a room, to avoid all the different HSes in the room DoSing the target. - -Best solution is probably a combination of both 2 and 4. - * Sending clients do their best to create and send a preview at the point of sending the message, perhaps delaying the message until the preview is computed? 
(This also lets the user validate the preview before sending)
-  * Receiving clients have the option of going and creating their own preview if one doesn't arrive soon enough (or if the original sender didn't create one)
-
-This is a bit magical though in that the preview could come from two entirely different sources - the sending HS or your local one. However, this can always be exposed to users: "Generate your own URL previews if none are available?"
-
-This is tantamount also to senders calculating their own thumbnails for sending in advance of the main content - we are trusting the sender not to lie about the content in the thumbnail. Whereas currently thumbnails are calculated by the receiving homeserver to avoid this attack.
-
-However, this kind of phishing attack does exist whether we let senders pick their thumbnails or not, in that a malicious sender can send normal text messages around the attachment claiming it to be legitimate. We could rely on (future) reputation/abuse management to punish users who phish (be it with bogus metadata or bogus descriptions). Bogus metadata is particularly bad though, especially if it's avoidable.
-
-As a first cut, let's do #2 and have the receiver hit the API to calculate its own previews (as it does currently for image thumbnails). We can then extend/optimise this to option 4 as a special extra if needed.
-
-API
----
-
-```
-GET /_matrix/media/r0/preview_url?url=http://wherever.com
-200 OK
-{
-    "og:type" : "article"
-    "og:url" : "https://twitter.com/matrixdotorg/status/684074366691356672"
-    "og:title" : "Matrix on Twitter"
-    "og:image" : "https://pbs.twimg.com/profile_images/500400952029888512/yI0qtFi7_400x400.png"
-    "og:description" : "“Synapse 0.12 is out! Lots of polishing, performance &amp; bugfixes: /sync API, /r0 prefix, fulltext search, 3PID invites https://t.co/5alhXLLEGP”"
-    "og:site_name" : "Twitter"
-}
-```
-
-* Downloads the URL
-  * If HTML, just stores it in RAM and parses it for OG meta tags
-    * Download any media OG meta tags to the media repo, and refer to them in the OG via mxc:// URIs.
-  * If a media filetype we know we can thumbnail: store it on disk, and hand it to the thumbnailer. Generate OG meta tags from the thumbnailer contents.
-  * Otherwise, don't bother downloading further.
diff --git a/docs/usage/administration/admin_api/background_updates.md b/docs/usage/administration/admin_api/background_updates.md
new file mode 100644
index 000000000000..9f6ac7d56784
--- /dev/null
+++ b/docs/usage/administration/admin_api/background_updates.md
@@ -0,0 +1,109 @@
+# Background Updates API
+
+This API allows a server administrator to manage the background updates being
+run against the database.
+
+## Status
+
+This API gets the current status of the background updates.
+
+
+The API is:
+
+```
+GET /_synapse/admin/v1/background_updates/status
+```
+
+Returning:
+
+```json
+{
+    "enabled": true,
+    "current_updates": {
+        "<db_name>": {
+            "name": "<background_update_name>",
+            "total_item_count": 50,
+            "total_duration_ms": 10000.0,
+            "average_items_per_ms": 2.2
+        }
+    }
+}
+```
+
+`enabled` whether the background updates are enabled or disabled.
+
+`db_name` the database name (usually Synapse is configured with a single database named 'master').
+
+For each update:
+
+`name` the name of the update.
+`total_item_count` total number of "items" processed (the meaning of 'items' depends on the update in question).
+`total_duration_ms` how long the background process has been running, not including time spent sleeping.
+`average_items_per_ms` how many items are processed per millisecond based on an exponential average.
+
+
+## Enabled
+
+This API allows pausing background updates.
+
+Background updates should *not* be paused for significant periods of time, as
+this can affect the performance of Synapse.
+
+*Note*: This won't persist over restarts.
+
+*Note*: This won't cancel any update query that is currently running. This is
+usually fine since most queries are short lived, except for `CREATE INDEX`
+background updates which won't be cancelled once started.
+
+
+The API is:
+
+```
+POST /_synapse/admin/v1/background_updates/enabled
+```
+
+with the following body:
+
+```json
+{
+    "enabled": false
+}
+```
+
+`enabled` sets whether the background updates are enabled or disabled.
+
+The API returns the `enabled` param.
+
+```json
+{
+    "enabled": false
+}
+```
+
+There is also a `GET` version which returns the `enabled` state.
+
+
+## Run
+
+This API schedules a specific background update to run. The job starts immediately after calling the API.
+
+
+The API is:
+
+```
+POST /_synapse/admin/v1/background_updates/start_job
+```
+
+with the following body:
+
+```json
+{
+    "job_name": "populate_stats_process_rooms"
+}
+```
+
+The following JSON body parameters are available:
+
+- `job_name` - A string specifying which job to run. Valid values are:
+  - `populate_stats_process_rooms` - Recalculate the stats for all rooms.
+  - `regenerate_directory` - Recalculate the [user directory](../../../user_directory.md) if it is stale or out of sync.
diff --git a/docs/usage/administration/admin_api/federation.md b/docs/usage/administration/admin_api/federation.md
new file mode 100644
index 000000000000..8f9535f57b09
--- /dev/null
+++ b/docs/usage/administration/admin_api/federation.md
@@ -0,0 +1,114 @@
+# Federation API
+
+This API allows a server administrator to manage Synapse's federation with other homeservers.
+
+Note: This API is new, experimental and "subject to change".
+
+## List of destinations
+
+This API gets the current destination retry timing info for all remote servers.
+
+The list contains all the servers with which the server federates,
+regardless of whether an error occurred or not.
+If an error occurs, it may take up to 20 minutes for the error to be displayed here,
+as a complete retry must have failed.
+
+The API is:
+
+A standard request with no filtering:
+
+```
+GET /_synapse/admin/v1/federation/destinations
+```
+
+A response body like the following is returned:
+
+```json
+{
+   "destinations":[
+     {
+       "destination": "matrix.org",
+       "retry_last_ts": 1557332397936,
+       "retry_interval": 3000000,
+       "failure_ts": 1557329397936,
+       "last_successful_stream_ordering": null
+     }
+   ],
+   "total": 1
+}
+```
+
+To paginate, check for `next_token` and if present, call the endpoint again
+with `from` set to the value of `next_token`. This will return a new page.
+
+If the endpoint does not return a `next_token` then there are no more destinations
+to paginate through.
+
+**Parameters**
+
+The following query parameters are available:
+
+- `from` - Offset in the returned list. Defaults to `0`.
+- `limit` - Maximum amount of destinations to return. Defaults to `100`.
+- `order_by` - The method in which to sort the returned list of destinations.
+  Valid values are:
+  - `destination` - Destinations are ordered alphabetically by remote server name.
+    This is the default.
+  - `retry_last_ts` - Destinations are ordered by time of last retry attempt in ms.
+  - `retry_interval` - Destinations are ordered by how long until next retry in ms.
+  - `failure_ts` - Destinations are ordered by when the server started failing in ms.
+  - `last_successful_stream_ordering` - Destinations are ordered by the stream ordering
+    of the most recent successfully-sent PDU.
+- `dir` - Direction of ordering. Either `f` for forwards or `b` for backwards. Setting
+  this value to `b` will reverse the above sort order. Defaults to `f`.
+
+*Caution:* The database only has an index on the column `destination`.
+This means that if a different sort order is used,
+this can cause a large load on the database, especially for large environments.
+
+**Response**
+
+The following fields are returned in the JSON response body:
+
+- `destinations` - An array of objects, each containing information about a destination.
+  Destination objects contain the following fields:
+  - `destination` - string - Name of the remote server to federate.
+  - `retry_last_ts` - integer - The last time Synapse tried and failed to reach the
+    remote server, in ms. This is `0` if the last attempt to communicate with the
+    remote server was successful.
+  - `retry_interval` - integer - How long Synapse will wait after the last failed
+    attempt before trying to reach the remote server again, in ms. This is `0` if no
+    further retrying is occurring.
+  - `failure_ts` - nullable integer - The first time Synapse tried and failed to reach the
+    remote server, in ms. This is `null` if communication with the remote server has never failed.
+  - `last_successful_stream_ordering` - nullable integer - The stream ordering of the most
+    recent successfully-sent [PDU](understanding_synapse_through_grafana_graphs.md#federation)
+    to this destination, or `null` if this information has not been tracked yet.
+- `next_token`: string representing a positive integer - Indication for pagination. See above.
+- `total` - integer - Total number of destinations.
+
+# Destination Details API
+
+This API gets the retry timing info for a specific remote server.
+
+The API is:
+
+```
+GET /_synapse/admin/v1/federation/destinations/<destination>
+```
+
+A response body like the following is returned:
+
+```json
+{
+   "destination": "matrix.org",
+   "retry_last_ts": 1557332397936,
+   "retry_interval": 3000000,
+   "failure_ts": 1557329397936,
+   "last_successful_stream_ordering": null
+}
+```
+
+**Response**
+
+The response fields are the same as those in the `destinations` array in the
+[List of destinations](#list-of-destinations) response.
diff --git a/docs/usage/administration/admin_api/registration_tokens.md b/docs/usage/administration/admin_api/registration_tokens.md
new file mode 100644
index 000000000000..13d5eb75e933
--- /dev/null
+++ b/docs/usage/administration/admin_api/registration_tokens.md
@@ -0,0 +1,296 @@
+# Registration Tokens
+
+This API allows you to manage tokens which can be used to authenticate
+registration requests, as proposed in
+[MSC3231](https://github.com/matrix-org/matrix-doc/blob/main/proposals/3231-token-authenticated-registration.md).
+To use it, you will need to enable the `registration_requires_token` config
+option, and authenticate by providing an `access_token` for a server admin:
+see [Admin API](../../usage/administration/admin_api).
+Note that this API is still experimental; not all clients may support it yet.
+
+
+## Registration token objects
+
+Most endpoints make use of JSON objects that contain details about tokens.
+These objects have the following fields:
+- `token`: The token which can be used to authenticate registration.
+- `uses_allowed`: The number of times the token can be used to complete a
+  registration before it becomes invalid.
+- `pending`: The number of pending uses the token has. When someone uses
+  the token to authenticate themselves, the pending counter is incremented
+  so that the token is not used more than the permitted number of times.
+  When the person completes registration the pending counter is decremented,
+  and the completed counter is incremented.
+- `completed`: The number of times the token has been used to successfully
+  complete a registration.
+- `expiry_time`: The latest time the token is valid. Given as the number of
+  milliseconds since 1970-01-01 00:00:00 UTC (the start of the Unix epoch).
+  To convert this into a human-readable form you can remove the milliseconds
+  and use the `date` command. For example, `date -d '@1625394937'`.
+
+
+## List all tokens
+
+Lists all tokens and details about them. If the request is successful, the top
+level JSON object will have a `registration_tokens` key which is an array of
+registration token objects.
+
+```
+GET /_synapse/admin/v1/registration_tokens
+```
+
+Optional query parameters:
+- `valid`: `true` or `false`. If `true`, only valid tokens are returned.
+  If `false`, only tokens that have expired or have had all uses exhausted are
+  returned. If omitted, all tokens are returned regardless of validity.
+
+Example:
+
+```
+GET /_synapse/admin/v1/registration_tokens
+```
+```
+200 OK
+
+{
+    "registration_tokens": [
+        {
+            "token": "abcd",
+            "uses_allowed": 3,
+            "pending": 0,
+            "completed": 1,
+            "expiry_time": null
+        },
+        {
+            "token": "pqrs",
+            "uses_allowed": 2,
+            "pending": 1,
+            "completed": 1,
+            "expiry_time": null
+        },
+        {
+            "token": "wxyz",
+            "uses_allowed": null,
+            "pending": 0,
+            "completed": 9,
+            "expiry_time": 1625394937000    // 2021-07-04 10:35:37 UTC
+        }
+    ]
+}
+```
+
+Example using the `valid` query parameter:
+
+```
+GET /_synapse/admin/v1/registration_tokens?valid=false
+```
+```
+200 OK
+
+{
+    "registration_tokens": [
+        {
+            "token": "pqrs",
+            "uses_allowed": 2,
+            "pending": 1,
+            "completed": 1,
+            "expiry_time": null
+        },
+        {
+            "token": "wxyz",
+            "uses_allowed": null,
+            "pending": 0,
+            "completed": 9,
+            "expiry_time": 1625394937000    // 2021-07-04 10:35:37 UTC
+        }
+    ]
+}
+```
+
+
+## Get one token
+
+Get details about a single token. If the request is successful, the response
+body will be a registration token object.
+
+```
+GET /_synapse/admin/v1/registration_tokens/<token>
+```
+
+Path parameters:
+- `token`: The registration token to return details of.
+
+Example:
+
+```
+GET /_synapse/admin/v1/registration_tokens/abcd
+```
+```
+200 OK
+
+{
+    "token": "abcd",
+    "uses_allowed": 3,
+    "pending": 0,
+    "completed": 1,
+    "expiry_time": null
+}
+```
+
+
+## Create token
+
+Create a new registration token. If the request is successful, the newly created
+token will be returned as a registration token object in the response body.
+
+```
+POST /_synapse/admin/v1/registration_tokens/new
+```
+
+The request body must be a JSON object and can contain the following fields:
+- `token`: The registration token. A string of no more than 64 characters that
+  consists only of characters matched by the regex `[A-Za-z0-9._~-]`.
+  Default: randomly generated.
+- `uses_allowed`: The integer number of times the token can be used to complete
+  a registration before it becomes invalid.
+  Default: `null` (unlimited uses).
+- `expiry_time`: The latest time the token is valid. Given as the number of
+  milliseconds since 1970-01-01 00:00:00 UTC (the start of the Unix epoch).
+  You could use, for example, `date '+%s000' -d 'tomorrow'`.
+  Default: `null` (token does not expire).
+- `length`: The length of the randomly generated token if `token` is not
+  specified. Must be between 1 and 64 inclusive. Default: `16`.
+
+If a field is omitted the default is used.
+
+Example using defaults:
+
+```
+POST /_synapse/admin/v1/registration_tokens/new
+
+{}
+```
+```
+200 OK
+
+{
+    "token": "0M-9jbkf2t_Tgiw1",
+    "uses_allowed": null,
+    "pending": 0,
+    "completed": 0,
+    "expiry_time": null
+}
+```
+
+Example specifying some fields:
+
+```
+POST /_synapse/admin/v1/registration_tokens/new
+
+{
+    "token": "defg",
+    "uses_allowed": 1
+}
+```
+```
+200 OK
+
+{
+    "token": "defg",
+    "uses_allowed": 1,
+    "pending": 0,
+    "completed": 0,
+    "expiry_time": null
+}
+```
+
+
+## Update token
+
+Update the number of allowed uses or expiry time of a token. If the request is
+successful, the updated token will be returned as a registration token object
+in the response body.
+
+```
+PUT /_synapse/admin/v1/registration_tokens/<token>
+```
+
+Path parameters:
+- `token`: The registration token to update.
+
+The request body must be a JSON object and can contain the following fields:
+- `uses_allowed`: The integer number of times the token can be used to complete
+  a registration before it becomes invalid. By setting `uses_allowed` to `0`
+  the token can be easily made invalid without deleting it.
+  If `null` the token will have an unlimited number of uses.
+- `expiry_time`: The latest time the token is valid. Given as the number of
+  milliseconds since 1970-01-01 00:00:00 UTC (the start of the Unix epoch).
+  If `null` the token will not expire.
+
+If a field is omitted its value is not modified.
+
+Example:
+
+```
+PUT /_synapse/admin/v1/registration_tokens/defg
+
+{
+    "expiry_time": 4781243146000    // 2121-07-06 11:05:46 UTC
+}
+```
+```
+200 OK
+
+{
+    "token": "defg",
+    "uses_allowed": 1,
+    "pending": 0,
+    "completed": 0,
+    "expiry_time": 4781243146000
+}
+```
+
+
+## Delete token
+
+Delete a registration token. If the request is successful, the response body
+will be an empty JSON object.
+
+```
+DELETE /_synapse/admin/v1/registration_tokens/<token>
+```
+
+Path parameters:
+- `token`: The registration token to delete.
+
+Example:
+
+```
+DELETE /_synapse/admin/v1/registration_tokens/wxyz
+```
+```
+200 OK
+
+{}
+```
+
+
+## Errors
+
+If a request fails a "standard error response" will be returned as defined in
+the [Matrix Client-Server API specification](https://matrix.org/docs/spec/client_server/r0.6.1#api-standards).
+
+For example, if the token specified in a path parameter does not exist a
+`404 Not Found` error will be returned.
+
+```
+GET /_synapse/admin/v1/registration_tokens/1234
+```
+```
+404 Not Found
+
+{
+    "errcode": "M_NOT_FOUND",
+    "error": "No such registration token: 1234"
+}
+```
diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md
new file mode 100644
index 000000000000..3dcad4bbef5d
--- /dev/null
+++ b/docs/usage/administration/admin_faq.md
@@ -0,0 +1,103 @@
+## Admin FAQ
+
+How do I become a server admin?
+---
+If your server already has an admin account you should use the user admin API to promote other accounts to become admins.
+See [User Admin API](../../admin_api/user_admin_api.md#Change-whether-a-user-is-a-server-administrator-or-not)
+
+If you don't have any admin accounts yet, you won't be able to use the admin API, so you'll have to edit the database manually. Manually editing the database is generally not recommended, so once you have an admin account, use the admin APIs to make further changes.
+
+```sql
+UPDATE users SET admin = 1 WHERE name = '@foo:bar.com';
+```
+What servers is my server talking to?
+---
+Run this SQL query on your database:
+```sql
+SELECT * FROM destinations;
+```
+
+What servers are currently participating in this room?
+---
+Run this SQL query on your database:
+```sql
+SELECT DISTINCT split_part(state_key, ':', 2)
+    FROM current_state_events AS c
+    INNER JOIN room_memberships AS m USING (room_id, event_id)
+    WHERE room_id = '!cURbafjkfsMDVwdRDQ:matrix.org' AND membership = 'join';
+```
+
+What users are registered on my server?
+---
+```sql
+SELECT name FROM users;
+```
+
+Manually resetting passwords
+---
+See https://github.com/matrix-org/synapse/blob/master/README.rst#password-reset
+
+I have a problem with my server. Can I just delete my database and start again?
+---
+Deleting your database is unlikely to make anything better.
+
+It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated network: lots of other servers have information about your server.
+
+For example: other servers might think that you are in a room, your server will think that you are not, and you'll probably be unable to interact with that room in a sensible way ever again.
+
+In general, there are better solutions to any problem than dropping the database. Come and seek help in https://matrix.to/#/#synapse:matrix.org.
+
+There are two exceptions when it might be sensible to delete your database and start again:
+* You have *never* joined any rooms which are federated with other servers. For instance, a local deployment which the outside world can't talk to.
+* You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database.
+(In both cases you probably also want to clear out the media_store.)
+
+I've stuffed up access to my room, how can I delete it to free up the alias?
+---
+Use the following curl command:
+```
+curl -H 'Authorization: Bearer <access-token>' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/<room-alias>
+```
+`<access-token>` - can be obtained in Riot by looking in the Riot settings, down the bottom is:
+Access Token:\<click to reveal\>
+
+`<room-alias>` - the room alias, e.g. `#my_room:matrix.org`. This possibly needs to be URL encoded also, for example `%23my_room%3Amatrix.org`.
+
+How can I find the lines corresponding to a given HTTP request in my homeserver log?
+---
+
+Synapse tags each log line according to the HTTP request it is processing. When it finishes processing each request, it logs a line containing the words `Processed request: `. For example:
+
+```
+2019-02-14 22:35:08,196 - synapse.access.http.8008 - 302 - INFO - GET-37 - ::1 - 8008 - {@richvdh:localhost} Processed request: 0.173sec/0.001sec (0.002sec, 0.000sec) (0.027sec/0.026sec/2) 687B 200 "GET /_matrix/client/r0/sync HTTP/1.1" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" [0 dbevts]
+```
+
+Here we can see that the request has been tagged with `GET-37`. (The tag depends on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, `OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do:
+
+```
+grep 'GET-37' homeserver.log
+```
+
+If you want to paste that output into a GitHub issue or Matrix room, please remember to surround it with triple-backticks (```) to make it legible (see https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code).
+
+
+What do all those fields in the 'Processed' line mean?
+---
+See [Request log format](request_log.md).
+
+
+What are the biggest rooms on my server?
+---
+
+```sql
+SELECT s.canonical_alias, g.room_id, count(*) AS num_rows
+FROM
+    state_groups_state AS g,
+    room_stats_state AS s
+WHERE g.room_id = s.room_id
+GROUP BY s.canonical_alias, g.room_id
+ORDER BY num_rows DESC
+LIMIT 10;
+```
+
+You can also use the [List Room API](../../admin_api/rooms.md#list-room-api)
+and `order_by` `state_events`.
diff --git a/docs/usage/administration/database_maintenance_tools.md b/docs/usage/administration/database_maintenance_tools.md
new file mode 100644
index 000000000000..92b805d413cb
--- /dev/null
+++ b/docs/usage/administration/database_maintenance_tools.md
@@ -0,0 +1,18 @@
+This blog post by Victor Berger explains how to use many of the tools listed on this page: https://levans.fr/shrink-synapse-database.html
+
+# List of useful tools and scripts for maintaining the Synapse database:
+
+## [Purge Remote Media API](../../admin_api/media_admin_api.md#purge-remote-media-api)
+The purge remote media API allows server admins to purge old cached remote media.
+
+## [Purge Local Media API](../../admin_api/media_admin_api.md#delete-local-media)
+This API deletes the *local* media from the disk of your own server.
+
+## [Purge History API](../../admin_api/purge_history_api.md)
+The purge history API allows server admins to purge historic events from their database, reclaiming disk space.
+
+## [synapse-compress-state](https://github.com/matrix-org/rust-synapse-compress-state)
+A tool for compressing (deduplicating) the `state_groups_state` table.
+
+## [SQL for analyzing Synapse PostgreSQL database stats](useful_sql_for_admins.md)
+Some easy SQL that reports useful stats about your Synapse database.
\ No newline at end of file
diff --git a/docs/usage/administration/state_groups.md b/docs/usage/administration/state_groups.md
new file mode 100644
index 000000000000..f1dee7accf0f
--- /dev/null
+++ b/docs/usage/administration/state_groups.md
@@ -0,0 +1,25 @@
+# How do State Groups work?
+
+As a general rule, I encourage people who want to understand the deepest darkest secrets of the database schema to drop by #synapse-dev:matrix.org and ask questions.
+
+However, one question that comes up frequently is that of how "state groups" work, and why the `state_groups_state` table gets so big, so here's an attempt to answer that question.
+
+We need to be able to relatively quickly calculate the state of a room at any point in that room's history. In other words, we need to know the state of the room at each event in that room. This is done as follows:
+
+A sequence of events where the state is the same is grouped together into a `state_group`; the mapping is recorded in `event_to_state_groups`. (Technically speaking, since a state event usually changes the state in the room, we are recording the state of the room *after* the given event id: which is to say, as a handwavey simplification, the first event in a state group is normally a state event, and others in the same state group are normally non-state-events.)
+
+`state_groups` records, for each state group, the id of the room that we're looking at, and also the id of the first event in that group. (I'm not sure if that event id is used much in practice.)
+
+Now, if we stored all the room state for each `state_group`, that would be a huge amount of data. Instead, for each state group, we normally store the difference between the state in that group and some other state group, and only occasionally (every 100 state changes or so) record the full state.
+
+So, most state groups have an entry in `state_group_edges` (don't ask me why it's not a column in `state_groups`) which records the previous state group in the room, and `state_groups_state` records the differences in state since that previous state group.
+
+A full state group just records the event id for each piece of state in the room at that point.
+
+## Known bugs with state groups
+
+There are various reasons that we can end up creating many more state groups than we need: see https://github.com/matrix-org/synapse/issues/3364 for more details.
+
+## Compression tool
+
+There is a tool at https://github.com/matrix-org/rust-synapse-compress-state which can compress the `state_groups_state` table on a room-by-room basis (essentially, it reduces the number of "full" state groups). This can result in dramatic reductions of the storage used.
\ No newline at end of file
diff --git a/docs/usage/administration/understanding_synapse_through_grafana_graphs.md b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md
new file mode 100644
index 000000000000..c365cc392309
--- /dev/null
+++ b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md
@@ -0,0 +1,84 @@
+## Understanding Synapse through Grafana graphs
+
+It is possible to monitor much of the internal state of Synapse using [Prometheus](https://prometheus.io)
+metrics and [Grafana](https://grafana.com/).
+A guide for configuring Synapse to provide metrics is available [here](../../metrics-howto.md)
+and information on setting up Grafana is [here](https://github.com/matrix-org/synapse/tree/master/contrib/grafana).
+In this setup, Prometheus will periodically scrape the information Synapse provides and
+store a record of it over time. Grafana is then used as an interface to query and
+present this information through a series of pretty graphs.
+
+Once you have Grafana set up, and assuming you're using [our Grafana dashboard template](https://github.com/matrix-org/synapse/blob/master/contrib/grafana/synapse.json), look for the following graphs when debugging a slow/overloaded Synapse:
+
+## Message Event Send Time
+
+![image](https://user-images.githubusercontent.com/1342360/82239409-a1c8e900-9930-11ea-8081-e4614e0c63f4.png)
+
+This, along with the CPU and Memory graphs, is a good way to check the general health of your Synapse instance. It represents how long it takes for a user on your homeserver to send a message.
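+
+If you want to inspect the underlying data in Prometheus directly, a rough sketch of the kind of query behind a panel like this (the metric and servlet label names here are assumptions based on Synapse's request-time histogram, and may differ from the exact dashboard definition):
+
+```
+histogram_quantile(0.99, sum(rate(
+    synapse_http_server_response_time_seconds_bucket{servlet="RoomSendEventRestServlet"}[2m]
+)) by (le))
+```
+
+This estimates the 99th-percentile message send time over the last two minutes.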
+
+## Transaction Count and Transaction Duration
+
+![image](https://user-images.githubusercontent.com/1342360/82239985-8d392080-9931-11ea-80d0-843ab2f22e1e.png)
+
+![image](https://user-images.githubusercontent.com/1342360/82240050-ab068580-9931-11ea-98f1-f94671cbac9a.png)
+
+These graphs show the database transactions that are occurring the most frequently, as well as those that are taking the most time to execute.
+
+![image](https://user-images.githubusercontent.com/1342360/82240192-e86b1300-9931-11ea-9aac-3e2c9bfa6fdc.png)
+
+In the first graph, we can see obvious spikes corresponding to lots of `get_user_by_id` transactions. This would be useful information to figure out which part of the Synapse codebase is potentially creating a heavy load on the system. However, be sure to cross-reference this with Transaction Duration, which states that `get_user_by_id` is actually a very quick database transaction and isn't causing as much load as others, like `persist_events`:
+
+![image](https://user-images.githubusercontent.com/1342360/82240467-62030100-9932-11ea-8db9-917f2d977fe1.png)
+
+Still, it's probably worth investigating why we're getting users from the database that often, and whether it's possible to reduce the number of queries we make by adjusting our cache factor(s).
+
+The `persist_events` transaction is responsible for saving new room events to the Synapse database, so can often show a high transaction duration.
+
+## Federation
+
+The charts in the "Federation" section show information about incoming and outgoing federation requests. Federation data can be divided into two basic types:
+
+- PDU (Persistent Data Unit) - room events: messages, state events (join/leave), etc. These are permanently stored in the database.
+- EDU (Ephemeral Data Unit) - other data, which need not be stored permanently, such as read receipts and typing notifications.
+
+The "Outgoing EDUs by type" chart shows the EDUs within outgoing federation requests by type: `m.device_list_update`, `m.direct_to_device`, `m.presence`, `m.receipt`, `m.typing`.
+
+If you see a large number of `m.presence` EDUs and are having trouble with too much CPU load, you can disable `presence` in the Synapse config. See also [#3971](https://github.com/matrix-org/synapse/issues/3971).
+
+## Caches
+
+![image](https://user-images.githubusercontent.com/1342360/82240572-8b239180-9932-11ea-96ff-6b5f0e57ebe5.png)
+
+![image](https://user-images.githubusercontent.com/1342360/82240666-b8703f80-9932-11ea-86af-9f663988d8da.png)
+
+This is quite a useful graph. It shows how many times Synapse attempts to retrieve a piece of data from a cache that does not contain it, thus resulting in a call to the database. We can see here that the `_get_joined_profile_from_event_id` cache is being requested a lot, and often the data we're after is not cached.
+
+Cross-referencing this with the Eviction Rate graph, which shows that entries are being evicted from `_get_joined_profile_from_event_id` quite often:
+
+![image](https://user-images.githubusercontent.com/1342360/82240766-de95df80-9932-11ea-8c15-5acfc57c48da.png)
+
+we should probably consider raising the size of that cache by raising its cache factor (a multiplier value for the size of an individual cache). Information on doing so is available [here](https://github.com/matrix-org/synapse/blob/ee421e524478c1ad8d43741c27379499c2f6135c/docs/sample_config.yaml#L608-L642) (note that the configuration of individual cache factors through the configuration file is available in Synapse v1.14.0+, whereas doing so through environment variables has been supported for a very long time). Note that this will increase Synapse's overall memory usage.
+
+## Forward Extremities
+
+![image](https://user-images.githubusercontent.com/1342360/82241440-13566680-9934-11ea-8b88-ba468db937ed.png)
+
+Forward extremities are the leaf events at the end of a DAG in a room, aka events that have no children. The more that exist in a room, the more [state resolution](https://spec.matrix.org/v1.1/server-server-api/#room-state-resolution) that Synapse needs to perform (hint: it's an expensive operation). While Synapse has code to prevent too many of these existing at one time in a room, bugs can sometimes make them crop up again.
+
+If a room has >10 forward extremities, it's worth checking which room is the culprit and potentially removing them using the SQL queries mentioned in [#1760](https://github.com/matrix-org/synapse/issues/1760).
+
+## Garbage Collection
+
+![image](https://user-images.githubusercontent.com/1342360/82241911-da6ac180-9934-11ea-9a0d-a311fe22acd0.png)
+
+Large spikes in garbage collection times (bigger than shown here, I'm talking in the multiple seconds range) can cause lots of problems in Synapse performance. It's more an indicator and symptom of other problems, though, so check other graphs for what might be causing it.
+
+## Final Thoughts
+
+If you're still having performance problems with your Synapse instance and you've
+tried everything you can, it may just be a lack of system resources. Consider adding
+more CPU and RAM, and use [worker mode](../../workers.md)
+to take advantage of multiple CPU cores / multiple machines for your homeserver.
+
diff --git a/docs/usage/administration/useful_sql_for_admins.md b/docs/usage/administration/useful_sql_for_admins.md
new file mode 100644
index 000000000000..d4aada3272d0
--- /dev/null
+++ b/docs/usage/administration/useful_sql_for_admins.md
@@ -0,0 +1,156 @@
+## Some useful SQL queries for Synapse Admins
+
+## Size of full Matrix database
+`SELECT pg_size_pretty( pg_database_size( 'matrix' ) );`
+### Result example:
+```
+pg_size_pretty
+----------------
+ 6420 MB
+(1 row)
+```
+## Show top 20 largest rooms by state events count
+```sql
+SELECT r.name, s.room_id, s.current_state_events
+    FROM room_stats_current s
+    LEFT JOIN room_stats_state r USING (room_id)
+    ORDER BY current_state_events DESC
+    LIMIT 20;
+```
+
+and by state_group_events count:
+```sql
+SELECT rss.name, s.room_id, count(s.room_id) FROM state_groups_state s
+LEFT JOIN room_stats_state rss USING (room_id)
+GROUP BY s.room_id, rss.name
+ORDER BY count(s.room_id) DESC
+LIMIT 20;
+```
+and the same, but with the join removed for performance reasons:
+```sql
+SELECT s.room_id, count(s.room_id) FROM state_groups_state s
+GROUP BY s.room_id
+ORDER BY count(s.room_id) DESC
+LIMIT 20;
+```
+
+## Show top 20 largest tables by row count
+```sql
+SELECT relname, n_live_tup as rows
+    FROM pg_stat_user_tables
+    ORDER BY n_live_tup DESC
+    LIMIT 20;
+```
+This query is quick, but may be very approximate; for an exact number of rows use `SELECT COUNT(*) FROM <table_name>`.
+### Result example:
+```
+state_groups_state - 161687170
+event_auth - 8584785
+event_edges - 6995633
+event_json - 6585916
+event_reference_hashes - 6580990
+events - 6578879
+received_transactions - 5713989
+event_to_state_groups - 4873377
+stream_ordering_to_exterm - 4136285
+current_state_delta_stream - 3770972
+event_search - 3670521
+state_events - 2845082
+room_memberships - 2785854
+cache_invalidation_stream - 2448218
+state_groups - 1255467
+state_group_edges - 1229849
+current_state_events - 1222905
+users_in_public_rooms - 364059
+device_lists_stream - 326903
+user_directory_search - 316433
+```
+
+## Show top 20 rooms by new events count in the last day:
+```sql
+SELECT e.room_id, r.name, COUNT(e.event_id) cnt FROM events e
+    LEFT JOIN room_stats_state r USING (room_id)
+    WHERE e.origin_server_ts >= DATE_PART('epoch', NOW() - INTERVAL '1 day') * 1000
+    GROUP BY e.room_id, r.name
+    ORDER BY cnt DESC
+    LIMIT 20;
+```
+
+## Show top 20 users on homeserver by sent events (messages) in the last month:
+```sql
+SELECT user_id, SUM(total_events)
+    FROM user_stats_historical
+    WHERE TO_TIMESTAMP(end_ts/1000) AT TIME ZONE 'UTC' > date_trunc('day', now() - interval '1 month')
+    GROUP BY user_id
+    ORDER BY SUM(total_events) DESC
+    LIMIT 20;
+```
+
+## Show last 100 messages from a given user, with room names:
+```sql
+SELECT e.room_id, r.name, e.event_id, e.type, e.content, j.json FROM events e
+    LEFT JOIN event_json j USING (event_id)
+    LEFT JOIN room_stats_state r USING (room_id)
+    WHERE sender = '@LOGIN:example.com'
+    AND e.type = 'm.room.message'
+    ORDER BY stream_ordering DESC
+    LIMIT 100;
+```
+
+## Show top 20 largest tables by storage size
+```sql
+SELECT nspname || '.' || relname AS "relation",
+    pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size"
+  FROM pg_class C
+  LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
+  WHERE nspname NOT IN ('pg_catalog', 'information_schema')
+    AND C.relkind <> 'i'
+    AND nspname !~ '^pg_toast'
+  ORDER BY pg_total_relation_size(C.oid) DESC
+  LIMIT 20;
+```
+### Result example:
+```
+public.state_groups_state - 27 GB
+public.event_json - 9855 MB
+public.events - 3675 MB
+public.event_edges - 3404 MB
+public.received_transactions - 2745 MB
+public.event_reference_hashes - 1864 MB
+public.event_auth - 1775 MB
+public.stream_ordering_to_exterm - 1663 MB
+public.event_search - 1370 MB
+public.room_memberships - 1050 MB
+public.event_to_state_groups - 948 MB
+public.current_state_delta_stream - 711 MB
+public.state_events - 611 MB
+public.presence_stream - 530 MB
+public.current_state_events - 525 MB
+public.cache_invalidation_stream - 466 MB
+public.receipts_linearized - 279 MB
+public.state_groups - 160 MB
+public.device_lists_remote_cache - 124 MB
+public.state_group_edges - 122 MB
+```
+
+## Show rooms with names, sorted by event count
+`echo "select event_json.room_id,room_stats_state.name from event_json,room_stats_state where room_stats_state.room_id=event_json.room_id" | psql synapse | sort | uniq -c | sort -n`
+### Result example:
+```
+   9459 !FPUfgzXYWTKgIrwKxW:matrix.org | This Week in Matrix
+   9459 !FPUfgzXYWTKgIrwKxW:matrix.org | This Week in Matrix (TWIM)
+  17799 !iDIOImbmXxwNngznsa:matrix.org | Linux in Russian
+  18739 !GnEEPYXUhoaHbkFBNX:matrix.org | Riot Android
+  23373 !QtykxKocfZaZOUrTwp:matrix.org | Matrix HQ
+  39504 !gTQfWzbYncrtNrvEkB:matrix.org | ru.[matrix]
+  43601 !iNmaIQExDMeqdITdHH:matrix.org | Riot
+  43601 !iNmaIQExDMeqdITdHH:matrix.org | Riot Web/Desktop
+```
+
+## Lookup room state info by list of room_id
+```sql
+SELECT rss.room_id, rss.name, rss.canonical_alias, rss.topic, rss.encryption,
+    rsc.joined_members, rsc.local_users_in_room, rss.join_rules
+FROM room_stats_state rss
+LEFT JOIN room_stats_current rsc USING (room_id)
+WHERE room_id IN (
+    '!OGEhHVWSdvArJzumhm:matrix.org',
+    '!YTvKGNlinIzlkMTVRl:matrix.org'
+);
+```
\ No newline at end of file
diff --git a/docs/usage/configuration/logging_sample_config.md b/docs/usage/configuration/logging_sample_config.md
index a673d487b85e..499ab7cfe500 100644
--- a/docs/usage/configuration/logging_sample_config.md
+++ b/docs/usage/configuration/logging_sample_config.md
@@ -2,13 +2,13 @@
 
 Below is a sample logging configuration file. This file can be tweaked to control how your
 homeserver will output logs. A restart of the server is generally required to apply any
-changes made to this file.
+changes made to this file. The value of the `log_config` option in your homeserver
+config should be the path to this file.
 
-Note that the contents below are *not* intended to be copied and used as the basis for
-a real homeserver.yaml. Instead, if you are starting from scratch, please generate
-a fresh config using Synapse by following the instructions in
-[Installation](../../setup/installation.md).
+Note that a default logging configuration (shown below) is created automatically alongside
+the homeserver config when following the [installation instructions](../../setup/installation.md).
+It should be named `<SERVERNAME>.log.config` by default.
 
 ```yaml
 {{#include ../../sample_log_config.yaml}}
-```
\ No newline at end of file
+```
diff --git a/docs/usage/configuration/user_authentication/single_sign_on/README.md b/docs/usage/configuration/user_authentication/single_sign_on/README.md
new file mode 100644
index 000000000000..b94aad92cf28
--- /dev/null
+++ b/docs/usage/configuration/user_authentication/single_sign_on/README.md
@@ -0,0 +1,5 @@
+# Single Sign-On
+
+Synapse supports single sign-on through the SAML, OpenID Connect or CAS protocols.
+LDAP and other login methods are supported through first- and third-party password
+auth provider modules.
\ No newline at end of file
diff --git a/docs/usage/configuration/user_authentication/single_sign_on/cas.md b/docs/usage/configuration/user_authentication/single_sign_on/cas.md
new file mode 100644
index 000000000000..3bac1b29f051
--- /dev/null
+++ b/docs/usage/configuration/user_authentication/single_sign_on/cas.md
@@ -0,0 +1,8 @@
+# CAS
+
+Synapse supports authenticating users via the [Central Authentication
+Service protocol](https://en.wikipedia.org/wiki/Central_Authentication_Service)
+(CAS) natively.
+
+Please see the `cas_config` and `sso` sections of the [Synapse configuration
+file](../../../configuration/homeserver_sample_config.md) for more details.
\ No newline at end of file
diff --git a/docs/usage/configuration/user_authentication/single_sign_on/saml.md b/docs/usage/configuration/user_authentication/single_sign_on/saml.md
new file mode 100644
index 000000000000..2b6f052cc1d7
--- /dev/null
+++ b/docs/usage/configuration/user_authentication/single_sign_on/saml.md
@@ -0,0 +1,8 @@
+# SAML
+
+Synapse supports authenticating users via the [Security Assertion
+Markup Language](https://en.wikipedia.org/wiki/Security_Assertion_Markup_Language)
+(SAML) protocol natively.
+
+Please see the `saml2_config` and `sso` sections of the [Synapse configuration
+file](../../../configuration/homeserver_sample_config.md) for more details.
\ No newline at end of file diff --git a/docs/user_directory.md b/docs/user_directory.md index d4f38d2cf11a..c4794b04cf61 100644 --- a/docs/user_directory.md +++ b/docs/user_directory.md @@ -6,7 +6,44 @@ on this particular server - i.e. ones which your account shares a room with, or who are present in a publicly viewable room present on the server. The directory info is stored in various tables, which can (typically after -DB corruption) get stale or out of sync. If this happens, for now the -solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql) -and then restart synapse. This should then start a background task to +DB corruption) get stale or out of sync. If this happens, for now the +solution to fix it is to use the [admin API](usage/administration/admin_api/background_updates.md#run) +and execute the job `regenerate_directory`. This should then start a background task to flush the current tables and regenerate the directory. + +Data model +---------- + +There are five relevant tables that collectively form the "user directory". +Three of them track a master list of all the users we could search for. +The last two (collectively called the "search tables") track who can +see who. + +From all of these tables we exclude three types of local user: + - support users + - appservice users + - deactivated users + +* `user_directory`. This contains the user_id, display name and avatar we'll + return when you search the directory. + - Because there's only one directory entry per user, it's important that we only + ever put publicly visible names here. Otherwise we might leak a private + nickname or avatar used in a private room. + - Indexed on rooms. Indexed on users. + +* `user_directory_search`. To be joined to `user_directory`. It contains an extra + column that enables full text search based on user ids and display names. + Different schemas for SQLite and Postgres with different code paths to match. + - Indexed on the full text search data. Indexed on users. + +* `user_directory_stream_pos`. When the initial background update to populate + the directory is complete, we record a stream position here. This indicates + that synapse should now listen for room changes and incrementally update + the directory where necessary. + +* `users_in_public_rooms`. Contains associations between users and the public rooms they're in. + Used to determine which users are in public rooms and should be publicly visible in the directory. + +* `users_who_share_private_rooms`. Rows are triples `(L, M, room id)` where `L` + is a local user and `M` is a local or remote user. `L` and `M` should be + different, but this isn't enforced by a constraint. 
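+
+As a rough illustration of how the search tables are used, here is a sketch of the kind
+of query involved (the column names `user_id` and `other_user_id` are assumptions based
+on the triples described above) to list who a local user can discover via private rooms:
+
+```sql
+SELECT other_user_id
+  FROM users_who_share_private_rooms
+  WHERE user_id = '@alice:example.com';
+```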
diff --git a/docs/website_files/table-of-contents.css b/docs/website_files/table-of-contents.css
index d16bb3b9886b..1b6f44b66a2e 100644
--- a/docs/website_files/table-of-contents.css
+++ b/docs/website_files/table-of-contents.css
@@ -1,3 +1,7 @@
+:root {
+    --pagetoc-width: 250px;
+}
+
 @media only screen and (max-width:1439px) {
     .sidetoc {
         display: none;
@@ -8,6 +12,7 @@
     main {
         position: relative;
         margin-left: 100px !important;
+        margin-right: var(--pagetoc-width) !important;
     }
     .sidetoc {
         margin-left: auto;
@@ -18,7 +23,7 @@
     }
     .pagetoc {
         position: fixed;
-        width: 250px;
+        width: var(--pagetoc-width);
         overflow: auto;
         right: 20px;
         height: calc(100% - var(--menu-bar-height));
diff --git a/docs/welcome_and_overview.md b/docs/welcome_and_overview.md
index 30e75984d1ef..aab2d6b4f0f6 100644
--- a/docs/welcome_and_overview.md
+++ b/docs/welcome_and_overview.md
@@ -1,4 +1,79 @@
 # Introduction
 
-Welcome to the documentation repository for Synapse, the reference
-[Matrix](https://matrix.org) homeserver implementation.
\ No newline at end of file
+Welcome to the documentation repository for Synapse, a
+[Matrix](https://matrix.org) homeserver implementation developed by the matrix.org core
+team.
+
+## Installing and using Synapse
+
+This documentation covers topics for **installation**, **configuration** and
+**maintenance** of your Synapse process:
+
+* Learn how to [install](setup/installation.md) and
+  [configure](usage/configuration/index.html) your own instance, perhaps with [Single
+  Sign-On](usage/configuration/user_authentication/index.html).
+
+* See how to [upgrade](upgrade.md) between Synapse versions.
+
+* Administer your instance using the [Admin
+  API](usage/administration/admin_api/index.html), installing [pluggable
+  modules](modules/index.html), or by accessing the [manhole](manhole.md).
+
+* Learn how to [read log lines](usage/administration/request_log.md), configure
+  [logging](usage/configuration/logging_sample_config.md) or set up [structured
+  logging](structured_logging.md).
+
+* Scale Synapse through additional [worker processes](workers.md).
+
+* Set up [monitoring and metrics](metrics-howto.md) to keep an eye on your
+  Synapse instance's performance.
+
+## Developing on Synapse
+
+Contributions are welcome! Synapse is primarily written in
+[Python](https://python.org). As a developer, you may be interested in the
+following documentation:
+
+* Read the [Contributing Guide](development/contributing_guide.md). It is meant
+  to walk new contributors through the process of developing and submitting a
+  change to the Synapse codebase (which is [hosted on
+  GitHub](https://github.com/matrix-org/synapse)).
+
+* Set up your [development
+  environment](development/contributing_guide.md#2-what-do-i-need), then learn
+  how to [lint](development/contributing_guide.md#run-the-linters) and
+  [test](development/contributing_guide.md#8-test-test-test) your code.
+
+* Look at [the issue tracker](https://github.com/matrix-org/synapse/issues) for
+  bugs to fix or features to add. If you're new, it may be best to start with
+  those labeled [good first
+  issue](https://github.com/matrix-org/synapse/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).
+
+* Understand [how Synapse is
+  built](development/internal_documentation/index.html), how to [migrate
+  database schemas](development/database_schema.md), learn about
+  [federation](federate.md) and how to [set up a local
+  federation](federate.md#running-a-demo-federation-of-synapses) for development.
+
+* We like to keep our `git` history clean. [Learn](development/git.md) how to
+  do so!
+
+* And finally, contribute to this documentation! Its source is
+  [located here](https://github.com/matrix-org/synapse/tree/develop/docs).
+
+## Donating to Synapse development
+
+Want to help keep Synapse going but don't know how to code? Synapse is a
+[Matrix.org Foundation](https://matrix.org) project. Consider becoming a
+supporter on [Liberapay](https://liberapay.com/matrixdotorg),
+[Patreon](https://patreon.com/matrixdotorg) or through
+[PayPal](https://paypal.me/matrixdotorg) via a one-time donation.
+
+If you are an organisation or enterprise and would like to sponsor development,
+reach out to us over email at: support (at) matrix.org
+
+## Reporting a security vulnerability
+
+If you've found a security issue in Synapse or any other Matrix.org Foundation
+project, please report it to us in accordance with our [Security Disclosure
+Policy](https://www.matrix.org/security-disclosure-policy/). Thank you!
diff --git a/docs/workers.md b/docs/workers.md
index d8672324c301..fd83e2ddeb1f 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -182,10 +182,10 @@ This worker can handle API requests matching the following regular expressions:
 
     # Sync requests
-    ^/_matrix/client/(v2_alpha|r0)/sync$
-    ^/_matrix/client/(api/v1|v2_alpha|r0)/events$
-    ^/_matrix/client/(api/v1|r0)/initialSync$
-    ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$
+    ^/_matrix/client/(v2_alpha|r0|v3)/sync$
+    ^/_matrix/client/(api/v1|v2_alpha|r0|v3)/events$
+    ^/_matrix/client/(api/v1|r0|v3)/initialSync$
+    ^/_matrix/client/(api/v1|r0|v3)/rooms/[^/]+/initialSync$
 
     # Federation requests
     ^/_matrix/federation/v1/event/
@@ -209,40 +209,47 @@ expressions:
     ^/_matrix/federation/v1/user/devices/
     ^/_matrix/federation/v1/get_groups_publicised$
    ^/_matrix/key/v2/query
+    ^/_matrix/federation/unstable/org.matrix.msc2946/spaces/
+    ^/_matrix/federation/(v1|unstable/org.matrix.msc2946)/hierarchy/
 
     # Inbound federation transaction request
     ^/_matrix/federation/v1/send/
 
     # Client API requests
-    ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$
-    ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$
-    ^/_matrix/client/(api/v1|r0|unstable)/devices$
-    ^/_matrix/client/(api/v1|r0|unstable)/keys/query$
-    ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/createRoom$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/publicRooms$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/joined_members$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$
+    ^/_matrix/client/unstable/org.matrix.msc2946/rooms/.*/spaces$
+    ^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$
+    ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/devices$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/query$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/changes$
     ^/_matrix/client/versions$
-    ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
-    ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$
-
^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/event/ - ^/_matrix/client/(api/v1|r0|unstable)/joined_rooms$ - ^/_matrix/client/(api/v1|r0|unstable)/search$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_groups$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/publicised_groups$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/publicised_groups/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/search$ # Registration/login requests - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(r0|unstable)/register$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/login$ + ^/_matrix/client/(r0|v3|unstable)/register$ + ^/_matrix/client/unstable/org.matrix.msc3231/register/org.matrix.msc3231.login.registration_token/validity$ # Event sending requests - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ - ^/_matrix/client/(api/v1|r0|unstable)/join/ - ^/_matrix/client/(api/v1|r0|unstable)/profile/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/redact + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/join/ + ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/ Additionally, the following REST endpoints can be handled for GET requests: @@ -254,14 +261,14 @@ room must be routed to the same instance. Additionally, care must be taken to ensure that the purge history admin API is not used while pagination requests for the room are in flight: - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$ Additionally, the following endpoints should be included if Synapse is configured to use SSO (you only need to include the ones for whichever SSO provider you're using): # for all SSO providers - ^/_matrix/client/(api/v1|r0|unstable)/login/sso/redirect + ^/_matrix/client/(api/v1|r0|v3|unstable)/login/sso/redirect ^/_synapse/client/pick_idp$ ^/_synapse/client/pick_username ^/_synapse/client/new_user_consent$ @@ -274,7 +281,7 @@ using): ^/_synapse/client/saml2/authn_response$ # CAS requests. - ^/_matrix/client/(api/v1|r0|unstable)/login/cas/ticket$ + ^/_matrix/client/(api/v1|r0|v3|unstable)/login/cas/ticket$ Ensure that all SSO logins go to a single process. For multiple workers not handling the SSO endpoints properly, see @@ -425,28 +432,30 @@ Handles the media repository. It can handle all endpoints starting with: ^/_synapse/admin/v1/user/.*/media.*$ ^/_synapse/admin/v1/media/.*$ ^/_synapse/admin/v1/quarantine_media/.*$ + ^/_synapse/admin/v1/users/.*/media$ You should also set `enable_media_repo: False` in the shared configuration file to stop the main synapse running background jobs related to managing the -media repository. +media repository. Note that doing so will prevent the main process from being +able to handle the above endpoints. In the `media_repository` worker configuration file, configure the http listener to expose the `media` resource. 
For example:

```yaml
- worker_listeners:
-  - type: http
-    port: 8085
-    resources:
-      - names:
-        - media
+worker_listeners:
+ - type: http
+   port: 8085
+   resources:
+     - names:
+       - media
```

Note that if running multiple media repositories they must be on the same server
and you must configure a single instance to run the background tasks, e.g.:

```yaml
- media_instance_running_background_jobs: "media-repository-1"
+media_instance_running_background_jobs: "media-repository-1"
```

Note that if a reverse proxy is used, then `/_matrix/media/` must be routed for both inbound client and federation requests (if they are handled separately).
@@ -456,7 +465,7 @@

Handles searches in the user directory. It can handle REST endpoints matching
the following regular expressions:

-    ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/user_directory/search$

When using this worker you must also set `update_user_directory: False` in the
shared configuration file to stop the main synapse running background
@@ -468,12 +477,12 @@

Proxies some frequently-requested client endpoints to add caching and remove
load from the main synapse. It can handle REST endpoints matching the following
regular expressions:

-    ^/_matrix/client/(api/v1|r0|unstable)/keys/upload
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload

If `use_presence` is False in the homeserver config, it can also handle REST
endpoints matching the following regular expressions:

-    ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/[^/]+/status

This "stub" presence handler will pass through `GET` requests but make the
`PUT` effectively a no-op.

@@ -483,7 +492,9 @@
must therefore be configured with the location of the main instance, via
the `worker_main_http_uri` setting in the `frontend_proxy` worker configuration
file.
For example: - worker_main_http_uri: http://127.0.0.1:8008 +```yaml +worker_main_http_uri: http://127.0.0.1:8008 +``` ### Historical apps diff --git a/mypy.ini b/mypy.ini index 8717ae738e31..1caf807e8505 100644 --- a/mypy.ini +++ b/mypy.ini @@ -10,179 +10,320 @@ warn_unreachable = True local_partial_types = True no_implicit_optional = True -# To find all folders that pass mypy you run: -# -# find synapse/* -type d -not -name __pycache__ -exec bash -c "mypy '{}' > /dev/null" \; -print - files = scripts-dev/sign_json, - synapse/api, - synapse/appservice, - synapse/config, - synapse/crypto, - synapse/event_auth.py, - synapse/events/builder.py, - synapse/events/spamcheck.py, - synapse/events/third_party_rules.py, - synapse/events/validator.py, - synapse/federation, - synapse/groups, - synapse/handlers, - synapse/http/client.py, - synapse/http/federation/matrix_federation_agent.py, - synapse/http/federation/well_known_resolver.py, - synapse/http/matrixfederationclient.py, - synapse/http/servlet.py, - synapse/http/server.py, - synapse/http/site.py, - synapse/logging, - synapse/metrics, - synapse/module_api, - synapse/notifier.py, - synapse/push, - synapse/replication, - synapse/rest, - synapse/server.py, - synapse/server_notices, - synapse/spam_checker_api, - synapse/state, - synapse/storage/__init__.py, - synapse/storage/_base.py, - synapse/storage/background_updates.py, - synapse/storage/databases/main/appservice.py, - synapse/storage/databases/main/events.py, - synapse/storage/databases/main/keys.py, - synapse/storage/databases/main/pusher.py, - synapse/storage/databases/main/registration.py, - synapse/storage/databases/main/stream.py, - synapse/storage/databases/main/ui_auth.py, - synapse/storage/database.py, - synapse/storage/engines, - synapse/storage/keys.py, - synapse/storage/persist_events.py, - synapse/storage/prepare_database.py, - synapse/storage/purge_events.py, - synapse/storage/push_rule.py, - synapse/storage/relations.py, - synapse/storage/roommember.py, - synapse/storage/state.py, - synapse/storage/types.py, - synapse/storage/util, - synapse/streams, - synapse/types.py, - synapse/util/async_helpers.py, - synapse/util/caches, - synapse/util/daemonize.py, - synapse/util/hash.py, - synapse/util/iterutils.py, - synapse/util/linked_list.py, - synapse/util/metrics.py, - synapse/util/macaroons.py, - synapse/util/module_loader.py, - synapse/util/msisdn.py, - synapse/util/stringutils.py, - synapse/visibility.py, - tests/replication, - tests/test_event_auth.py, - tests/test_utils, - tests/handlers/test_password_providers.py, - tests/rest/client/v1/test_login.py, - tests/rest/client/v2_alpha/test_auth.py, - tests/util/test_itertools.py, - tests/util/test_stream_change_cache.py + setup.py, + synapse/, + tests/ -[mypy-pymacaroons.*] -ignore_missing_imports = True +# Note: Better exclusion syntax coming in mypy > 0.910 +# https://github.com/python/mypy/pull/11329 +# +# For now, set the (?x) flag enable "verbose" regexes +# https://docs.python.org/3/library/re.html#re.X +exclude = (?x) + ^( + |synapse/storage/databases/__init__.py + |synapse/storage/databases/main/__init__.py + |synapse/storage/databases/main/account_data.py + |synapse/storage/databases/main/cache.py + |synapse/storage/databases/main/devices.py + |synapse/storage/databases/main/e2e_room_keys.py + |synapse/storage/databases/main/end_to_end_keys.py + |synapse/storage/databases/main/event_federation.py + |synapse/storage/databases/main/event_push_actions.py + |synapse/storage/databases/main/events_bg_updates.py + 
|synapse/storage/databases/main/group_server.py + |synapse/storage/databases/main/metrics.py + |synapse/storage/databases/main/monthly_active_users.py + |synapse/storage/databases/main/presence.py + |synapse/storage/databases/main/purge_events.py + |synapse/storage/databases/main/push_rule.py + |synapse/storage/databases/main/receipts.py + |synapse/storage/databases/main/room.py + |synapse/storage/databases/main/roommember.py + |synapse/storage/databases/main/search.py + |synapse/storage/databases/main/state.py + |synapse/storage/databases/main/stats.py + |synapse/storage/databases/main/transactions.py + |synapse/storage/databases/main/user_directory.py + |synapse/storage/schema/ + + |tests/api/test_auth.py + |tests/api/test_ratelimiting.py + |tests/app/test_openid_listener.py + |tests/appservice/test_scheduler.py + |tests/config/test_cache.py + |tests/config/test_tls.py + |tests/crypto/test_keyring.py + |tests/events/test_presence_router.py + |tests/events/test_utils.py + |tests/federation/test_federation_catch_up.py + |tests/federation/test_federation_sender.py + |tests/federation/test_federation_server.py + |tests/federation/transport/test_knocking.py + |tests/federation/transport/test_server.py + |tests/handlers/test_cas.py + |tests/handlers/test_directory.py + |tests/handlers/test_e2e_keys.py + |tests/handlers/test_federation.py + |tests/handlers/test_oidc.py + |tests/handlers/test_presence.py + |tests/handlers/test_profile.py + |tests/handlers/test_saml.py + |tests/handlers/test_typing.py + |tests/http/federation/test_matrix_federation_agent.py + |tests/http/federation/test_srv_resolver.py + |tests/http/test_fedclient.py + |tests/http/test_proxyagent.py + |tests/http/test_servlet.py + |tests/http/test_site.py + |tests/logging/__init__.py + |tests/logging/test_terse_json.py + |tests/module_api/test_api.py + |tests/push/test_email.py + |tests/push/test_http.py + |tests/push/test_presentable_names.py + |tests/push/test_push_rule_evaluator.py + |tests/rest/admin/test_admin.py + |tests/rest/admin/test_user.py + |tests/rest/admin/test_username_available.py + |tests/rest/client/test_account.py + |tests/rest/client/test_events.py + |tests/rest/client/test_filter.py + |tests/rest/client/test_groups.py + |tests/rest/client/test_register.py + |tests/rest/client/test_report_event.py + |tests/rest/client/test_rooms.py + |tests/rest/client/test_third_party_rules.py + |tests/rest/client/test_transactions.py + |tests/rest/client/test_typing.py + |tests/rest/client/utils.py + |tests/rest/key/v2/test_remote_key_resource.py + |tests/rest/media/v1/test_base.py + |tests/rest/media/v1/test_media_storage.py + |tests/rest/media/v1/test_url_preview.py + |tests/scripts/test_new_matrix_user.py + |tests/server.py + |tests/server_notices/test_resource_limits_server_notices.py + |tests/state/test_v2.py + |tests/storage/test_account_data.py + |tests/storage/test_background_update.py + |tests/storage/test_base.py + |tests/storage/test_client_ips.py + |tests/storage/test_database.py + |tests/storage/test_event_federation.py + |tests/storage/test_id_generators.py + |tests/storage/test_roommember.py + |tests/test_metrics.py + |tests/test_phone_home.py + |tests/test_server.py + |tests/test_state.py + |tests/test_terms_auth.py + |tests/unittest.py + |tests/util/caches/test_cached_call.py + |tests/util/caches/test_deferred_cache.py + |tests/util/caches/test_descriptors.py + |tests/util/caches/test_response_cache.py + |tests/util/caches/test_ttlcache.py + |tests/util/test_async_helpers.py + 
|tests/util/test_batching_queue.py + |tests/util/test_dict_cache.py + |tests/util/test_expiring_cache.py + |tests/util/test_file_consumer.py + |tests/util/test_linearizer.py + |tests/util/test_logcontext.py + |tests/util/test_lrucache.py + |tests/util/test_rwlock.py + |tests/util/test_wheel_timer.py + |tests/utils.py + )$ + +[mypy-synapse.api.*] +disallow_untyped_defs = True + +[mypy-synapse.app.*] +disallow_untyped_defs = True + +[mypy-synapse.config._base] +disallow_untyped_defs = True + +[mypy-synapse.crypto.*] +disallow_untyped_defs = True + +[mypy-synapse.events.*] +disallow_untyped_defs = True + +[mypy-synapse.federation.*] +disallow_untyped_defs = True + +[mypy-synapse.federation.transport.client] +disallow_untyped_defs = False + +[mypy-synapse.handlers.*] +disallow_untyped_defs = True + +[mypy-synapse.metrics.*] +disallow_untyped_defs = True + +[mypy-synapse.module_api.*] +disallow_untyped_defs = True + +[mypy-synapse.push.*] +disallow_untyped_defs = True + +[mypy-synapse.rest.*] +disallow_untyped_defs = True + +[mypy-synapse.server_notices.*] +disallow_untyped_defs = True + +[mypy-synapse.state.*] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.client_ips] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.directory] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.events_worker] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.room_batch] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.profile] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.state_deltas] +disallow_untyped_defs = True + +[mypy-synapse.storage.databases.main.user_erasure_store] +disallow_untyped_defs = True + +[mypy-synapse.storage.util.*] +disallow_untyped_defs = True -[mypy-zope] -ignore_missing_imports = True +[mypy-synapse.streams.*] +disallow_untyped_defs = True -[mypy-bcrypt] +[mypy-synapse.util.*] +disallow_untyped_defs = True + +[mypy-synapse.util.caches.treecache] +disallow_untyped_defs = False + +[mypy-tests.handlers.test_user_directory] +disallow_untyped_defs = True + +[mypy-tests.storage.test_profile] +disallow_untyped_defs = True + +[mypy-tests.storage.test_user_directory] +disallow_untyped_defs = True + +[mypy-tests.rest.client.test_directory] +disallow_untyped_defs = True + +[mypy-tests.federation.transport.test_client] +disallow_untyped_defs = True + + +;; Dependencies without annotations +;; Before ignoring a module, check to see if type stubs are available. +;; The `typeshed` project maintains stubs here: +;; https://github.com/python/typeshed/tree/master/stubs +;; and for each package `foo` there's a corresponding `types-foo` package on PyPI, +;; which we can pull in as a dev dependency by adding to `setup.py`'s +;; `CONDITIONAL_REQUIREMENTS["mypy"]` list. 
+ +[mypy-authlib.*] ignore_missing_imports = True -[mypy-constantly] +[mypy-bcrypt] ignore_missing_imports = True -[mypy-twisted.*] +[mypy-canonicaljson] ignore_missing_imports = True -[mypy-treq.*] +[mypy-constantly] ignore_missing_imports = True -[mypy-hyperlink] +[mypy-daemonize] ignore_missing_imports = True [mypy-h11] ignore_missing_imports = True -[mypy-msgpack] +[mypy-hiredis] ignore_missing_imports = True -[mypy-opentracing] +[mypy-hyperlink] ignore_missing_imports = True -[mypy-OpenSSL.*] +[mypy-ijson.*] ignore_missing_imports = True -[mypy-netaddr] +[mypy-jaeger_client.*] ignore_missing_imports = True -[mypy-saml2.*] +[mypy-josepy.*] ignore_missing_imports = True -[mypy-canonicaljson] +[mypy-jwt.*] ignore_missing_imports = True -[mypy-jaeger_client.*] +[mypy-lxml] ignore_missing_imports = True -[mypy-jsonschema] +[mypy-msgpack] ignore_missing_imports = True -[mypy-signedjson.*] +[mypy-nacl.*] ignore_missing_imports = True -[mypy-prometheus_client.*] +[mypy-netaddr] ignore_missing_imports = True -[mypy-service_identity.*] +[mypy-opentracing] ignore_missing_imports = True -[mypy-daemonize] +[mypy-parameterized.*] ignore_missing_imports = True -[mypy-sentry_sdk] +[mypy-phonenumbers.*] ignore_missing_imports = True -[mypy-PIL.*] +[mypy-prometheus_client.*] ignore_missing_imports = True -[mypy-lxml] +[mypy-pymacaroons.*] ignore_missing_imports = True -[mypy-jwt.*] +[mypy-pympler.*] ignore_missing_imports = True -[mypy-authlib.*] +[mypy-rust_python_jaeger_reporter.*] ignore_missing_imports = True -[mypy-rust_python_jaeger_reporter.*] +[mypy-saml2.*] ignore_missing_imports = True -[mypy-nacl.*] +[mypy-sentry_sdk] ignore_missing_imports = True -[mypy-hiredis] +[mypy-service_identity.*] ignore_missing_imports = True -[mypy-josepy.*] +[mypy-signedjson.*] ignore_missing_imports = True -[mypy-pympler.*] +[mypy-treq.*] ignore_missing_imports = True -[mypy-phonenumbers.*] +[mypy-twisted.*] ignore_missing_imports = True -[mypy-ijson.*] +[mypy-zope] ignore_missing_imports = True diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages index 0ed1c679fd94..3a9a2d257c6f 100755 --- a/scripts-dev/build_debian_packages +++ b/scripts-dev/build_debian_packages @@ -20,13 +20,14 @@ from concurrent.futures import ThreadPoolExecutor from typing import Optional, Sequence DISTS = ( - "debian:buster", + "debian:buster", # oldstable: EOL 2022-08 "debian:bullseye", + "debian:bookworm", "debian:sid", "ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23) "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) - "ubuntu:groovy", # 20.10 (EOL 2021-07-07) "ubuntu:hirsute", # 21.04 (EOL 2022-01-05) + "ubuntu:impish", # 21.10 (EOL 2022-07) ) DESC = """\ diff --git a/scripts-dev/check-newsfragment b/scripts-dev/check-newsfragment index af6d32e3321c..af4de345df57 100755 --- a/scripts-dev/check-newsfragment +++ b/scripts-dev/check-newsfragment @@ -11,7 +11,7 @@ set -e git remote set-branches --add origin develop git fetch -q origin develop -pr="$BUILDKITE_PULL_REQUEST" +pr="$PULL_REQUEST_NUMBER" # if there are changes in the debian directory, check that the debian changelog # has been updated @@ -42,10 +42,10 @@ echo "--------------------------" echo matched=0 -for f in `git diff --name-only FETCH_HEAD... -- changelog.d`; do +for f in $(git diff --name-only FETCH_HEAD... -- changelog.d); do # check that any modified newsfiles on this branch end with a full stop. - lastchar=`tr -d '\n' < $f | tail -c 1` - if [ $lastchar != '.' -a $lastchar != '!' 
]; then + lastchar=$(tr -d '\n' < "$f" | tail -c 1) + if [ "$lastchar" != '.' ] && [ "$lastchar" != '!' ]; then echo -e "\e[31mERROR: newsfragment $f does not end with a '.' or '!'\e[39m" >&2 echo -e "$CONTRIBUTING_GUIDE_TEXT" >&2 exit 1 diff --git a/scripts-dev/check_line_terminators.sh b/scripts-dev/check_line_terminators.sh index c98395623197..fffa24e01e43 100755 --- a/scripts-dev/check_line_terminators.sh +++ b/scripts-dev/check_line_terminators.sh @@ -25,7 +25,7 @@ # terminators are found, 0 otherwise. # cd to the root of the repository -cd `dirname $0`/.. +cd "$(dirname "$0")/.." || exit # Find and print files with non-unix line terminators if find . -path './.git/*' -prune -o -type f -print0 | xargs -0 grep -I -l $'\r$'; then diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index cba015d942ca..53295b58fca9 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -35,25 +35,25 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then echo "Checkout available at 'complement-master'" fi +# Build the base Synapse image from the local checkout +docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . + # If we're using workers, modify the docker files slightly. if [[ -n "$WORKERS" ]]; then - BASE_IMAGE=matrixdotorg/synapse-workers - BASE_DOCKERFILE=docker/Dockerfile-workers + # Build the workers docker image (from the base Synapse image). + docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . + export COMPLEMENT_BASE_IMAGE=complement-synapse-workers COMPLEMENT_DOCKERFILE=SynapseWorkers.Dockerfile # And provide some more configuration to complement. export COMPLEMENT_CA=true export COMPLEMENT_VERSION_CHECK_ITERATIONS=500 else - BASE_IMAGE=matrixdotorg/synapse - BASE_DOCKERFILE=docker/Dockerfile export COMPLEMENT_BASE_IMAGE=complement-synapse COMPLEMENT_DOCKERFILE=Synapse.Dockerfile fi -# Build the base Synapse image from the local checkout -docker build -t $BASE_IMAGE -f "$BASE_DOCKERFILE" . -# Build the Synapse monolith image from Complement, based on the above image we just built +# Build the Complement image from the Synapse image we just built. docker build -t $COMPLEMENT_BASE_IMAGE -f "$COMPLEMENT_DIR/dockerfiles/$COMPLEMENT_DOCKERFILE" "$COMPLEMENT_DIR/dockerfiles" cd "$COMPLEMENT_DIR" @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... +go test -v -tags synapse_blacklist,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... diff --git a/scripts-dev/config-lint.sh b/scripts-dev/config-lint.sh index 8c6323e59a1b..6ce030b819d6 100755 --- a/scripts-dev/config-lint.sh +++ b/scripts-dev/config-lint.sh @@ -3,7 +3,7 @@ # Exits with 0 if there are no problems, or another code otherwise. # cd to the root of the repository -cd `dirname $0`/.. +cd "$(dirname "$0")/.." || exit # Restore backup of sample config upon script exit trap "mv docs/sample_config.yaml.bak docs/sample_config.yaml" EXIT diff --git a/scripts-dev/docker_update_debian_changelog.sh b/scripts-dev/docker_update_debian_changelog.sh new file mode 100755 index 000000000000..729f8fc46748 --- /dev/null +++ b/scripts-dev/docker_update_debian_changelog.sh @@ -0,0 +1,64 @@ +#!/bin/bash -e +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is meant to be used inside a Docker container to run the `dch` incantations +# needed to release Synapse. This is useful on systems like macOS where such scripts are +# not easily accessible. +# +# Running it (when if the current working directory is the root of the Synapse checkout): +# docker run --rm -v $PWD:/synapse ubuntu:latest /synapse/scripts-dev/docker_update_debian_changelog.sh VERSION +# +# The image can be replaced by any other Debian-based image (as long as the `devscripts` +# package exists in the default repository). +# `VERSION` is the version of Synapse being released without the leading "v" (e.g. 1.42.0). + +# Check if a version was provided. +if [ "$#" -ne 1 ]; then + echo "Usage: update_debian_changelog.sh VERSION" + echo "VERSION is the version of Synapse being released in the form 1.42.0 (without the leading \"v\")" + exit 1 +fi + +# Check that apt-get is available on the system. +if ! which apt-get > /dev/null 2>&1; then + echo "\"apt-get\" isn't available on this system. This script needs to be run in a Docker container using a Debian-based image." + exit 1 +fi + +# Check if devscripts is available in the default repos for this distro. +# Update the apt package list cache. +# We need to do this before we can search the apt cache or install devscripts. +apt-get update || exit 1 + +if ! apt-cache search devscripts | grep -E "^devscripts \-" > /dev/null; then + echo "The package \"devscripts\" needs to exist in the default repositories for this distribution." + exit 1 +fi + +# We set -x here rather than in the shebang so that if we need to exit early because no +# version was provided, the message doesn't get drowned in useless output. +set -x + +# Make the root of the Synapse checkout the current working directory. +cd /synapse + +# Install devscripts (which provides dch). We need to make the Debian frontend +# noninteractive because installing devscripts otherwise asks for the machine's location. +DEBIAN_FRONTEND=noninteractive apt-get install -y devscripts + +# Update the Debian changelog. +ver=${1} +dch -M -v "$(sed -Ee 's/(rc|a|b|c)/~\1/' <<<"$ver")" "New synapse release $ver." +dch -M -r -D stable "" diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py index 6f76c08fcff2..c72e19f61d62 100755 --- a/scripts-dev/federation_client.py +++ b/scripts-dev/federation_client.py @@ -15,6 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. + +""" +Script for signing and sending federation requests. + +Some tips on doing the join dance with this: + + room_id=... + user_id=... 
+
+    # make_join
+    federation_client.py "/_matrix/federation/v1/make_join/$room_id/$user_id?ver=5" > make_join.json
+
+    # sign
+    jq -M .event make_join.json | sign_json --sign-event-room-version=$(jq -r .room_version make_join.json) -o signed-join.json
+
+    # send_join
+    federation_client.py -X PUT "/_matrix/federation/v2/send_join/$room_id/x" --body $(<signed-join.json) > send_join.json
+"""
+
 import argparse
 import base64
 import json
diff --git a/scripts-dev/generate_sample_config b/scripts-dev/generate_sample_config
index 02739894b595..4cd1d1d5b829 100755
--- a/scripts-dev/generate_sample_config
+++ b/scripts-dev/generate_sample_config
@@ -4,7 +4,7 @@
 
 set -e
 
-cd `dirname $0`/..
+cd "$(dirname "$0")/.."
 
 SAMPLE_CONFIG="docs/sample_config.yaml"
 SAMPLE_LOG_CONFIG="docs/sample_log_config.yaml"
diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh
index 869eb2372d51..b6554a73c115 100755
--- a/scripts-dev/lint.sh
+++ b/scripts-dev/lint.sh
@@ -90,11 +90,11 @@ else
       "scripts/hash_password"
       "scripts/register_new_matrix_user"
       "scripts/synapse_port_db"
+      "scripts/update_synapse_database"
       "scripts-dev"
       "scripts-dev/build_debian_packages"
       "scripts-dev/sign_json"
-      "scripts-dev/update_database"
-      "contrib" "synctl" "setup.py" "synmark" "stubs" ".buildkite"
+      "contrib" "synctl" "setup.py" "synmark" "stubs" ".ci"
     )
   fi
 fi
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index 39bf30d258bd..c3c90f4ec637 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -147,7 +147,7 @@ python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG"
 
 # Make sure the SQLite3 database is using the latest schema and has no pending background update.
 echo "Running db background jobs..."
-scripts-dev/update_database --database-config "$SQLITE_CONFIG"
+scripts/update_synapse_database --database-config "$SQLITE_CONFIG" --run-background-updates
 
 # Create the PostgreSQL database.
 echo "Creating postgres database..."
diff --git a/scripts-dev/next_github_number.sh b/scripts-dev/next_github_number.sh
index 00e9b1456986..5ecd515127c7 100755
--- a/scripts-dev/next_github_number.sh
+++ b/scripts-dev/next_github_number.sh
@@ -4,6 +4,6 @@ set -e
 
 # Fetch the current GitHub issue number, add one to it -- presto! The likely
 # next PR number.
-CURRENT_NUMBER=`curl -s "https://api.github.com/repos/matrix-org/synapse/issues?state=all&per_page=1" | jq -r ".[0].number"`
+CURRENT_NUMBER=$(curl -s "https://api.github.com/repos/matrix-org/synapse/issues?state=all&per_page=1" | jq -r ".[0].number")
 CURRENT_NUMBER=$((CURRENT_NUMBER+1))
 echo $CURRENT_NUMBER
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index a339260c4371..4e1f99fee4e1 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -35,6 +35,19 @@
 from packaging import version
 
 
+def run_until_successful(command, *args, **kwargs):
+    while True:
+        completed_process = subprocess.run(command, *args, **kwargs)
+        exit_code = completed_process.returncode
+        if exit_code == 0:
+            # successful, so nothing more to do here.
+            return completed_process
+
+        print(f"The command {command!r} failed with exit code {exit_code}.")
+        print("Please try to correct the failure and then re-run.")
+        click.confirm("Try again?", abort=True)
+
+
 @click.group()
 def cli():
     """An interactive script to walk through the parts of creating a release.
@@ -197,7 +210,7 @@ def prepare(): f.write(parsed_synapse_ast.dumps()) # Generate changelogs - subprocess.run("python3 -m towncrier", shell=True) + run_until_successful("python3 -m towncrier", shell=True) # Generate debian changelogs if parsed_new_version.pre is not None: @@ -209,11 +222,11 @@ def prepare(): else: debian_version = new_version - subprocess.run( + run_until_successful( f'dch -M -v {debian_version} "New synapse release {debian_version}."', shell=True, ) - subprocess.run('dch -M -r -D stable ""', shell=True) + run_until_successful('dch -M -r -D stable ""', shell=True) # Show the user the changes and ask if they want to edit the change log. repo.git.add("-u") @@ -224,7 +237,7 @@ def prepare(): # Commit the changes. repo.git.add("-u") - repo.git.commit(f"-m {new_version}") + repo.git.commit("-m", new_version) # We give the option to bail here in case the user wants to make sure things # are OK before pushing. @@ -239,6 +252,8 @@ def prepare(): # Otherwise, push and open the changelog in the browser. repo.git.push("-u", repo.remote().name, repo.active_branch.name) + print("Opening the changelog in your browser...") + print("Please ask others to give it a check.") click.launch( f"https://github.com/matrix-org/synapse/blob/{repo.active_branch.name}/CHANGES.md" ) @@ -276,7 +291,7 @@ def tag(gh_token: Optional[str]): if click.confirm("Edit text?", default=False): changes = click.edit(changes, require_save=False) - repo.create_tag(tag_name, message=changes) + repo.create_tag(tag_name, message=changes, sign=True) if not click.confirm("Push tag to GitHub?", default=True): print("") @@ -290,7 +305,19 @@ def tag(gh_token: Optional[str]): # If no token was given, we bail here if not gh_token: + print("Launching the GitHub release page in your browser.") + print("Please correct the title and create a draft.") + if current_version.is_prerelease: + print("As this is an RC, remember to mark it as a pre-release!") + print("(by the way, this step can be automated by passing --gh-token,") + print("or one of the GH_TOKEN or GITHUB_TOKEN env vars.)") click.launch(f"https://github.com/matrix-org/synapse/releases/edit/{tag_name}") + + print("Once done, you need to wait for the release assets to build.") + if click.confirm("Launch the release assets actions page?", default=True): + click.launch( + f"https://github.com/matrix-org/synapse/actions?query=branch%3A{tag_name}" + ) return # Create a new draft release @@ -305,6 +332,7 @@ def tag(gh_token: Optional[str]): ) # Open the release and the actions where we are building the assets. + print("Launching the release page and the actions page.") click.launch(release.html_url) click.launch( f"https://github.com/matrix-org/synapse/actions?query=branch%3A{tag_name}" diff --git a/scripts-dev/sign_json b/scripts-dev/sign_json index 4a43d3f2b058..945954310610 100755 --- a/scripts-dev/sign_json +++ b/scripts-dev/sign_json @@ -22,6 +22,8 @@ import yaml from signedjson.key import read_signing_keys from signedjson.sign import sign_json +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.util import json_encoder @@ -51,17 +53,33 @@ Example usage: "request with.", ) + parser.add_argument( + "-K", + "--signing-key", + help="The private ed25519 key to sign the request with.", + ) + parser.add_argument( "-c", "--config", default="homeserver.yaml", help=( "Path to synapse config file, from which the server name and/or signing " - "key path will be read. 
Ignored if --server-name and --signing-key-path " + "key path will be read. Ignored if --server-name and --signing-key(-path) " "are both given." ), ) + parser.add_argument( + "--sign-event-room-version", + type=str, + help=( + "Sign the JSON as an event for the given room version, rather than raw JSON. " + "This means that we will add a 'hashes' object, and redact the event before " + "signing." + ), + ) + input_args = parser.add_mutually_exclusive_group() input_args.add_argument("input_data", nargs="?", help="Raw JSON to be signed.") @@ -87,11 +105,14 @@ Example usage: args = parser.parse_args() - if not args.server_name or not args.signing_key_path: + if not args.server_name or not (args.signing_key_path or args.signing_key): read_args_from_config(args) - with open(args.signing_key_path) as f: - key = read_signing_keys(f)[0] + if args.signing_key: + keys = read_signing_keys([args.signing_key]) + else: + with open(args.signing_key_path) as f: + keys = read_signing_keys(f) json_to_sign = args.input_data if json_to_sign is None: @@ -107,7 +128,17 @@ Example usage: print("Input json was not an object", file=sys.stderr) sys.exit(1) - sign_json(obj, args.server_name, key) + if args.sign_event_room_version: + room_version = KNOWN_ROOM_VERSIONS.get(args.sign_event_room_version) + if not room_version: + print( + f"Unknown room version {args.sign_event_room_version}", file=sys.stderr + ) + sys.exit(1) + add_hashes_and_signatures(room_version, obj, args.server_name, keys[0]) + else: + sign_json(obj, args.server_name, keys[0]) + for c in json_encoder.iterencode(obj): args.output.write(c) args.output.write("\n") @@ -118,8 +149,17 @@ def read_args_from_config(args: argparse.Namespace) -> None: config = yaml.safe_load(fh) if not args.server_name: args.server_name = config["server_name"] - if not args.signing_key_path: - args.signing_key_path = config["signing_key_path"] + if not args.signing_key_path and not args.signing_key: + if "signing_key" in config: + args.signing_key = config["signing_key"] + elif "signing_key_path" in config: + args.signing_key_path = config["signing_key_path"] + else: + print( + "A signing key must be given on the commandline or in the config file.", + file=sys.stderr, + ) + sys.exit(1) if __name__ == "__main__": diff --git a/scripts-dev/test_postgresql.sh b/scripts-dev/test_postgresql.sh new file mode 100755 index 000000000000..43cfa256e4da --- /dev/null +++ b/scripts-dev/test_postgresql.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# This script builds the Docker image to run the PostgreSQL tests, and then runs +# the tests. It uses a dedicated tox environment so that we don't have to +# rebuild it each time. + +# Command line arguments to this script are forwarded to "tox" and then to "trial". 
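+#
+# For example, to run only the storage tests (the test path here is
+# illustrative):
+#   scripts-dev/test_postgresql.sh tests.storage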
+ +set -e + +# Build, and tag +docker build docker/ \ + --build-arg "UID=$(id -u)" \ + --build-arg "GID=$(id -g)" \ + -f docker/Dockerfile-pgtests \ + -t synapsepgtests + +# Run, mounting the current directory into /src +docker run --rm -it -v "$(pwd):/src" -v synapse-pg-test-tox:/tox synapsepgtests "$@" diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 2bbaf5557dc4..640ff15277db 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -43,9 +43,11 @@ from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackground from synapse.storage.databases.main.events_bg_updates import ( EventsBackgroundUpdatesStore, ) +from synapse.storage.databases.main.group_server import GroupServerWorkerStore from synapse.storage.databases.main.media_repository import ( MediaRepositoryBackgroundUpdateStore, ) +from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore from synapse.storage.databases.main.pusher import PusherWorkerStore from synapse.storage.databases.main.registration import ( RegistrationBackgroundUpdateStore, @@ -179,6 +181,8 @@ class Store( EndToEndKeyBackgroundStore, StatsStore, PusherWorkerStore, + PresenceBackgroundUpdateStore, + GroupServerWorkerStore, ): def execute(self, f, *args, **kwargs): return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs) @@ -213,7 +217,7 @@ class MockHomeserver: def __init__(self, config): self.clock = Clock(reactor) self.config = config - self.hostname = config.server_name + self.hostname = config.server.server_name self.version_string = "Synapse/" + get_version_string(synapse) def get_clock(self): @@ -581,7 +585,7 @@ class Porter(object): return self.postgres_store = self.build_db_store( - self.hs_config.get_single_database() + self.hs_config.database.get_single_database() ) await self.run_background_updates_on_postgres() @@ -1067,7 +1071,7 @@ class CursesProgress(Progress): self.stdscr.addstr(0, 0, status, curses.A_BOLD) - max_len = max([len(t) for t in self.tables.keys()]) + max_len = max(len(t) for t in self.tables.keys()) left_margin = 5 middle_space = 1 diff --git a/scripts-dev/update_database b/scripts/update_synapse_database similarity index 84% rename from scripts-dev/update_database rename to scripts/update_synapse_database index 87f709b6ed43..6c088bad9366 100755 --- a/scripts-dev/update_database +++ b/scripts/update_synapse_database @@ -36,16 +36,35 @@ class MockHomeserver(HomeServer): def __init__(self, config, **kwargs): super(MockHomeserver, self).__init__( - config.server_name, reactor=reactor, config=config, **kwargs + config.server.server_name, reactor=reactor, config=config, **kwargs ) self.version_string = "Synapse/" + get_version_string(synapse) -if __name__ == "__main__": +def run_background_updates(hs): + store = hs.get_datastore() + + async def run_background_updates(): + await store.db_pool.updates.run_background_updates(sleep=False) + # Stop the reactor to exit the script once every background update is run. + reactor.stop() + + def run(): + # Apply all background updates on the database. + defer.ensureDeferred( + run_as_background_process("background_updates", run_background_updates) + ) + + reactor.callWhenRunning(run) + + reactor.run() + + +def main(): parser = argparse.ArgumentParser( description=( - "Updates a synapse database to the latest schema and runs background updates" + "Updates a synapse database to the latest schema and optionally runs background updates" " on it." 
) ) @@ -54,7 +73,13 @@ if __name__ == "__main__": "--database-config", type=argparse.FileType("r"), required=True, - help="A database config file for either a SQLite3 database or a PostgreSQL one.", + help="Synapse configuration file, giving the details of the database to be updated", + ) + parser.add_argument( + "--run-background-updates", + action="store_true", + required=False, + help="run background updates after upgrading the database schema", ) args = parser.parse_args() @@ -82,19 +107,10 @@ if __name__ == "__main__": # Setup instantiates the store within the homeserver object and updates the # DB. hs.setup() - store = hs.get_datastore() - async def run_background_updates(): - await store.db_pool.updates.run_background_updates(sleep=False) - # Stop the reactor to exit the script once every background update is run. - reactor.stop() + if args.run_background_updates: + run_background_updates(hs) - def run(): - # Apply all background updates on the database. - defer.ensureDeferred( - run_as_background_process("background_updates", run_background_updates) - ) - reactor.callWhenRunning(run) - - reactor.run() +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index c47856351081..2c6fb9aacb45 100755 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ # limitations under the License. import glob import os +from typing import Any, Dict from setuptools import Command, find_packages, setup @@ -49,8 +50,6 @@ # [1]: http://tox.readthedocs.io/en/2.5.0/example/basic.html#integration-with-setup-py-test-command # [2]: https://pypi.python.org/pypi/setuptools_trial class TestCommand(Command): - user_options = [] - def initialize_options(self): pass @@ -75,7 +74,7 @@ def read_file(path_segments): def exec_file(path_segments): """Execute a single python file to get the variables defined in it""" - result = {} + result: Dict[str, Any] = {} code = read_file(path_segments) exec(code, result) return result @@ -103,23 +102,43 @@ def exec_file(path_segments): "flake8", ] -CONDITIONAL_REQUIREMENTS["dev"] = CONDITIONAL_REQUIREMENTS["lint"] + [ - # The following are used by the release script - "click==7.1.2", - "redbaron==0.9.2", - "GitPython==3.1.14", - "commonmark==0.9.1", - "pygithub==1.55", +CONDITIONAL_REQUIREMENTS["mypy"] = [ + "mypy==0.910", + "mypy-zope==0.3.2", + "types-bleach>=4.1.0", + "types-jsonschema>=3.2.0", + "types-Pillow>=8.3.4", + "types-pyOpenSSL>=20.0.7", + "types-PyYAML>=5.4.10", + "types-requests>=2.26.0", + "types-setuptools>=57.4.0", ] -CONDITIONAL_REQUIREMENTS["mypy"] = ["mypy==0.812", "mypy-zope==0.2.13"] - # Dependencies which are exclusively required by unit test code. This is # NOT a list of all modules that are necessary to run the unit tests. # Tests assume that all optional dependencies are installed. # # parameterized_class decorator was introduced in parameterized 0.7.0 -CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0"] +# +# We use `mock` library as that backports `AsyncMock` to Python 3.6 +CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0", "mock>=4.0.0"] + +CONDITIONAL_REQUIREMENTS["dev"] = ( + CONDITIONAL_REQUIREMENTS["lint"] + + CONDITIONAL_REQUIREMENTS["mypy"] + + CONDITIONAL_REQUIREMENTS["test"] + + [ + # The following are used by the release script + "click==7.1.2", + "redbaron==0.9.2", + "GitPython==3.1.14", + "commonmark==0.9.1", + "pygithub==1.55", + # The following are executed as commands by the release script. 
+ "twine", + "towncrier", + ] +) setup( name="matrix-synapse", @@ -133,6 +152,12 @@ def exec_file(path_segments): long_description=long_description, long_description_content_type="text/x-rst", python_requires="~=3.6", + entry_points={ + "console_scripts": [ + "synapse_homeserver = synapse.app.homeserver:main", + "synapse_worker = synapse.app.generic_worker:main", + ] + }, classifiers=[ "Development Status :: 5 - Production/Stable", "Topic :: Communications :: Chat", diff --git a/stubs/sortedcontainers/__init__.pyi b/stubs/sortedcontainers/__init__.pyi index fa307483febe..0602a4fa9045 100644 --- a/stubs/sortedcontainers/__init__.pyi +++ b/stubs/sortedcontainers/__init__.pyi @@ -1,5 +1,6 @@ from .sorteddict import SortedDict, SortedItemsView, SortedKeysView, SortedValuesView from .sortedlist import SortedKeyList, SortedList, SortedListWithKey +from .sortedset import SortedSet __all__ = [ "SortedDict", @@ -9,4 +10,5 @@ __all__ = [ "SortedKeyList", "SortedList", "SortedListWithKey", + "SortedSet", ] diff --git a/stubs/sortedcontainers/sortedset.pyi b/stubs/sortedcontainers/sortedset.pyi new file mode 100644 index 000000000000..f9c290838678 --- /dev/null +++ b/stubs/sortedcontainers/sortedset.pyi @@ -0,0 +1,118 @@ +# stub for SortedSet. This is a lightly edited copy of +# https://github.com/grantjenks/python-sortedcontainers/blob/d0a225d7fd0fb4c54532b8798af3cbeebf97e2d5/sortedcontainers/sortedset.pyi +# (from https://github.com/grantjenks/python-sortedcontainers/pull/107) + +from typing import ( + AbstractSet, + Any, + Callable, + Generic, + Hashable, + Iterable, + Iterator, + List, + MutableSet, + Optional, + Sequence, + Set, + Tuple, + Type, + TypeVar, + Union, + overload, +) + +# --- Global + +_T = TypeVar("_T", bound=Hashable) +_S = TypeVar("_S", bound=Hashable) +_SS = TypeVar("_SS", bound=SortedSet) +_Key = Callable[[_T], Any] + +class SortedSet(MutableSet[_T], Sequence[_T]): + def __init__( + self, + iterable: Optional[Iterable[_T]] = ..., + key: Optional[_Key[_T]] = ..., + ) -> None: ... + @classmethod + def _fromset( + cls, values: Set[_T], key: Optional[_Key[_T]] = ... + ) -> SortedSet[_T]: ... + @property + def key(self) -> Optional[_Key[_T]]: ... + def __contains__(self, value: Any) -> bool: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> List[_T]: ... + def __delitem__(self, index: Union[int, slice]) -> None: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __lt__(self, other: Iterable[_T]) -> bool: ... + def __gt__(self, other: Iterable[_T]) -> bool: ... + def __le__(self, other: Iterable[_T]) -> bool: ... + def __ge__(self, other: Iterable[_T]) -> bool: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __reversed__(self) -> Iterator[_T]: ... + def add(self, value: _T) -> None: ... + def _add(self, value: _T) -> None: ... + def clear(self) -> None: ... + def copy(self: _SS) -> _SS: ... + def __copy__(self: _SS) -> _SS: ... + def count(self, value: _T) -> int: ... + def discard(self, value: _T) -> None: ... + def _discard(self, value: _T) -> None: ... + def pop(self, index: int = ...) -> _T: ... + def remove(self, value: _T) -> None: ... + def difference(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ... + def __sub__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ... + def difference_update( + self, *iterables: Iterable[_S] + ) -> SortedSet[Union[_T, _S]]: ... 
+    def __isub__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def intersection(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __and__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __rand__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def intersection_update(
+        self, *iterables: Iterable[_S]
+    ) -> SortedSet[Union[_T, _S]]: ...
+    def __iand__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def symmetric_difference(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __xor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __rxor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def symmetric_difference_update(
+        self, other: Iterable[_S]
+    ) -> SortedSet[Union[_T, _S]]: ...
+    def __ixor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def union(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __or__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __ror__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def update(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __ior__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def _update(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
+    def __reduce__(
+        self,
+    ) -> Tuple[Type[SortedSet[_T]], Set[_T], Callable[[_T], Any]]: ...
+    def __repr__(self) -> str: ...
+    def _check(self) -> None: ...
+    def bisect_left(self, value: _T) -> int: ...
+    def bisect_right(self, value: _T) -> int: ...
+    def islice(
+        self,
+        start: Optional[int] = ...,
+        stop: Optional[int] = ...,
+        reverse: bool = ...,
+    ) -> Iterator[_T]: ...
+    def irange(
+        self,
+        minimum: Optional[_T] = ...,
+        maximum: Optional[_T] = ...,
+        inclusive: Tuple[bool, bool] = ...,
+        reverse: bool = ...,
+    ) -> Iterator[_T]: ...
+    def index(
+        self, value: _T, start: Optional[int] = ..., stop: Optional[int] = ...
+    ) -> int: ...
+    def _reset(self, load: int) -> None: ...
diff --git a/stubs/txredisapi.pyi b/stubs/txredisapi.pyi
index c1a06ae022f6..4ff3c6de5feb 100644
--- a/stubs/txredisapi.pyi
+++ b/stubs/txredisapi.pyi
@@ -73,4 +73,4 @@ class RedisFactory(protocol.ReconnectingClientFactory):
     def buildProtocol(self, addr) -> RedisProtocol: ...
 
 class SubscriberFactory(RedisFactory):
-    def __init__(self): ...
+    def __init__(self) -> None: ...
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 919293cd80c5..95a49c20befc 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@
 except ImportError:
     pass
 
-__version__ = "1.40.0"
+__version__ = "1.49.2"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py
index dae986c78869..4ffe6a1ef3cd 100644
--- a/synapse/_scripts/register_new_matrix_user.py
+++ b/synapse/_scripts/register_new_matrix_user.py
@@ -1,5 +1,6 @@
 # Copyright 2015, 2016 OpenMarket Ltd
 # Copyright 2018 New Vector
+# Copyright 2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,22 +20,23 @@ import hmac import logging import sys +from typing import Callable, Optional import requests as _requests import yaml def request_registration( - user, - password, - server_location, - shared_secret, - admin=False, - user_type=None, + user: str, + password: str, + server_location: str, + shared_secret: str, + admin: bool = False, + user_type: Optional[str] = None, requests=_requests, - _print=print, - exit=sys.exit, -): + _print: Callable[[str], None] = print, + exit: Callable[[int], None] = sys.exit, +) -> None: url = "%s/_synapse/admin/v1/register" % (server_location.rstrip("/"),) @@ -65,13 +67,13 @@ def request_registration( mac.update(b"\x00") mac.update(user_type.encode("utf8")) - mac = mac.hexdigest() + hex_mac = mac.hexdigest() data = { "nonce": nonce, "username": user, "password": password, - "mac": mac, + "mac": hex_mac, "admin": admin, "user_type": user_type, } @@ -91,10 +93,17 @@ def request_registration( _print("Success!") -def register_new_user(user, password, server_location, shared_secret, admin, user_type): +def register_new_user( + user: str, + password: str, + server_location: str, + shared_secret: str, + admin: Optional[bool], + user_type: Optional[str], +) -> None: if not user: try: - default_user = getpass.getuser() + default_user: Optional[str] = getpass.getuser() except Exception: default_user = None @@ -123,8 +132,8 @@ def register_new_user(user, password, server_location, shared_secret, admin, use sys.exit(1) if admin is None: - admin = input("Make admin [no]: ") - if admin in ("y", "yes", "true"): + admin_inp = input("Make admin [no]: ") + if admin_inp in ("y", "yes", "true"): admin = True else: admin = False @@ -134,7 +143,7 @@ def register_new_user(user, password, server_location, shared_secret, admin, use ) -def main(): +def main() -> None: logging.captureWarnings(True) diff --git a/synapse/_scripts/review_recent_signups.py b/synapse/_scripts/review_recent_signups.py index 9de913db889c..093af4327ae0 100644 --- a/synapse/_scripts/review_recent_signups.py +++ b/synapse/_scripts/review_recent_signups.py @@ -20,7 +20,12 @@ import attr -from synapse.config._base import RootConfig, find_config_files, read_config_files +from synapse.config._base import ( + Config, + RootConfig, + find_config_files, + read_config_files, +) from synapse.config.database import DatabaseConfig from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn from synapse.storage.engines import create_engine @@ -87,7 +92,7 @@ def get_recent_users(txn: LoggingTransaction, since_ms: int) -> List[UserInfo]: return user_infos -def main(): +def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( "-c", @@ -126,7 +131,7 @@ def main(): config_dict, ) - since_ms = time.time() * 1000 - config.parse_duration(config_args.since) + since_ms = time.time() * 1000 - Config.parse_duration(config_args.since) exclude_users_with_email = config_args.exclude_emails include_context = not config_args.only_users @@ -137,7 +142,8 @@ def main(): engine = create_engine(database_config.config) with make_conn(database_config, engine, "review_recent_signups") as db_conn: - user_infos = get_recent_users(db_conn.cursor(), since_ms) + # This generates a type of Cursor, not LoggingTransaction. 
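+        # The "type: ignore" below silences mypy's arg-type complaint about
+        # that mismatch.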
+ user_infos = get_recent_users(db_conn.cursor(), since_ms) # type: ignore[arg-type] for user_info in user_infos: if exclude_users_with_email and user_info.emails: diff --git a/synapse/api/auth.py b/synapse/api/auth.py index d6479b12a69d..415e4eaa9b7a 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -70,8 +70,8 @@ def __init__(self, hs: "HomeServer"): self._auth_blocking = AuthBlocking(self.hs) - self._track_appservice_user_ips = hs.config.track_appservice_user_ips - self._macaroon_secret_key = hs.config.macaroon_secret_key + self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips + self._macaroon_secret_key = hs.config.key.macaroon_secret_key self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users async def check_user_in_room( @@ -261,7 +261,7 @@ async def get_user_by_req( async def validate_appservice_can_control_user_id( self, app_service: ApplicationService, user_id: str - ): + ) -> None: """Validates that the app service is allowed to control the given user. @@ -649,5 +649,13 @@ async def check_user_in_room_or_world_readable( % (user_id, room_id), ) - async def check_auth_blocking(self, *args, **kwargs) -> None: - await self._auth_blocking.check_auth_blocking(*args, **kwargs) + async def check_auth_blocking( + self, + user_id: Optional[str] = None, + threepid: Optional[dict] = None, + user_type: Optional[str] = None, + requester: Optional[Requester] = None, + ) -> None: + await self._auth_blocking.check_auth_blocking( + user_id=user_id, threepid=threepid, user_type=user_type, requester=requester + ) diff --git a/synapse/api/auth_blocking.py b/synapse/api/auth_blocking.py index e6bced93d5fa..08fe160c98ec 100644 --- a/synapse/api/auth_blocking.py +++ b/synapse/api/auth_blocking.py @@ -30,13 +30,15 @@ class AuthBlocking: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - self._server_notices_mxid = hs.config.server_notices_mxid - self._hs_disabled = hs.config.hs_disabled - self._hs_disabled_message = hs.config.hs_disabled_message - self._admin_contact = hs.config.admin_contact - self._max_mau_value = hs.config.max_mau_value - self._limit_usage_by_mau = hs.config.limit_usage_by_mau - self._mau_limits_reserved_threepids = hs.config.mau_limits_reserved_threepids + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid + self._hs_disabled = hs.config.server.hs_disabled + self._hs_disabled_message = hs.config.server.hs_disabled_message + self._admin_contact = hs.config.server.admin_contact + self._max_mau_value = hs.config.server.max_mau_value + self._limit_usage_by_mau = hs.config.server.limit_usage_by_mau + self._mau_limits_reserved_threepids = ( + hs.config.server.mau_limits_reserved_threepids + ) self._server_name = hs.hostname self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips @@ -79,7 +81,7 @@ async def check_auth_blocking( # We never block the server from doing actions on behalf of # users. 
return - elif requester.app_service and not self._track_appservice_user_ips: + if requester.app_service and not self._track_appservice_user_ips: # If we're authenticated as an appservice then we only block # auth if `track_appservice_user_ips` is set, as that option # implicitly means that application services are part of MAU diff --git a/synapse/api/constants.py b/synapse/api/constants.py index b42d6dbc9bb5..c34270ef58a2 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -17,6 +17,8 @@ """Contains constants from the specification.""" +from typing_extensions import Final + # the max size of a (canonical-json-encoded) event MAX_PDU_SIZE = 65536 @@ -39,124 +41,125 @@ class Membership: """Represents the membership states of a user in a room.""" - INVITE = "invite" - JOIN = "join" - KNOCK = "knock" - LEAVE = "leave" - BAN = "ban" - LIST = (INVITE, JOIN, KNOCK, LEAVE, BAN) + INVITE: Final = "invite" + JOIN: Final = "join" + KNOCK: Final = "knock" + LEAVE: Final = "leave" + BAN: Final = "ban" + LIST: Final = (INVITE, JOIN, KNOCK, LEAVE, BAN) class PresenceState: """Represents the presence state of a user.""" - OFFLINE = "offline" - UNAVAILABLE = "unavailable" - ONLINE = "online" - BUSY = "org.matrix.msc3026.busy" + OFFLINE: Final = "offline" + UNAVAILABLE: Final = "unavailable" + ONLINE: Final = "online" + BUSY: Final = "org.matrix.msc3026.busy" class JoinRules: - PUBLIC = "public" - KNOCK = "knock" - INVITE = "invite" - PRIVATE = "private" + PUBLIC: Final = "public" + KNOCK: Final = "knock" + INVITE: Final = "invite" + PRIVATE: Final = "private" # As defined for MSC3083. - RESTRICTED = "restricted" + RESTRICTED: Final = "restricted" class RestrictedJoinRuleTypes: """Understood types for the allow rules in restricted join rules.""" - ROOM_MEMBERSHIP = "m.room_membership" + ROOM_MEMBERSHIP: Final = "m.room_membership" class LoginType: - PASSWORD = "m.login.password" - EMAIL_IDENTITY = "m.login.email.identity" - MSISDN = "m.login.msisdn" - RECAPTCHA = "m.login.recaptcha" - TERMS = "m.login.terms" - SSO = "m.login.sso" - DUMMY = "m.login.dummy" + PASSWORD: Final = "m.login.password" + EMAIL_IDENTITY: Final = "m.login.email.identity" + MSISDN: Final = "m.login.msisdn" + RECAPTCHA: Final = "m.login.recaptcha" + TERMS: Final = "m.login.terms" + SSO: Final = "m.login.sso" + DUMMY: Final = "m.login.dummy" + REGISTRATION_TOKEN: Final = "org.matrix.msc3231.login.registration_token" # This is used in the `type` parameter for /register when called by # an appservice to register a new user. 
-APP_SERVICE_REGISTRATION_TYPE = "m.login.application_service" +APP_SERVICE_REGISTRATION_TYPE: Final = "m.login.application_service" class EventTypes: - Member = "m.room.member" - Create = "m.room.create" - Tombstone = "m.room.tombstone" - JoinRules = "m.room.join_rules" - PowerLevels = "m.room.power_levels" - Aliases = "m.room.aliases" - Redaction = "m.room.redaction" - ThirdPartyInvite = "m.room.third_party_invite" - RelatedGroups = "m.room.related_groups" - - RoomHistoryVisibility = "m.room.history_visibility" - CanonicalAlias = "m.room.canonical_alias" - Encrypted = "m.room.encrypted" - RoomAvatar = "m.room.avatar" - RoomEncryption = "m.room.encryption" - GuestAccess = "m.room.guest_access" + Member: Final = "m.room.member" + Create: Final = "m.room.create" + Tombstone: Final = "m.room.tombstone" + JoinRules: Final = "m.room.join_rules" + PowerLevels: Final = "m.room.power_levels" + Aliases: Final = "m.room.aliases" + Redaction: Final = "m.room.redaction" + ThirdPartyInvite: Final = "m.room.third_party_invite" + RelatedGroups: Final = "m.room.related_groups" + + RoomHistoryVisibility: Final = "m.room.history_visibility" + CanonicalAlias: Final = "m.room.canonical_alias" + Encrypted: Final = "m.room.encrypted" + RoomAvatar: Final = "m.room.avatar" + RoomEncryption: Final = "m.room.encryption" + GuestAccess: Final = "m.room.guest_access" # These are used for validation - Message = "m.room.message" - Topic = "m.room.topic" - Name = "m.room.name" + Message: Final = "m.room.message" + Topic: Final = "m.room.topic" + Name: Final = "m.room.name" - ServerACL = "m.room.server_acl" - Pinned = "m.room.pinned_events" + ServerACL: Final = "m.room.server_acl" + Pinned: Final = "m.room.pinned_events" - Retention = "m.room.retention" + Retention: Final = "m.room.retention" - Dummy = "org.matrix.dummy_event" + Dummy: Final = "org.matrix.dummy_event" - SpaceChild = "m.space.child" - SpaceParent = "m.space.parent" + SpaceChild: Final = "m.space.child" + SpaceParent: Final = "m.space.parent" - MSC2716_INSERTION = "org.matrix.msc2716.insertion" - MSC2716_CHUNK = "org.matrix.msc2716.chunk" - MSC2716_MARKER = "org.matrix.msc2716.marker" + MSC2716_INSERTION: Final = "org.matrix.msc2716.insertion" + MSC2716_BATCH: Final = "org.matrix.msc2716.batch" + MSC2716_MARKER: Final = "org.matrix.msc2716.marker" class ToDeviceEventTypes: - RoomKeyRequest = "m.room_key_request" + RoomKeyRequest: Final = "m.room_key_request" class DeviceKeyAlgorithms: """Spec'd algorithms for the generation of per-device keys""" - ED25519 = "ed25519" - CURVE25519 = "curve25519" - SIGNED_CURVE25519 = "signed_curve25519" + ED25519: Final = "ed25519" + CURVE25519: Final = "curve25519" + SIGNED_CURVE25519: Final = "signed_curve25519" class EduTypes: - Presence = "m.presence" + Presence: Final = "m.presence" class RejectedReason: - AUTH_ERROR = "auth_error" + AUTH_ERROR: Final = "auth_error" class RoomCreationPreset: - PRIVATE_CHAT = "private_chat" - PUBLIC_CHAT = "public_chat" - TRUSTED_PRIVATE_CHAT = "trusted_private_chat" + PRIVATE_CHAT: Final = "private_chat" + PUBLIC_CHAT: Final = "public_chat" + TRUSTED_PRIVATE_CHAT: Final = "trusted_private_chat" class ThirdPartyEntityKind: - USER = "user" - LOCATION = "location" + USER: Final = "user" + LOCATION: Final = "location" -ServerNoticeMsgType = "m.server_notice" -ServerNoticeLimitReached = "m.server_notice.usage_limit_reached" +ServerNoticeMsgType: Final = "m.server_notice" +ServerNoticeLimitReached: Final = "m.server_notice.usage_limit_reached" class UserTypes: @@ -164,74 +167,93 @@ class 
UserTypes: 'admin' and 'guest' users should also be UserTypes. Normal users are type None """ - SUPPORT = "support" - BOT = "bot" - FREE = "free" - LIMITED = "limited" - ALL_USER_TYPES = (SUPPORT, BOT, FREE, LIMITED) + SUPPORT: Final = "support" + BOT: Final = "bot" + FREE: Final = "free" + LIMITED: Final = "limited" + ALL_USER_TYPES: Final = (SUPPORT, BOT, FREE, LIMITED) class RelationTypes: """The types of relations known to this server.""" - ANNOTATION = "m.annotation" - REPLACE = "m.replace" - REFERENCE = "m.reference" + ANNOTATION: Final = "m.annotation" + REPLACE: Final = "m.replace" + REFERENCE: Final = "m.reference" + THREAD: Final = "io.element.thread" class LimitBlockingTypes: """Reasons that a server may be blocked""" - MONTHLY_ACTIVE_USER = "monthly_active_user" - HS_DISABLED = "hs_disabled" + MONTHLY_ACTIVE_USER: Final = "monthly_active_user" + HS_DISABLED: Final = "hs_disabled" class EventContentFields: """Fields found in events' content, regardless of type.""" # Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326 - LABELS = "org.matrix.labels" + LABELS: Final = "org.matrix.labels" # Timestamp to delete the event after # cf https://github.com/matrix-org/matrix-doc/pull/2228 - SELF_DESTRUCT_AFTER = "org.matrix.self_destruct_after" + SELF_DESTRUCT_AFTER: Final = "org.matrix.self_destruct_after" # cf https://github.com/matrix-org/matrix-doc/pull/1772 - ROOM_TYPE = "type" + ROOM_TYPE: Final = "type" + + # Whether a room can federate. + FEDERATE: Final = "m.federate" + + # The creator of the room, as used in `m.room.create` events. + ROOM_CREATOR: Final = "creator" + + # Used in m.room.guest_access events. + GUEST_ACCESS: Final = "guest_access" # Used on normal messages to indicate they were historically imported after the fact - MSC2716_HISTORICAL = "org.matrix.msc2716.historical" - # For "insertion" events to indicate what the next chunk ID should be in + MSC2716_HISTORICAL: Final = "org.matrix.msc2716.historical" + # For "insertion" events to indicate what the next batch ID should be in # order to connect to it - MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" - # Used on "chunk" events to indicate which insertion event it connects to - MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" + MSC2716_NEXT_BATCH_ID: Final = "org.matrix.msc2716.next_batch_id" + # Used on "batch" events to indicate which insertion event it connects to + MSC2716_BATCH_ID: Final = "org.matrix.msc2716.batch_id" # For "marker" events - MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" + MSC2716_MARKER_INSERTION: Final = "org.matrix.msc2716.marker.insertion" + + # The authorising user for joining a restricted room. 
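+    # (part of the restricted join rules introduced by MSC3083.)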
+ AUTHORISING_USER: Final = "join_authorised_via_users_server" class RoomTypes: """Understood values of the room_type field of m.room.create events.""" - SPACE = "m.space" + SPACE: Final = "m.space" class RoomEncryptionAlgorithms: - MEGOLM_V1_AES_SHA2 = "m.megolm.v1.aes-sha2" - DEFAULT = MEGOLM_V1_AES_SHA2 + MEGOLM_V1_AES_SHA2: Final = "m.megolm.v1.aes-sha2" + DEFAULT: Final = MEGOLM_V1_AES_SHA2 class AccountDataTypes: - DIRECT = "m.direct" - IGNORED_USER_LIST = "m.ignored_user_list" + DIRECT: Final = "m.direct" + IGNORED_USER_LIST: Final = "m.ignored_user_list" class HistoryVisibility: - INVITED = "invited" - JOINED = "joined" - SHARED = "shared" - WORLD_READABLE = "world_readable" + INVITED: Final = "invited" + JOINED: Final = "joined" + SHARED: Final = "shared" + WORLD_READABLE: Final = "world_readable" + + +class GuestAccess: + CAN_JOIN: Final = "can_join" + # anything that is not "can_join" is considered "forbidden", but for completeness: + FORBIDDEN: Final = "forbidden" class ReadReceiptEventFields: - MSC2285_HIDDEN = "org.matrix.msc2285.hidden" + MSC2285_HIDDEN: Final = "org.matrix.msc2285.hidden" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index dc662bca8353..85302163dad9 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -18,7 +18,7 @@ import logging import typing from http import HTTPStatus -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from twisted.web import http @@ -143,10 +143,18 @@ def __init__(self, code: int, msg: str, errcode: str = Codes.UNKNOWN): super().__init__(code, msg) self.errcode = errcode - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode) +class InvalidAPICallError(SynapseError): + """You called an existing API endpoint, but fed that endpoint + invalid or incomplete data.""" + + def __init__(self, msg: str): + super().__init__(HTTPStatus.BAD_REQUEST, msg, Codes.BAD_JSON) + + class ProxiedRequestError(SynapseError): """An error from a general matrix endpoint, eg. from a proxied Matrix API call. @@ -167,7 +175,7 @@ def __init__( else: self._additional_fields = dict(additional_fields) - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, **self._additional_fields) @@ -188,7 +196,7 @@ def __init__(self, msg: str, consent_uri: str): ) self._consent_uri = consent_uri - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, consent_uri=self._consent_uri) @@ -254,14 +262,10 @@ def __init__(self, session_id: str, result: "JsonDict"): class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.UNRECOGNIZED - if len(args) == 0: - message = "Unrecognized request" - else: - message = args[0] - super().__init__(400, message, **kwargs) + def __init__( + self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED + ): + super().__init__(400, msg, errcode) class NotFoundError(SynapseError): @@ -276,10 +280,8 @@ class AuthError(SynapseError): other poorly-defined times. 
""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.FORBIDDEN - super().__init__(*args, **kwargs) + def __init__(self, code: int, msg: str, errcode: str = Codes.FORBIDDEN): + super().__init__(code, msg, errcode) class InvalidClientCredentialsError(SynapseError): @@ -313,7 +315,7 @@ def __init__( super().__init__(msg=msg, errcode="M_UNKNOWN_TOKEN") self._soft_logout = soft_logout - def error_dict(self): + def error_dict(self) -> "JsonDict": d = super().error_dict() d["soft_logout"] = self._soft_logout return d @@ -337,7 +339,7 @@ def __init__( self.limit_type = limit_type super().__init__(code, msg, errcode=errcode) - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error( self.msg, self.errcode, @@ -349,32 +351,17 @@ def error_dict(self): class EventSizeError(SynapseError): """An error raised when an event is too big.""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.TOO_LARGE - super().__init__(413, *args, **kwargs) - - -class EventStreamError(SynapseError): - """An error raised when there a problem with the event stream.""" - - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.BAD_PAGINATION - super().__init__(*args, **kwargs) + def __init__(self, msg: str): + super().__init__(413, msg, Codes.TOO_LARGE) class LoginError(SynapseError): """An error raised when there was a problem logging in.""" - pass - class StoreError(SynapseError): """An error raised when there was a problem storing some data.""" - pass - class InvalidCaptchaError(SynapseError): def __init__( @@ -387,7 +374,7 @@ def __init__( super().__init__(code, msg, errcode) self.error_url = error_url - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, error_url=self.error_url) @@ -404,7 +391,7 @@ def __init__( super().__init__(code, msg, errcode) self.retry_after_ms = retry_after_ms - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, retry_after_ms=self.retry_after_ms) @@ -435,10 +422,8 @@ def __init__(self, msg: str = "Homeserver does not support this room version"): class ThreepidValidationError(SynapseError): """An error raised when there was a problem authorising an event.""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.FORBIDDEN - super().__init__(*args, **kwargs) + def __init__(self, msg: str, errcode: str = Codes.FORBIDDEN): + super().__init__(400, msg, errcode) class IncompatibleRoomVersionError(SynapseError): @@ -458,7 +443,7 @@ def __init__(self, room_version: str): self._room_version = room_version - def error_dict(self): + def error_dict(self) -> "JsonDict": return cs_error(self.msg, self.errcode, room_version=self._room_version) @@ -486,7 +471,7 @@ class RequestSendFailed(RuntimeError): errors (like programming errors). """ - def __init__(self, inner_exception, can_retry): + def __init__(self, inner_exception: BaseException, can_retry: bool): super().__init__( "Failed to send request: %s: %s" % (type(inner_exception).__name__, inner_exception) @@ -495,7 +480,7 @@ def __init__(self, inner_exception, can_retry): self.can_retry = can_retry -def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs): +def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict": """Utility method for constructing an error response for client-server interactions. 
@@ -543,7 +528,7 @@ def __init__( msg = "%s %s: %s" % (level, code, reason) super().__init__(msg) - def get_dict(self): + def get_dict(self) -> "JsonDict": return { "level": self.level, "code": self.code, @@ -572,7 +557,7 @@ def __init__(self, code: int, msg: str, response: bytes): super().__init__(code, msg) self.response = response - def to_synapse_error(self): + def to_synapse_error(self) -> SynapseError: """Make a SynapseError based on an HTTPResponseException This is useful when a proxied request has failed, and we need to @@ -611,3 +596,10 @@ class ShadowBanError(Exception): This should be caught and a proper "fake" success response sent to the user. """ + + +class ModuleFailedException(Exception): + """ + Raised when a module API callback fails, for example because it raised an + exception. + """ diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index ad1ff6a9df6c..13dd6ce248e1 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -1,7 +1,7 @@ # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2017 Vector Creations Ltd # Copyright 2018-2019 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import json -from typing import List +from typing import ( + TYPE_CHECKING, + Awaitable, + Callable, + Dict, + Iterable, + List, + Optional, + Set, + TypeVar, + Union, +) import jsonschema from jsonschema import FormatChecker @@ -23,7 +34,11 @@ from synapse.api.constants import EventContentFields from synapse.api.errors import SynapseError from synapse.api.presence import UserPresenceState -from synapse.types import RoomID, UserID +from synapse.events import EventBase +from synapse.types import JsonDict, RoomID, UserID + +if TYPE_CHECKING: + from synapse.server import HomeServer FILTER_SCHEMA = { "additionalProperties": False, @@ -71,6 +86,9 @@ # cf https://github.com/matrix-org/matrix-doc/pull/2326 "org.matrix.labels": {"type": "array", "items": {"type": "string"}}, "org.matrix.not_labels": {"type": "array", "items": {"type": "string"}}, + # MSC3440, filtering by event relations. 
+ "io.element.relation_senders": {"type": "array", "items": {"type": "string"}}, + "io.element.relation_types": {"type": "array", "items": {"type": "string"}}, }, } @@ -120,25 +138,31 @@ @FormatChecker.cls_checks("matrix_room_id") -def matrix_room_id_validator(room_id_str): +def matrix_room_id_validator(room_id_str: str) -> RoomID: return RoomID.from_string(room_id_str) @FormatChecker.cls_checks("matrix_user_id") -def matrix_user_id_validator(user_id_str): +def matrix_user_id_validator(user_id_str: str) -> UserID: return UserID.from_string(user_id_str) class Filtering: - def __init__(self, hs): - super().__init__() + def __init__(self, hs: "HomeServer"): + self._hs = hs self.store = hs.get_datastore() - async def get_user_filter(self, user_localpart, filter_id): + self.DEFAULT_FILTER_COLLECTION = FilterCollection(hs, {}) + + async def get_user_filter( + self, user_localpart: str, filter_id: Union[int, str] + ) -> "FilterCollection": result = await self.store.get_user_filter(user_localpart, filter_id) - return FilterCollection(result) + return FilterCollection(self._hs, result) - def add_user_filter(self, user_localpart, user_filter): + def add_user_filter( + self, user_localpart: str, user_filter: JsonDict + ) -> Awaitable[int]: self.check_valid_filter(user_filter) return self.store.add_user_filter(user_localpart, user_filter) @@ -146,13 +170,13 @@ def add_user_filter(self, user_localpart, user_filter): # replace_user_filter at some point? There's no REST API specified for # them however - def check_valid_filter(self, user_filter_json): + def check_valid_filter(self, user_filter_json: JsonDict) -> None: """Check if the provided filter is valid. This inspects all definitions contained within the filter. Args: - user_filter_json(dict): The filter + user_filter_json: The filter Raises: SynapseError: If the filter is not valid. """ @@ -167,80 +191,99 @@ def check_valid_filter(self, user_filter_json): raise SynapseError(400, str(e)) +# Filters work across events, presence EDUs, and account data. 
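+# The TypeVar below lets each filter method return the same element type it was
+# given, rather than a union of all three.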
+FilterEvent = TypeVar("FilterEvent", EventBase, UserPresenceState, JsonDict)
+
+
 class FilterCollection:
-    def __init__(self, filter_json):
+    def __init__(self, hs: "HomeServer", filter_json: JsonDict):
         self._filter_json = filter_json
 
         room_filter_json = self._filter_json.get("room", {})
 
         self._room_filter = Filter(
-            {k: v for k, v in room_filter_json.items() if k in ("rooms", "not_rooms")}
+            hs,
+            {k: v for k, v in room_filter_json.items() if k in ("rooms", "not_rooms")},
         )
 
-        self._room_timeline_filter = Filter(room_filter_json.get("timeline", {}))
-        self._room_state_filter = Filter(room_filter_json.get("state", {}))
-        self._room_ephemeral_filter = Filter(room_filter_json.get("ephemeral", {}))
-        self._room_account_data = Filter(room_filter_json.get("account_data", {}))
-        self._presence_filter = Filter(filter_json.get("presence", {}))
-        self._account_data = Filter(filter_json.get("account_data", {}))
+        self._room_timeline_filter = Filter(hs, room_filter_json.get("timeline", {}))
+        self._room_state_filter = Filter(hs, room_filter_json.get("state", {}))
+        self._room_ephemeral_filter = Filter(hs, room_filter_json.get("ephemeral", {}))
+        self._room_account_data = Filter(hs, room_filter_json.get("account_data", {}))
+        self._presence_filter = Filter(hs, filter_json.get("presence", {}))
+        self._account_data = Filter(hs, filter_json.get("account_data", {}))
 
         self.include_leave = filter_json.get("room", {}).get("include_leave", False)
         self.event_fields = filter_json.get("event_fields", [])
         self.event_format = filter_json.get("event_format", "client")
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "<FilterCollection %s>" % (json.dumps(self._filter_json),)
 
-    def get_filter_json(self):
+    def get_filter_json(self) -> JsonDict:
         return self._filter_json
 
-    def timeline_limit(self):
-        return self._room_timeline_filter.limit()
+    def timeline_limit(self) -> int:
+        return self._room_timeline_filter.limit
 
-    def presence_limit(self):
-        return self._presence_filter.limit()
+    def presence_limit(self) -> int:
+        return self._presence_filter.limit
 
-    def ephemeral_limit(self):
-        return self._room_ephemeral_filter.limit()
+    def ephemeral_limit(self) -> int:
+        return self._room_ephemeral_filter.limit
 
-    def lazy_load_members(self):
-        return self._room_state_filter.lazy_load_members()
+    def lazy_load_members(self) -> bool:
+        return self._room_state_filter.lazy_load_members
 
-    def include_redundant_members(self):
-        return self._room_state_filter.include_redundant_members()
+    def include_redundant_members(self) -> bool:
+        return self._room_state_filter.include_redundant_members
 
-    def filter_presence(self, events):
-        return self._presence_filter.filter(events)
+    async def filter_presence(
+        self, events: Iterable[UserPresenceState]
+    ) -> List[UserPresenceState]:
+        return await self._presence_filter.filter(events)
 
-    def filter_account_data(self, events):
-        return self._account_data.filter(events)
+    async def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]:
+        return await self._account_data.filter(events)
 
-    def filter_room_state(self, events):
-        return self._room_state_filter.filter(self._room_filter.filter(events))
+    async def filter_room_state(self, events: Iterable[EventBase]) -> List[EventBase]:
+        return await self._room_state_filter.filter(
+            await self._room_filter.filter(events)
+        )
 
-    def filter_room_timeline(self, events):
-        return self._room_timeline_filter.filter(self._room_filter.filter(events))
+    async def filter_room_timeline(
+        self, events: Iterable[EventBase]
+    ) -> List[EventBase]:
+        return await
self._room_timeline_filter.filter( + await self._room_filter.filter(events) + ) - def filter_room_ephemeral(self, events): - return self._room_ephemeral_filter.filter(self._room_filter.filter(events)) + async def filter_room_ephemeral(self, events: Iterable[JsonDict]) -> List[JsonDict]: + return await self._room_ephemeral_filter.filter( + await self._room_filter.filter(events) + ) - def filter_room_account_data(self, events): - return self._room_account_data.filter(self._room_filter.filter(events)) + async def filter_room_account_data( + self, events: Iterable[JsonDict] + ) -> List[JsonDict]: + return await self._room_account_data.filter( + await self._room_filter.filter(events) + ) - def blocks_all_presence(self): + def blocks_all_presence(self) -> bool: return ( self._presence_filter.filters_all_types() or self._presence_filter.filters_all_senders() ) - def blocks_all_room_ephemeral(self): + def blocks_all_room_ephemeral(self) -> bool: return ( self._room_ephemeral_filter.filters_all_types() or self._room_ephemeral_filter.filters_all_senders() or self._room_ephemeral_filter.filters_all_rooms() ) - def blocks_all_room_timeline(self): + def blocks_all_room_timeline(self) -> bool: return ( self._room_timeline_filter.filters_all_types() or self._room_timeline_filter.filters_all_senders() @@ -249,153 +292,211 @@ def blocks_all_room_timeline(self): class Filter: - def __init__(self, filter_json): + def __init__(self, hs: "HomeServer", filter_json: JsonDict): + self._hs = hs + self._store = hs.get_datastore() self.filter_json = filter_json - self.types = self.filter_json.get("types", None) - self.not_types = self.filter_json.get("not_types", []) + self.limit = filter_json.get("limit", 10) + self.lazy_load_members = filter_json.get("lazy_load_members", False) + self.include_redundant_members = filter_json.get( + "include_redundant_members", False + ) + + self.types = filter_json.get("types", None) + self.not_types = filter_json.get("not_types", []) - self.rooms = self.filter_json.get("rooms", None) - self.not_rooms = self.filter_json.get("not_rooms", []) + self.rooms = filter_json.get("rooms", None) + self.not_rooms = filter_json.get("not_rooms", []) - self.senders = self.filter_json.get("senders", None) - self.not_senders = self.filter_json.get("not_senders", []) + self.senders = filter_json.get("senders", None) + self.not_senders = filter_json.get("not_senders", []) - self.contains_url = self.filter_json.get("contains_url", None) + self.contains_url = filter_json.get("contains_url", None) - self.labels = self.filter_json.get("org.matrix.labels", None) - self.not_labels = self.filter_json.get("org.matrix.not_labels", []) + self.labels = filter_json.get("org.matrix.labels", None) + self.not_labels = filter_json.get("org.matrix.not_labels", []) + + # Ideally these would be rejected at the endpoint if they were provided + # and not supported, but that would involve modifying the JSON schema + # based on the homeserver configuration. 
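+        # When the experiment is disabled, any relation filter keys provided
+        # are simply ignored.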
+ if hs.config.experimental.msc3440_enabled: + self.relation_senders = self.filter_json.get( + "io.element.relation_senders", None + ) + self.relation_types = self.filter_json.get( + "io.element.relation_types", None + ) + else: + self.relation_senders = None + self.relation_types = None - def filters_all_types(self): + def filters_all_types(self) -> bool: return "*" in self.not_types - def filters_all_senders(self): + def filters_all_senders(self) -> bool: return "*" in self.not_senders - def filters_all_rooms(self): + def filters_all_rooms(self) -> bool: return "*" in self.not_rooms - def check(self, event): + def _check(self, event: FilterEvent) -> bool: """Checks whether the filter matches the given event. + Args: + event: The event, account data, or presence to check against this + filter. + Returns: - bool: True if the event matches + True if the event matches the filter. """ # We usually get the full "events" as dictionaries coming through, # except for presence which actually gets passed around as its own # namedtuple type. if isinstance(event, UserPresenceState): - sender = event.user_id - room_id = None - ev_type = "m.presence" - contains_url = False - labels: List[str] = [] + user_id = event.user_id + field_matchers = { + "senders": lambda v: user_id == v, + "types": lambda v: "m.presence" == v, + } + return self._check_fields(field_matchers) else: + content = event.get("content") + # Content is assumed to be a dict below, so ensure it is. This should + # always be true for events, but account_data has been allowed to + # have non-dict content. + if not isinstance(content, dict): + content = {} + sender = event.get("sender", None) if not sender: # Presence events had their 'sender' in content.user_id, but are # now handled above. We don't know if anything else uses this # form. TODO: Check this and probably remove it. - content = event.get("content") - # account_data has been allowed to have non-dict content, so - # check type first - if isinstance(content, dict): - sender = content.get("user_id") + sender = content.get("user_id") room_id = event.get("room_id", None) ev_type = event.get("type", None) - content = event.get("content", {}) # check if there is a string url field in the content for filtering purposes - contains_url = isinstance(content.get("url"), str) labels = content.get(EventContentFields.LABELS, []) - return self.check_fields(room_id, sender, ev_type, labels, contains_url) + field_matchers = { + "rooms": lambda v: room_id == v, + "senders": lambda v: sender == v, + "types": lambda v: _matches_wildcard(ev_type, v), + "labels": lambda v: v in labels, + } + + result = self._check_fields(field_matchers) + if not result: + return result + + contains_url_filter = self.contains_url + if contains_url_filter is not None: + contains_url = isinstance(content.get("url"), str) + if contains_url_filter != contains_url: + return False + + return True - def check_fields(self, room_id, sender, event_type, labels, contains_url): + def _check_fields(self, field_matchers: Dict[str, Callable[[str], bool]]) -> bool: """Checks whether the filter matches the given event fields. + Args: + field_matchers: A map of attribute name to callable to use for checking + particular fields. + + The attribute name and an inverse (not_) must + exist on the Filter. + + The callable should return true if the event's value matches the + filter's value. 
+ Returns: - bool: True if the event fields match + True if the event fields match """ - literal_keys = { - "rooms": lambda v: room_id == v, - "senders": lambda v: sender == v, - "types": lambda v: _matches_wildcard(event_type, v), - "labels": lambda v: v in labels, - } - - for name, match_func in literal_keys.items(): + + for name, match_func in field_matchers.items(): + # If the event matches one of the disallowed values, reject it. not_name = "not_%s" % (name,) disallowed_values = getattr(self, not_name) if any(map(match_func, disallowed_values)): return False + # Otherwise, if the event does not match at least one of the allowed values, + # reject it. allowed_values = getattr(self, name) if allowed_values is not None: if not any(map(match_func, allowed_values)): return False - contains_url_filter = self.filter_json.get("contains_url") - if contains_url_filter is not None: - if contains_url_filter != contains_url: - return False - + # Otherwise, accept it. return True - def filter_rooms(self, room_ids): + def filter_rooms(self, room_ids: Iterable[str]) -> Set[str]: """Apply the 'rooms' filter to a given list of rooms. Args: - room_ids (list): A list of room_ids. + room_ids: A list of room_ids. Returns: - list: A list of room_ids that match the filter + A list of room_ids that match the filter """ room_ids = set(room_ids) - disallowed_rooms = set(self.filter_json.get("not_rooms", [])) + disallowed_rooms = set(self.not_rooms) room_ids -= disallowed_rooms - allowed_rooms = self.filter_json.get("rooms", None) + allowed_rooms = self.rooms if allowed_rooms is not None: room_ids &= set(allowed_rooms) return room_ids - def filter(self, events): - return list(filter(self.check, events)) + async def _check_event_relations( + self, events: Iterable[FilterEvent] + ) -> List[FilterEvent]: + # The event IDs to check; mypy doesn't understand the isinstance check. + event_ids = [event.event_id for event in events if isinstance(event, EventBase)] # type: ignore[attr-defined] + event_ids_to_keep = set( + await self._store.events_have_relations( + event_ids, self.relation_senders, self.relation_types + ) + ) - def limit(self): - return self.filter_json.get("limit", 10) + return [ + event + for event in events + if not isinstance(event, EventBase) or event.event_id in event_ids_to_keep + ] - def lazy_load_members(self): - return self.filter_json.get("lazy_load_members", False) + async def filter(self, events: Iterable[FilterEvent]) -> List[FilterEvent]: + result = [event for event in events if self._check(event)] - def include_redundant_members(self): - return self.filter_json.get("include_redundant_members", False) + if self.relation_senders or self.relation_types: + return await self._check_event_relations(result) - def with_room_ids(self, room_ids): + return result + + def with_room_ids(self, room_ids: Iterable[str]) -> "Filter": """Returns a new filter with the given room IDs appended. Args: - room_ids (iterable[unicode]): The room_ids to add + room_ids: The room_ids to add Returns: filter: A new filter including the given rooms and the old filter's rooms.
""" - newFilter = Filter(self.filter_json) + newFilter = Filter(self._hs, self.filter_json) newFilter.rooms += room_ids return newFilter -def _matches_wildcard(actual_value, filter_value): - if filter_value.endswith("*"): +def _matches_wildcard(actual_value: Optional[str], filter_value: str) -> bool: + if filter_value.endswith("*") and isinstance(actual_value, str): type_prefix = filter_value[:-1] return actual_value.startswith(type_prefix) else: return actual_value == filter_value - - -DEFAULT_FILTER_COLLECTION = FilterCollection({}) diff --git a/synapse/api/presence.py b/synapse/api/presence.py index a3bf0348d115..b80aa83cb3d6 100644 --- a/synapse/api/presence.py +++ b/synapse/api/presence.py @@ -12,49 +12,48 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import namedtuple +from typing import Any, Optional + +import attr from synapse.api.constants import PresenceState +from synapse.types import JsonDict -class UserPresenceState( - namedtuple( - "UserPresenceState", - ( - "user_id", - "state", - "last_active_ts", - "last_federation_update_ts", - "last_user_sync_ts", - "status_msg", - "currently_active", - ), - ) -): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class UserPresenceState: """Represents the current presence state of the user. - user_id (str) - last_active (int): Time in msec that the user last interacted with server. - last_federation_update (int): Time in msec since either a) we sent a presence + user_id + last_active: Time in msec that the user last interacted with server. + last_federation_update: Time in msec since either a) we sent a presence update to other servers or b) we received a presence update, depending on if is a local user or not. - last_user_sync (int): Time in msec that the user last *completed* a sync + last_user_sync: Time in msec that the user last *completed* a sync (or event stream). - status_msg (str): User set status message. + status_msg: User set status message. """ - def as_dict(self): - return dict(self._asdict()) + user_id: str + state: str + last_active_ts: int + last_federation_update_ts: int + last_user_sync_ts: int + status_msg: Optional[str] + currently_active: bool + + def as_dict(self) -> JsonDict: + return attr.asdict(self) @staticmethod - def from_dict(d): + def from_dict(d: JsonDict) -> "UserPresenceState": return UserPresenceState(**d) - def copy_and_replace(self, **kwargs): - return self._replace(**kwargs) + def copy_and_replace(self, **kwargs: Any) -> "UserPresenceState": + return attr.evolve(self, **kwargs) @classmethod - def default(cls, user_id): + def default(cls, user_id: str) -> "UserPresenceState": """Returns a default presence state.""" return cls( user_id=user_id, diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 3e3d09bbd244..849c18ceda16 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -17,6 +17,7 @@ from typing import Hashable, Optional, Tuple from synapse.api.errors import LimitExceededError +from synapse.config.ratelimiting import RateLimitConfig from synapse.storage.databases.main import DataStore from synapse.types import Requester from synapse.util import Clock @@ -46,7 +47,7 @@ def __init__( # * How many times an action has occurred since a point in time # * The point in time # * The rate_hz of this particular entry. 
This can vary per request - self.actions: OrderedDict[Hashable, Tuple[float, int, float]] = OrderedDict() + self.actions: OrderedDict[Hashable, Tuple[float, float, float]] = OrderedDict() async def can_do_action( self, @@ -56,7 +57,7 @@ async def can_do_action( burst_count: Optional[int] = None, update: bool = True, n_actions: int = 1, - _time_now_s: Optional[int] = None, + _time_now_s: Optional[float] = None, ) -> Tuple[bool, float]: """Can the entity (e.g. user or IP address) perform the action? @@ -160,7 +161,7 @@ async def can_do_action( return allowed, time_allowed - def _prune_message_counts(self, time_now_s: int): + def _prune_message_counts(self, time_now_s: float) -> None: """Remove message count entries that have not exceeded their defined rate_hz limit @@ -188,8 +189,8 @@ async def ratelimit( burst_count: Optional[int] = None, update: bool = True, n_actions: int = 1, - _time_now_s: Optional[int] = None, - ): + _time_now_s: Optional[float] = None, + ) -> None: """Checks if an action can be performed. If not, raises a LimitExceededError Checks if the user has ratelimiting disabled in the database by looking @@ -233,3 +234,88 @@ async def ratelimit( raise LimitExceededError( retry_after_ms=int(1000 * (time_allowed - time_now_s)) ) + + +class RequestRatelimiter: + def __init__( + self, + store: DataStore, + clock: Clock, + rc_message: RateLimitConfig, + rc_admin_redaction: Optional[RateLimitConfig], + ): + self.store = store + self.clock = clock + + # The rate_hz and burst_count are overridden on a per-user basis + self.request_ratelimiter = Ratelimiter( + store=self.store, clock=self.clock, rate_hz=0, burst_count=0 + ) + self._rc_message = rc_message + + # Check whether ratelimiting room admin message redaction is enabled + # by the presence of rate limits in the config + if rc_admin_redaction: + self.admin_redaction_ratelimiter: Optional[Ratelimiter] = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=rc_admin_redaction.per_second, + burst_count=rc_admin_redaction.burst_count, + ) + else: + self.admin_redaction_ratelimiter = None + + async def ratelimit( + self, + requester: Requester, + update: bool = True, + is_admin_redaction: bool = False, + ) -> None: + """Ratelimits requests. + + Args: + requester + update: Whether to record that a request is being processed. + Set to False when doing multiple checks for one request (e.g. + to check up front if we would reject the request), and set to + True for the last call for a given request. + is_admin_redaction: Whether this is a room admin/moderator + redacting an event. If so then we may apply different + ratelimits depending on config. + + Raises: + LimitExceededError if the request should be ratelimited + """ + user_id = requester.user.to_string() + + # The AS user itself is never rate limited. + app_service = self.store.get_app_service_by_user_id(user_id) + if app_service is not None: + return # do not ratelimit app service senders + + messages_per_second = self._rc_message.per_second + burst_count = self._rc_message.burst_count + + # Check if there is a per user override in the DB. 
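Before the per-user override lookup continues below, it may help to see the decay scheme those `(time, action count, rate_hz)` tuples support in isolation. A hedged sketch of a limiter in the same spirit, not the actual `Ratelimiter` (which is async and supports per-request overrides): actions accumulate per key and leak away at `rate_hz`, with `burst_count` capping the backlog.

```python
import time
from typing import Dict, Hashable, Tuple

class TinyRatelimiter:
    """Illustrative leaky-bucket limiter; names and structure are invented."""

    def __init__(self, rate_hz: float, burst_count: float) -> None:
        self.rate_hz = rate_hz
        self.burst_count = burst_count
        # key -> (time of last update, actions outstanding at that time)
        self.actions: Dict[Hashable, Tuple[float, float]] = {}

    def can_do_action(self, key: Hashable, n_actions: int = 1) -> bool:
        now = time.time()
        last_ts, count = self.actions.get(key, (now, 0.0))
        # Performed actions "leak away" at rate_hz since the last update.
        count = max(0.0, count - (now - last_ts) * self.rate_hz)
        if count + n_actions > self.burst_count:
            return False
        self.actions[key] = (now, count + n_actions)
        return True
```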
+ override = await self.store.get_ratelimit_for_user(user_id) + if override: + # If overridden with a null Hz then ratelimiting has been entirely + # disabled for the user + if not override.messages_per_second: + return + + messages_per_second = override.messages_per_second + burst_count = override.burst_count + + if is_admin_redaction and self.admin_redaction_ratelimiter: + # If we have separate config for admin redactions, use a separate + # ratelimiter as to not have user_ids clash + await self.admin_redaction_ratelimiter.ratelimit(requester, update=update) + else: + # Override rate and burst count per-user + await self.request_ratelimiter.ratelimit( + requester, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index f32a40ba4ae6..0a895bba480a 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -70,12 +70,17 @@ class RoomVersion: msc2176_redaction_rules = attr.ib(type=bool) # MSC3083: Support the 'restricted' join_rule. msc3083_join_rules = attr.ib(type=bool) + # MSC3375: Support for the proper redaction rules for MSC3083. This mustn't + # be enabled if MSC3083 is not. + msc3375_redaction_rules = attr.ib(type=bool) # MSC2403: Allows join_rules to be set to 'knock', changes auth rules to allow sending # m.room.membership event with membership 'knock'. msc2403_knocking = attr.ib(type=bool) # MSC2716: Adds m.room.power_levels -> content.historical field to control # whether "insertion", "chunk", "marker" events can be sent msc2716_historical = attr.ib(type=bool) + # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events + msc2716_redactions = attr.ib(type=bool) class RoomVersions: @@ -90,8 +95,10 @@ class RoomVersions: limit_notifications_power_levels=False, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V2 = RoomVersion( "2", @@ -104,8 +111,10 @@ class RoomVersions: limit_notifications_power_levels=False, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V3 = RoomVersion( "3", @@ -118,8 +127,10 @@ class RoomVersions: limit_notifications_power_levels=False, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V4 = RoomVersion( "4", @@ -132,8 +143,10 @@ class RoomVersions: limit_notifications_power_levels=False, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V5 = RoomVersion( "5", @@ -146,8 +159,10 @@ class RoomVersions: limit_notifications_power_levels=False, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V6 = RoomVersion( "6", @@ -160,8 +175,10 @@ class RoomVersions: limit_notifications_power_levels=True, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -174,8 +191,10 @@ class RoomVersions: limit_notifications_power_levels=True, msc2176_redaction_rules=True, 
msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=False, msc2716_historical=False, + msc2716_redactions=False, ) V7 = RoomVersion( "7", @@ -188,12 +207,14 @@ class RoomVersions: limit_notifications_power_levels=True, msc2176_redaction_rules=False, msc3083_join_rules=False, + msc3375_redaction_rules=False, msc2403_knocking=True, msc2716_historical=False, + msc2716_redactions=False, ) - MSC2716 = RoomVersion( - "org.matrix.msc2716", - RoomDisposition.UNSTABLE, + V8 = RoomVersion( + "8", + RoomDisposition.STABLE, EventFormatVersions.V3, StateResolutionVersions.V2, enforce_key_validity=True, @@ -201,12 +222,14 @@ class RoomVersions: strict_canonicaljson=True, limit_notifications_power_levels=True, msc2176_redaction_rules=False, - msc3083_join_rules=False, + msc3083_join_rules=True, + msc3375_redaction_rules=False, msc2403_knocking=True, - msc2716_historical=True, + msc2716_historical=False, + msc2716_redactions=False, ) - V8 = RoomVersion( - "8", + V9 = RoomVersion( + "9", RoomDisposition.STABLE, EventFormatVersions.V3, StateResolutionVersions.V2, @@ -216,8 +239,26 @@ class RoomVersions: limit_notifications_power_levels=True, msc2176_redaction_rules=False, msc3083_join_rules=True, + msc3375_redaction_rules=True, msc2403_knocking=True, msc2716_historical=False, + msc2716_redactions=False, + ) + MSC2716v3 = RoomVersion( + "org.matrix.msc2716v3", + RoomDisposition.UNSTABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=False, + msc3375_redaction_rules=False, + msc2403_knocking=True, + msc2716_historical=True, + msc2716_redactions=True, ) @@ -232,8 +273,9 @@ class RoomVersions: RoomVersions.V6, RoomVersions.MSC2176, RoomVersions.V7, - RoomVersions.MSC2716, RoomVersions.V8, + RoomVersions.V9, + RoomVersions.MSC2716v3, ) } @@ -266,7 +308,7 @@ class RoomVersionCapability: ), RoomVersionCapability( "restricted", - None, + RoomVersions.V9, lambda room_version: room_version.msc3083_join_rules, ), ) diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 4b1f213c75f8..f9f9467dc1d7 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -19,6 +19,7 @@ from urllib.parse import urlencode from synapse.config import ConfigError +from synapse.config.homeserver import HomeServerConfig SYNAPSE_CLIENT_API_PREFIX = "/_synapse/client" CLIENT_API_PREFIX = "/_matrix/client" @@ -29,33 +30,27 @@ STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" SERVER_KEY_V2_PREFIX = "/_matrix/key/v2" -MEDIA_PREFIX = "/_matrix/media/r0" +MEDIA_R0_PREFIX = "/_matrix/media/r0" +MEDIA_V3_PREFIX = "/_matrix/media/v3" LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" class ConsentURIBuilder: - def __init__(self, hs_config): - """ - Args: - hs_config (synapse.config.homeserver.HomeServerConfig): - """ - if hs_config.form_secret is None: + def __init__(self, hs_config: HomeServerConfig): + if hs_config.key.form_secret is None: raise ConfigError("form_secret not set in config") - if hs_config.public_baseurl is None: - raise ConfigError("public_baseurl not set in config") - - self._hmac_secret = hs_config.form_secret.encode("utf-8") - self._public_baseurl = hs_config.public_baseurl + self._hmac_secret = hs_config.key.form_secret.encode("utf-8") + self._public_baseurl = hs_config.server.public_baseurl - def build_user_consent_uri(self, user_id): + def build_user_consent_uri(self, user_id: str) -> str: 
"""Build a URI which we can give to the user to do their privacy policy consent Args: - user_id (str): mxid or username of user + user_id: mxid or username of user Returns - (str) the URI where the user can do consent + The URI where the user can do consent """ mac = hmac.new( key=self._hmac_secret, msg=user_id.encode("ascii"), digestmod=sha256 diff --git a/synapse/app/__init__.py b/synapse/app/__init__.py index f9940491e8e4..ee51480a9e64 100644 --- a/synapse/app/__init__.py +++ b/synapse/app/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. import logging import sys +from typing import Container from synapse import python_dependencies # noqa: E402 @@ -27,7 +28,9 @@ sys.exit(1) -def check_bind_error(e, address, bind_addresses): +def check_bind_error( + e: Exception, address: str, bind_addresses: Container[str] +) -> None: """ This method checks an exception occurred while binding on 0.0.0.0. If :: is specified in the bind addresses a warning is shown. @@ -38,9 +41,9 @@ def check_bind_error(e, address, bind_addresses): When binding on 0.0.0.0 after :: this can safely be ignored. Args: - e (Exception): Exception that was caught. - address (str): Address on which binding was attempted. - bind_addresses (list): Addresses on which the service listens. + e: Exception that was caught. + address: Address on which binding was attempted. + bind_addresses: Addresses on which the service listens. """ if address == "0.0.0.0" and "::" in bind_addresses: logger.warning( diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 50a02f51f54a..5fc59c1be11d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import atexit import gc import logging import os @@ -21,105 +22,138 @@ import sys import traceback import warnings -from typing import TYPE_CHECKING, Awaitable, Callable, Iterable +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Collection, + Dict, + Iterable, + List, + NoReturn, + Optional, + Tuple, + cast, +) from cryptography.utils import CryptographyDeprecationWarning -from typing_extensions import NoReturn import twisted -from twisted.internet import defer, error, reactor +from twisted.internet import defer, error, reactor as _reactor +from twisted.internet.interfaces import IOpenSSLContextFactory, IReactorSSL, IReactorTCP +from twisted.internet.protocol import ServerFactory +from twisted.internet.tcp import Port from twisted.logger import LoggingFile, LogLevel from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.python.threadpool import ThreadPool import synapse from synapse.api.constants import MAX_PDU_SIZE from synapse.app import check_bind_error from synapse.app.phone_stats_home import start_phone_stats_home from synapse.config.homeserver import HomeServerConfig +from synapse.config.server import ManholeConfig from synapse.crypto import context_factory +from synapse.events.presence_router import load_legacy_presence_router from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.events.third_party_rules import load_legacy_third_party_event_rules +from synapse.handlers.auth import load_legacy_password_auth_providers from synapse.logging.context import PreserveLoggingContext +from synapse.metrics import register_threadpool from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats +from synapse.types import ISynapseReactor from synapse.util.caches.lrucache import setup_expire_lru_cache_entries from synapse.util.daemonize import daemonize_process +from synapse.util.gai_resolver import GAIResolver from synapse.util.rlimit import change_resource_limit from synapse.util.versionstring import get_version_string if TYPE_CHECKING: from synapse.server import HomeServer +# Twisted injects the global reactor to make it easier to import; this confuses +# mypy, which thinks it is a module. Tell it that it is a more proper type. +reactor = cast(ISynapseReactor, _reactor) + + logger = logging.getLogger(__name__) # list of tuples of function, args list, kwargs dict -_sighup_callbacks = [] +_sighup_callbacks: List[ + Tuple[Callable[..., None], Tuple[Any, ...], Dict[str, Any]] +] = [] -def register_sighup(func, *args, **kwargs): +def register_sighup(func: Callable[..., None], *args: Any, **kwargs: Any) -> None: """ Register a function to be called when a SIGHUP occurs. Args: - func (function): Function to be called when sent a SIGHUP signal. + func: Function to be called when sent a SIGHUP signal. *args, **kwargs: args and kwargs to be passed to the target function. """ _sighup_callbacks.append((func, args, kwargs)) -def start_worker_reactor(appname, config, run_command=reactor.run): +def start_worker_reactor( + appname: str, + config: HomeServerConfig, + run_command: Callable[[], None] = reactor.run, +) -> None: """Run the reactor in the main process Daemonizes if necessary, and then configures some resources, before starting the reactor. Pulls configuration from the 'worker' settings in 'config'.
Args: - appname (str): application name which will be sent to syslog - config (synapse.config.Config): config object - run_command (Callable[]): callable that actually runs the reactor + appname: application name which will be sent to syslog + config: config object + run_command: callable that actually runs the reactor """ - logger = logging.getLogger(config.worker_app) + logger = logging.getLogger(config.worker.worker_app) start_reactor( appname, - soft_file_limit=config.soft_file_limit, - gc_thresholds=config.gc_thresholds, - pid_file=config.worker_pid_file, - daemonize=config.worker_daemonize, - print_pidfile=config.print_pidfile, + soft_file_limit=config.server.soft_file_limit, + gc_thresholds=config.server.gc_thresholds, + pid_file=config.worker.worker_pid_file, + daemonize=config.worker.worker_daemonize, + print_pidfile=config.server.print_pidfile, logger=logger, run_command=run_command, ) def start_reactor( - appname, - soft_file_limit, - gc_thresholds, - pid_file, - daemonize, - print_pidfile, - logger, - run_command=reactor.run, -): + appname: str, + soft_file_limit: int, + gc_thresholds: Optional[Tuple[int, int, int]], + pid_file: str, + daemonize: bool, + print_pidfile: bool, + logger: logging.Logger, + run_command: Callable[[], None] = reactor.run, +) -> None: """Run the reactor in the main process Daemonizes if necessary, and then configures some resources, before starting the reactor Args: - appname (str): application name which will be sent to syslog - soft_file_limit (int): + appname: application name which will be sent to syslog + soft_file_limit: gc_thresholds: - pid_file (str): name of pid file to write to if daemonize is True - daemonize (bool): true to run the reactor in a background process - print_pidfile (bool): whether to print the pid file, if daemonize is True - logger (logging.Logger): logger instance to pass to Daemonize - run_command (Callable[]): callable that actually runs the reactor + pid_file: name of pid file to write to if daemonize is True + daemonize: true to run the reactor in a background process + print_pidfile: whether to print the pid file, if daemonize is True + logger: logger instance to pass to Daemonize + run_command: callable that actually runs the reactor """ - def run(): + def run() -> None: logger.info("Running") setup_jemalloc_stats() change_resource_limit(soft_file_limit) @@ -178,7 +212,7 @@ def redirect_stdio_to_logs() -> None: print("Redirected stdout/stderr to logs") -def register_start(cb: Callable[..., Awaitable], *args, **kwargs) -> None: +def register_start(cb: Callable[..., Awaitable], *args: Any, **kwargs: Any) -> None: """Register a callback with the reactor, to be called once it is running This can be used to initialise parts of the system which require an asynchronous @@ -188,7 +222,7 @@ def register_start(cb: Callable[..., Awaitable], *args, **kwargs) -> None: will exit. """ - async def wrapper(): + async def wrapper() -> None: try: await cb(*args, **kwargs) except Exception: @@ -217,7 +251,7 @@ async def wrapper(): reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics(bind_addresses, port): +def listen_metrics(bind_addresses: Iterable[str], port: int) -> None: """ Start Prometheus metrics server. 
""" @@ -228,7 +262,12 @@ def listen_metrics(bind_addresses, port): start_http_server(port, addr=host, registry=RegistryProxy) -def listen_manhole(bind_addresses: Iterable[str], port: int, manhole_globals: dict): +def listen_manhole( + bind_addresses: Collection[str], + port: int, + manhole_settings: ManholeConfig, + manhole_globals: dict, +) -> None: # twisted.conch.manhole 21.1.0 uses "int_from_bytes", which produces a confusing # warning. It's fixed by https://github.com/twisted/twisted/pull/1522), so # suppress the warning for now. @@ -243,16 +282,22 @@ def listen_manhole(bind_addresses: Iterable[str], port: int, manhole_globals: di listen_tcp( bind_addresses, port, - manhole(username="matrix", password="rabbithole", globals=manhole_globals), + manhole(settings=manhole_settings, globals=manhole_globals), ) -def listen_tcp(bind_addresses, port, factory, reactor=reactor, backlog=50): +def listen_tcp( + bind_addresses: Collection[str], + port: int, + factory: ServerFactory, + reactor: IReactorTCP = reactor, + backlog: int = 50, +) -> List[Port]: """ Create a TCP socket for a port and several addresses Returns: - list[twisted.internet.tcp.Port]: listening for TCP connections + list of twisted.internet.tcp.Port listening for TCP connections """ r = [] for address in bind_addresses: @@ -261,12 +306,19 @@ def listen_tcp(bind_addresses, port, factory, reactor=reactor, backlog=50): except error.CannotListenError as e: check_bind_error(e, address, bind_addresses) - return r + # IReactorTCP returns an object implementing IListeningPort from listenTCP, + # but we know it will be a Port instance. + return r # type: ignore[return-value] def listen_ssl( - bind_addresses, port, factory, context_factory, reactor=reactor, backlog=50 -): + bind_addresses: Collection[str], + port: int, + factory: ServerFactory, + context_factory: IOpenSSLContextFactory, + reactor: IReactorSSL = reactor, + backlog: int = 50, +) -> List[Port]: """ Create an TLS-over-TCP socket for a port and several addresses @@ -282,18 +334,21 @@ def listen_ssl( except error.CannotListenError as e: check_bind_error(e, address, bind_addresses) - return r + # IReactorSSL incorrectly declares that an int is returned from listenSSL, + # it actually returns an object implementing IListeningPort, but we know it + # will be a Port instance. + return r # type: ignore[return-value] -def refresh_certificate(hs): +def refresh_certificate(hs: "HomeServer") -> None: """ Refresh the TLS certificates that Synapse is using by re-reading them from disk and updating the TLS context factories to use them. """ - if not hs.config.has_tls_listener(): + if not hs.config.server.has_tls_listener(): return - hs.config.read_certificate_from_disk() + hs.config.tls.read_certificate_from_disk() hs.tls_server_context_factory = context_factory.ServerContextFactory(hs.config) if hs._listening_services: @@ -317,7 +372,7 @@ def refresh_certificate(hs): logger.info("Context factories updated.") -async def start(hs: "HomeServer"): +async def start(hs: "HomeServer") -> None: """ Start a Synapse server or worker. @@ -329,12 +384,26 @@ async def start(hs: "HomeServer"): Args: hs: homeserver instance """ + reactor = hs.get_reactor() + + # We want to use a separate thread pool for the resolver so that large + # numbers of DNS requests don't starve out other users of the threadpool. 
+ resolver_threadpool = ThreadPool(name="gai_resolver") + resolver_threadpool.start() + reactor.addSystemEventTrigger("during", "shutdown", resolver_threadpool.stop) + reactor.installNameResolver( + GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) + ) + + # Register the threadpools with our metrics. + register_threadpool("default", reactor.getThreadPool()) + register_threadpool("gai_resolver", resolver_threadpool) + # Set up the SIGHUP machinery. if hasattr(signal, "SIGHUP"): - reactor = hs.get_reactor() @wrap_as_background_process("sighup") - def handle_sighup(*args, **kwargs): + async def handle_sighup(*args: Any, **kwargs: Any) -> None: # Tell systemd our state, if we're using it. This will silently fail if # we're not using systemd. sdnotify(b"RELOADING=1") @@ -347,7 +416,7 @@ def handle_sighup(*args, **kwargs): # We defer running the sighup handlers until next reactor tick. This # is so that we're in a sane state, e.g. flushing the logs may fail # if the sighup happens in the middle of writing a log entry. - def run_sighup(*args, **kwargs): + def run_sighup(*args: Any, **kwargs: Any) -> None: # `callFromThread` should be "signal safe" as well as thread # safe. reactor.callFromThread(handle_sighup, *args, **kwargs) @@ -370,6 +439,8 @@ def run_sighup(*args, **kwargs): load_legacy_spam_checkers(hs) load_legacy_third_party_event_rules(hs) + load_legacy_presence_router(hs) + load_legacy_password_auth_providers(hs) # If we've configured an expiry time for caches, start the background job now. setup_expire_lru_cache_entries(hs) @@ -389,7 +460,7 @@ def run_sighup(*args, **kwargs): # If background tasks are running on the main process, start collecting the # phone home stats. - if hs.config.run_background_tasks: + if hs.config.worker.run_background_tasks: start_phone_stats_home(hs) # We now freeze all allocated objects in the hopes that (almost) @@ -401,32 +472,40 @@ def run_sighup(*args, **kwargs): gc.collect() gc.freeze() + # Speed up shutdowns by freezing all allocated objects. This moves everything + # into the permanent generation and excludes them from the final GC. + # Unfortunately this only works on CPython 3.7 and above. + if platform.python_implementation() == "CPython" and sys.version_info >= (3, 7): + atexit.register(gc.freeze) -def setup_sentry(hs): - """Enable sentry integration, if enabled in configuration - Args: - hs (synapse.server.HomeServer) - """ +def setup_sentry(hs: "HomeServer") -> None: + """Enable sentry integration, if enabled in configuration""" - if not hs.config.sentry_enabled: + if not hs.config.metrics.sentry_enabled: return import sentry_sdk - sentry_sdk.init(dsn=hs.config.sentry_dsn, release=get_version_string(synapse)) + sentry_sdk.init( + dsn=hs.config.metrics.sentry_dsn, release=get_version_string(synapse) + ) # We set some default tags that give some context to this instance with sentry_sdk.configure_scope() as scope: - scope.set_tag("matrix_server_name", hs.config.server_name) + scope.set_tag("matrix_server_name", hs.config.server.server_name) - app = hs.config.worker_app if hs.config.worker_app else "synapse.app.homeserver" + app = ( + hs.config.worker.worker_app + if hs.config.worker.worker_app + else "synapse.app.homeserver" + ) name = hs.get_instance_name() scope.set_tag("worker_app", app) scope.set_tag("worker_name", name) -def setup_sdnotify(hs): +def setup_sdnotify(hs: "HomeServer") -> None: """Adds process state hooks to tell systemd what we are up to.""" # Tell systemd our state, if we're using it.
This will silently fail if @@ -441,7 +520,7 @@ def setup_sdnotify(hs): sdnotify_sockaddr = os.getenv("NOTIFY_SOCKET") -def sdnotify(state): +def sdnotify(state: bytes) -> None: """ Send a notification to systemd, if the NOTIFY_SOCKET env var is set. @@ -450,7 +529,7 @@ def sdnotify(state): package which many OSes don't include as a matter of principle. Args: - state (bytes): notification to send + state: notification to send """ if not isinstance(state, bytes): raise TypeError("sdnotify should be called with a bytes") diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 3234d9ebba07..42238f7f280b 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -17,6 +17,7 @@ import os import sys import tempfile +from typing import List, Optional from twisted.internet import defer, task @@ -25,6 +26,7 @@ from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging +from synapse.events import EventBase from synapse.handlers.admin import ExfiltrationWriter from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore @@ -38,8 +40,9 @@ from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore -from synapse.replication.slave.storage.room import RoomStore from synapse.server import HomeServer +from synapse.storage.databases.main.room import RoomWorkerStore +from synapse.types import StateMap from synapse.util.logcontext import LoggingContext from synapse.util.versionstring import get_version_string @@ -58,23 +61,18 @@ class AdminCmdSlavedStore( SlavedPushRuleStore, SlavedEventStore, SlavedClientIpStore, - RoomStore, BaseSlavedStore, + RoomWorkerStore, ): pass class AdminCmdServer(HomeServer): - DATASTORE_CLASS = AdminCmdSlavedStore + DATASTORE_CLASS = AdminCmdSlavedStore # type: ignore -async def export_data_command(hs, args): - """Export data for a user. - - Args: - hs (HomeServer) - args (argparse.Namespace) - """ +async def export_data_command(hs: HomeServer, args: argparse.Namespace) -> None: + """Export data for a user.""" user_id = args.user_id directory = args.output_directory @@ -92,12 +90,12 @@ class FileExfiltrationWriter(ExfiltrationWriter): Note: This writes to disk on the main reactor thread. Args: - user_id (str): The user whose data is being exfiltrated. - directory (str|None): The directory to write the data to, if None then - will write to a temporary directory. + user_id: The user whose data is being exfiltrated. + directory: The directory to write the data to, if None then will write + to a temporary directory. 
""" - def __init__(self, user_id, directory=None): + def __init__(self, user_id: str, directory: Optional[str] = None): self.user_id = user_id if directory: @@ -111,7 +109,7 @@ def __init__(self, user_id, directory=None): if list(os.listdir(self.base_directory)): raise Exception("Directory must be empty") - def write_events(self, room_id, events): + def write_events(self, room_id: str, events: List[EventBase]) -> None: room_directory = os.path.join(self.base_directory, "rooms", room_id) os.makedirs(room_directory, exist_ok=True) events_file = os.path.join(room_directory, "events") @@ -120,7 +118,9 @@ def write_events(self, room_id, events): for event in events: print(json.dumps(event.get_pdu_json()), file=f) - def write_state(self, room_id, event_id, state): + def write_state( + self, room_id: str, event_id: str, state: StateMap[EventBase] + ) -> None: room_directory = os.path.join(self.base_directory, "rooms", room_id) state_directory = os.path.join(room_directory, "state") os.makedirs(state_directory, exist_ok=True) @@ -131,7 +131,9 @@ def write_state(self, room_id, event_id, state): for event in state.values(): print(json.dumps(event.get_pdu_json()), file=f) - def write_invite(self, room_id, event, state): + def write_invite( + self, room_id: str, event: EventBase, state: StateMap[EventBase] + ) -> None: self.write_events(room_id, [event]) # We write the invite state somewhere else as they aren't full events @@ -145,11 +147,27 @@ def write_invite(self, room_id, event, state): for event in state.values(): print(json.dumps(event), file=f) - def finished(self): + def write_knock( + self, room_id: str, event: EventBase, state: StateMap[EventBase] + ) -> None: + self.write_events(room_id, [event]) + + # We write the knock state somewhere else as they aren't full events + # and are only a subset of the state at the event. + room_directory = os.path.join(self.base_directory, "rooms", room_id) + os.makedirs(room_directory, exist_ok=True) + + knock_state = os.path.join(room_directory, "knock_state") + + with open(knock_state, "a") as f: + for event in state.values(): + print(json.dumps(event), file=f) + + def finished(self) -> str: return self.base_directory -def start(config_options): +def start(config_options: List[str]) -> None: parser = argparse.ArgumentParser(description="Synapse Admin Command") HomeServerConfig.add_arguments_to_parser(parser) @@ -180,34 +198,30 @@ def start(config_options): sys.stderr.write("\n" + str(e) + "\n") sys.exit(1) - if config.worker_app is not None: - assert config.worker_app == "synapse.app.admin_cmd" + if config.worker.worker_app is not None: + assert config.worker.worker_app == "synapse.app.admin_cmd" # Update the config with some basic overrides so that don't have to specify # a full worker config. - config.worker_app = "synapse.app.admin_cmd" + config.worker.worker_app = "synapse.app.admin_cmd" - if ( - not config.worker_daemonize - and not config.worker_log_file - and not config.worker_log_config - ): + if not config.worker.worker_daemonize and not config.worker.worker_log_config: # Since we're meant to be run as a "command" let's not redirect stdio # unless we've actually set log config. 
- config.no_redirect_stdio = True + config.logging.no_redirect_stdio = True # Explicitly disable background processes - config.update_user_directory = False - config.run_background_tasks = False - config.start_pushers = False - config.pusher_shard_config.instances = [] - config.send_federation = False - config.federation_shard_config.instances = [] + config.server.update_user_directory = False + config.worker.run_background_tasks = False + config.worker.start_pushers = False + config.worker.pusher_shard_config.instances = [] + config.worker.send_federation = False + config.worker.federation_shard_config.instances = [] - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts + synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts ss = AdminCmdServer( - config.server_name, + config.server.server_name, config=config, version_string="Synapse/" + get_version_string(synapse), ) @@ -221,9 +235,9 @@ def start(config_options): # We also make sure that `_base.start` gets run before we actually run the # command. - async def run(): + async def run() -> None: with LoggingContext("command"): - _base.start(ss) + await _base.start(ss) await args.func(ss, args) _base.start_worker_reactor( diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 3b7131af8fa2..e256de200355 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -14,11 +14,10 @@ # limitations under the License. import logging import sys -from typing import Dict, Optional +from typing import Dict, List, Optional, Tuple from twisted.internet import address -from twisted.web.resource import IResource -from twisted.web.server import Request +from twisted.web.resource import Resource import synapse import synapse.events @@ -27,7 +26,8 @@ CLIENT_API_PREFIX, FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, - MEDIA_PREFIX, + MEDIA_R0_PREFIX, + MEDIA_V3_PREFIX, SERVER_KEY_V2_PREFIX, ) from synapse.app import _base @@ -44,7 +44,7 @@ from synapse.federation.transport.server import TransportLayerServer from synapse.http.server import JsonResource, OptionsResource from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.http.site import SynapseSite +from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource @@ -64,45 +64,43 @@ from synapse.replication.slave.storage.pushers import SlavedPusherStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore -from synapse.replication.slave.storage.room import RoomStore from synapse.rest.admin import register_servlets_for_media_repo -from synapse.rest.client.v1 import events, login, presence, room -from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet -from synapse.rest.client.v1.profile import ( - ProfileAvatarURLRestServlet, - ProfileDisplaynameRestServlet, - ProfileRestServlet, -) -from synapse.rest.client.v1.push_rule import PushRuleRestServlet -from synapse.rest.client.v1.voip import VoipRestServlet -from synapse.rest.client.v2_alpha import ( +from synapse.rest.client import ( account_data, + events, groups, + initial_sync, + login, + presence, + profile, + push_rule, read_marker, receipts, + room, room_keys, + sendtodevice, sync, tags, user_directory, + versions, + voip, ) -from 
synapse.rest.client.v2_alpha._base import client_patterns -from synapse.rest.client.v2_alpha.account import ThreepidRestServlet -from synapse.rest.client.v2_alpha.account_data import ( - AccountDataServlet, - RoomAccountDataServlet, -) -from synapse.rest.client.v2_alpha.devices import DevicesRestServlet -from synapse.rest.client.v2_alpha.keys import ( +from synapse.rest.client._base import client_patterns +from synapse.rest.client.account import ThreepidRestServlet +from synapse.rest.client.devices import DevicesRestServlet +from synapse.rest.client.keys import ( KeyChangesServlet, KeyQueryServlet, OneTimeKeyServlet, ) -from synapse.rest.client.v2_alpha.register import RegisterRestServlet -from synapse.rest.client.v2_alpha.sendtodevice import SendToDeviceRestServlet -from synapse.rest.client.versions import VersionsRestServlet +from synapse.rest.client.register import ( + RegisterRestServlet, + RegistrationTokenValidityRestServlet, +) from synapse.rest.health import HealthResource from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.synapse.client import build_synapse_client_resource_tree +from synapse.rest.well_known import well_known_resource from synapse.server import HomeServer from synapse.storage.databases.main.censor_events import CensorEventsStore from synapse.storage.databases.main.client_ips import ClientIpWorkerStore @@ -114,11 +112,15 @@ MonthlyActiveUsersWorkerStore, ) from synapse.storage.databases.main.presence import PresenceStore +from synapse.storage.databases.main.room import RoomWorkerStore +from synapse.storage.databases.main.room_batch import RoomBatchStore from synapse.storage.databases.main.search import SearchStore +from synapse.storage.databases.main.session import SessionStore from synapse.storage.databases.main.stats import StatsStore from synapse.storage.databases.main.transactions import TransactionWorkerStore from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore from synapse.storage.databases.main.user_directory import UserDirectoryStore +from synapse.types import JsonDict from synapse.util.httpresourcetree import create_resource_tree from synapse.util.versionstring import get_version_string @@ -132,18 +134,20 @@ class KeyUploadServlet(RestServlet): PATTERNS = client_patterns("/keys/upload(/(?P[^/]+))?$") - def __init__(self, hs): + def __init__(self, hs: HomeServer): """ Args: - hs (synapse.server.HomeServer): server + hs: server """ super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.http_client = hs.get_simple_http_client() - self.main_uri = hs.config.worker_main_http_uri + self.main_uri = hs.config.worker.worker_main_http_uri - async def on_POST(self, request: Request, device_id: Optional[str]): + async def on_POST( + self, request: SynapseRequest, device_id: Optional[str] + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user_id = requester.user.to_string() body = parse_json_object_from_request(request) @@ -187,9 +191,8 @@ async def on_POST(self, request: Request, device_id: Optional[str]): # If the header exists, add to the comma-separated list of the first # instance of the header. Otherwise, generate a new header. 
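To spell out what the comment above asks for: the previous hop must be appended to the first `X-Forwarded-For` header value, while any further instances of the header keep their place. A standalone sketch of that behaviour (the helper is invented; the corrected code below does the same thing in place):

```python
from typing import List

def append_forwarded_for(existing: List[bytes], previous_host: bytes) -> List[bytes]:
    # Append the previous hop to the first header instance, preserving the
    # tail of additional header instances.
    if existing:
        return [existing[0] + b", " + previous_host] + existing[1:]
    return [previous_host]

# append_forwarded_for([b"10.0.0.1"], b"10.0.0.2") == [b"10.0.0.1, 10.0.0.2"]
```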
if x_forwarded_for: - x_forwarded_for = [ - x_forwarded_for[0] + b", " + previous_host - ] + x_forwarded_for[1:] + x_forwarded_for = [ + x_forwarded_for[0] + b", " + previous_host, + *x_forwarded_for[1:], + ] else: x_forwarded_for = [previous_host] headers[b"X-Forwarded-For"] = x_forwarded_for @@ -237,7 +240,8 @@ class GenericWorkerSlavedStore( ClientIpWorkerStore, SlavedEventStore, SlavedKeyStore, - RoomStore, + RoomWorkerStore, + RoomBatchStore, DirectoryStore, SlavedApplicationServiceStore, SlavedRegistrationStore, @@ -250,15 +254,19 @@ class GenericWorkerSlavedStore( SearchStore, TransactionWorkerStore, LockStore, + SessionStore, BaseSlavedStore, ): - pass + # Properties that multiple storage classes define. Tell mypy what the + # expected type is. + server_name: str + config: HomeServerConfig class GenericWorkerServer(HomeServer): - DATASTORE_CLASS = GenericWorkerSlavedStore + DATASTORE_CLASS = GenericWorkerSlavedStore # type: ignore - def _listen_http(self, listener_config: ListenerConfig): + def _listen_http(self, listener_config: ListenerConfig) -> None: port = listener_config.port bind_addresses = listener_config.bind_addresses @@ -266,10 +274,10 @@ def _listen_http(self, listener_config: ListenerConfig): site_tag = listener_config.http_options.tag if site_tag is None: - site_tag = port + site_tag = str(port) # We always include a health resource. - resources: Dict[str, IResource] = {"/health": HealthResource()} + resources: Dict[str, Resource] = {"/health": HealthResource()} for res in listener_config.http_options.resources: for name in res.names: @@ -279,35 +287,35 @@ def _listen_http(self, listener_config: ListenerConfig): resource = JsonResource(self, canonical_json=False) RegisterRestServlet(self).register(resource) + RegistrationTokenValidityRestServlet(self).register(resource) login.register_servlets(self, resource) ThreepidRestServlet(self).register(resource) DevicesRestServlet(self).register(resource) + + # Read-only + KeyUploadServlet(self).register(resource) KeyQueryServlet(self).register(resource) - OneTimeKeyServlet(self).register(resource) KeyChangesServlet(self).register(resource) - VoipRestServlet(self).register(resource) - PushRuleRestServlet(self).register(resource) - VersionsRestServlet(self).register(resource) + OneTimeKeyServlet(self).register(resource) - ProfileAvatarURLRestServlet(self).register(resource) - ProfileDisplaynameRestServlet(self).register(resource) - ProfileRestServlet(self).register(resource) - KeyUploadServlet(self).register(resource) - AccountDataServlet(self).register(resource) - RoomAccountDataServlet(self).register(resource) + voip.register_servlets(self, resource) + push_rule.register_servlets(self, resource) + versions.register_servlets(self, resource) + + profile.register_servlets(self, resource) sync.register_servlets(self, resource) events.register_servlets(self, resource) - room.register_servlets(self, resource, True) + room.register_servlets(self, resource, is_worker=True) room.register_deprecated_servlets(self, resource) - InitialSyncRestServlet(self).register(resource) + initial_sync.register_servlets(self, resource) room_keys.register_servlets(self, resource) tags.register_servlets(self, resource) account_data.register_servlets(self, resource) receipts.register_servlets(self, resource) read_marker.register_servlets(self, resource) - SendToDeviceRestServlet(self).register(resource) + sendtodevice.register_servlets(self, resource) user_directory.register_servlets(self, resource) @@ -318,10 +326,12 @@ def 
_listen_http(self, listener_config: ListenerConfig): resources.update({CLIENT_API_PREFIX: resource}) resources.update(build_synapse_client_resource_tree(self)) + resources.update({"/.well-known": well_known_resource(self)}) + elif name == "federation": resources.update({FEDERATION_PREFIX: TransportLayerServer(self)}) elif name == "media": - if self.config.can_load_media_repo: + if self.config.media.can_load_media_repo: media_repo = self.get_media_repository_resource() # We need to serve the admin servlets for media on the @@ -331,7 +341,8 @@ def _listen_http(self, listener_config: ListenerConfig): resources.update( { - MEDIA_PREFIX: media_repo, + MEDIA_R0_PREFIX: media_repo, + MEDIA_V3_PREFIX: media_repo, LEGACY_MEDIA_PREFIX: media_repo, "/_synapse/admin": admin_resource, } @@ -383,16 +394,19 @@ def _listen_http(self, listener_config: ListenerConfig): logger.info("Synapse worker now listening on port %d", port) - def start_listening(self): - for listener in self.config.worker_listeners: + def start_listening(self) -> None: + for listener in self.config.worker.worker_listeners: if listener.type == "http": self._listen_http(listener) elif listener.type == "manhole": _base.listen_manhole( - listener.bind_addresses, listener.port, manhole_globals={"hs": self} + listener.bind_addresses, + listener.port, + manhole_settings=self.config.server.manhole_settings, + manhole_globals={"hs": self}, ) elif listener.type == "metrics": - if not self.config.enable_metrics: + if not self.config.metrics.enable_metrics: logger.warning( "Metrics listener configured, but " "enable_metrics is not True!" @@ -405,7 +419,7 @@ def start_listening(self): self.get_tcp_replication().start_replication(self) -def start(config_options): +def start(config_options: List[str]) -> None: try: config = HomeServerConfig.load_config("Synapse worker", config_options) except ConfigError as e: @@ -413,7 +427,7 @@ def start(config_options): sys.exit(1) # For backwards compatibility let any of the old app names. - assert config.worker_app in ( + assert config.worker.worker_app in ( "synapse.app.appservice", "synapse.app.client_reader", "synapse.app.event_creator", @@ -427,7 +441,7 @@ def start(config_options): "synapse.app.user_dir", ) - if config.worker_app == "synapse.app.appservice": + if config.worker.worker_app == "synapse.app.appservice": if config.appservice.notify_appservices: sys.stderr.write( "\nThe appservices must be disabled in the main synapse process" @@ -443,7 +457,7 @@ def start(config_options): # For other worker types we force this to off. config.appservice.notify_appservices = False - if config.worker_app == "synapse.app.user_dir": + if config.worker.worker_app == "synapse.app.user_dir": if config.server.update_user_directory: sys.stderr.write( "\nThe update_user_directory must be disabled in the main synapse process" @@ -459,14 +473,14 @@ def start(config_options): # For other worker types we force this to off. config.server.update_user_directory = False - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts + synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage if config.server.gc_seconds: synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds hs = GenericWorkerServer( - config.server_name, + config.server.server_name, config=config, version_string="Synapse/" + get_version_string(synapse), ) @@ -485,12 +499,16 @@ def start(config_options): register_start(_base.start, hs) # redirect stdio to the logs, if configured. 
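The `notify_appservices` and `update_user_directory` handling above enforces one general rule: a duty delegated to a dedicated worker must be switched off in the main process and on in exactly that worker. Distilled into a hedged, standalone sketch (the function name and messages are invented):

```python
def check_exclusive_duty(is_dedicated_worker: bool, flag_enabled: bool, flag: str) -> None:
    # The duty must run in exactly one process: the dedicated worker.
    if is_dedicated_worker and not flag_enabled:
        raise SystemExit(f"{flag} must be enabled in this worker's config")
    if not is_dedicated_worker and flag_enabled:
        raise SystemExit(f"{flag} must be disabled in the main process config")
```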
- if not hs.config.no_redirect_stdio: + if not hs.config.logging.no_redirect_stdio: redirect_stdio_to_logs() _base.start_worker_reactor("synapse-generic-worker", config) -if __name__ == "__main__": +def main() -> None: with LoggingContext("main"): start(sys.argv[1:]) + + +if __name__ == "__main__": + main() diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7dae163c1af3..dd76e0732108 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -16,10 +16,10 @@ import logging import os import sys -from typing import Iterator +from typing import Dict, Iterable, Iterator, List -from twisted.internet import reactor -from twisted.web.resource import EncodingResourceWrapper, IResource +from twisted.internet.tcp import Port +from twisted.web.resource import EncodingResourceWrapper, Resource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File @@ -29,7 +29,8 @@ from synapse.api.urls import ( FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, - MEDIA_PREFIX, + MEDIA_R0_PREFIX, + MEDIA_V3_PREFIX, SERVER_KEY_V2_PREFIX, STATIC_PREFIX, WEB_CLIENT_PREFIX, @@ -66,7 +67,7 @@ from synapse.rest.health import HealthResource from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.synapse.client import build_synapse_client_resource_tree -from synapse.rest.well_known import WellKnownResource +from synapse.rest.well_known import well_known_resource from synapse.server import HomeServer from synapse.storage import DataStore from synapse.util.httpresourcetree import create_resource_tree @@ -76,23 +77,27 @@ logger = logging.getLogger("synapse.app.homeserver") -def gz_wrap(r): +def gz_wrap(r: Resource) -> Resource: return EncodingResourceWrapper(r, [GzipEncoderFactory()]) class SynapseHomeServer(HomeServer): - DATASTORE_CLASS = DataStore + DATASTORE_CLASS = DataStore # type: ignore - def _listener_http(self, config: HomeServerConfig, listener_config: ListenerConfig): + def _listener_http( + self, config: HomeServerConfig, listener_config: ListenerConfig + ) -> Iterable[Port]: port = listener_config.port bind_addresses = listener_config.bind_addresses tls = listener_config.tls + # Must exist since this is an HTTP listener. + assert listener_config.http_options is not None site_tag = listener_config.http_options.tag if site_tag is None: site_tag = str(port) # We always include a health resource. 
- resources = {"/health": HealthResource()} + resources: Dict[str, Resource] = {"/health": HealthResource()} for res in listener_config.http_options.resources: for name in res.names: @@ -111,7 +116,7 @@ def _listener_http(self, config: HomeServerConfig, listener_config: ListenerConf ("listeners", site_tag, "additional_resources", "<%s>" % (path,)), ) handler = handler_cls(config, module_api) - if IResource.providedBy(handler): + if isinstance(handler, Resource): resource = handler elif hasattr(handler, "handle_request"): resource = AdditionalResource(self, handler.handle_request) @@ -128,7 +133,7 @@ def _listener_http(self, config: HomeServerConfig, listener_config: ListenerConf # try to find something useful to redirect '/' to if WEB_CLIENT_PREFIX in resources: - root_resource = RootOptionsRedirectResource(WEB_CLIENT_PREFIX) + root_resource: Resource = RootOptionsRedirectResource(WEB_CLIENT_PREFIX) elif STATIC_PREFIX in resources: root_resource = RootOptionsRedirectResource(STATIC_PREFIX) else: @@ -145,6 +150,8 @@ def _listener_http(self, config: HomeServerConfig, listener_config: ListenerConf ) if tls: + # refresh_certificate should have been called before this. + assert self.tls_server_context_factory is not None ports = listen_ssl( bind_addresses, port, @@ -165,20 +172,21 @@ def _listener_http(self, config: HomeServerConfig, listener_config: ListenerConf return ports - def _configure_named_resource(self, name, compress=False): + def _configure_named_resource( + self, name: str, compress: bool = False + ) -> Dict[str, Resource]: """Build a resource map for a named resource Args: - name (str): named resource: one of "client", "federation", etc - compress (bool): whether to enable gzip compression for this - resource + name: named resource: one of "client", "federation", etc + compress: whether to enable gzip compression for this resource Returns: - dict[str, Resource]: map from path to HTTP resource + map from path to HTTP resource """ - resources = {} + resources: Dict[str, Resource] = {} if name == "client": - client_resource = ClientRestResource(self) + client_resource: Resource = ClientRestResource(self) if compress: client_resource = gz_wrap(client_resource) @@ -186,16 +194,18 @@ def _configure_named_resource(self, name, compress=False): { "/_matrix/client/api/v1": client_resource, "/_matrix/client/r0": client_resource, + "/_matrix/client/v1": client_resource, + "/_matrix/client/v3": client_resource, "/_matrix/client/unstable": client_resource, "/_matrix/client/v2_alpha": client_resource, "/_matrix/client/versions": client_resource, - "/.well-known/matrix/client": WellKnownResource(self), + "/.well-known": well_known_resource(self), "/_synapse/admin": AdminRestResource(self), **build_synapse_client_resource_tree(self), } ) - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: from synapse.rest.synapse.client.password_reset import ( PasswordResetSubmitTokenResource, ) @@ -207,7 +217,7 @@ def _configure_named_resource(self, name, compress=False): if name == "consent": from synapse.rest.consent.consent_resource import ConsentResource - consent_resource = ConsentResource(self) + consent_resource: Resource = ConsentResource(self) if compress: consent_resource = gz_wrap(consent_resource) resources.update({"/_matrix/consent": consent_resource}) @@ -234,10 +244,14 @@ def _configure_named_resource(self, name, compress=False): ) if name in ["media", "federation", "client"]: - if 
self.config.enable_media_repo: +            if self.config.server.enable_media_repo: media_repo = self.get_media_repository_resource() resources.update( -                {MEDIA_PREFIX: media_repo, LEGACY_MEDIA_PREFIX: media_repo} +                { +                    MEDIA_R0_PREFIX: media_repo, +                    MEDIA_V3_PREFIX: media_repo, +                    LEGACY_MEDIA_PREFIX: media_repo, +                } ) elif name == "media": raise ConfigError( @@ -248,7 +262,7 @@ def _configure_named_resource(self, name, compress=False): resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) if name == "webclient": -            webclient_loc = self.config.web_client_location +            webclient_loc = self.config.server.web_client_location if webclient_loc is None: logger.warning( @@ -269,7 +283,7 @@ def _configure_named_resource(self, name, compress=False): # https://twistedmatrix.com/trac/ticket/7678 resources[WEB_CLIENT_PREFIX] = File(webclient_loc) -        if name == "metrics" and self.config.enable_metrics: +        if name == "metrics" and self.config.metrics.enable_metrics: resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) if name == "replication": @@ -277,8 +291,8 @@ def _configure_named_resource(self, name, compress=False): return resources -    def start_listening(self): -        if self.config.redis_enabled: +    def start_listening(self) -> None: +        if self.config.redis.redis_enabled: # If redis is enabled we connect via the replication command handler # in the same way as the workers (since we're effectively a client # rather than a server). @@ -291,7 +305,10 @@ def start_listening(self): ) elif listener.type == "manhole": _base.listen_manhole( -                    listener.bind_addresses, listener.port, manhole_globals={"hs": self} +                    listener.bind_addresses, +                    listener.port, +                    manhole_settings=self.config.server.manhole_settings, +                    manhole_globals={"hs": self}, ) elif listener.type == "replication": services = listen_tcp( @@ -300,9 +317,11 @@ def start_listening(self): ReplicationStreamProtocolFactory(self), ) for s in services: -                    reactor.addSystemEventTrigger("before", "shutdown", s.stopListening) +                    self.get_reactor().addSystemEventTrigger( +                        "before", "shutdown", s.stopListening +                    ) elif listener.type == "metrics": -                if not self.config.enable_metrics: +                if not self.config.metrics.enable_metrics: logger.warning( "Metrics listener configured, but " "enable_metrics is not True!" @@ -315,14 +334,13 @@ def start_listening(self): logger.warning("Unrecognized listener type: %s", listener.type) -def setup(config_options): +def setup(config_options: List[str]) -> SynapseHomeServer: """ Args: -        config_options_options: The options passed to Synapse. Usually -            `sys.argv[1:]`. +        config_options: The options passed to Synapse. Usually `sys.argv[1:]`. Returns: -        HomeServer +        A homeserver instance. """ try: config = HomeServerConfig.load_or_generate_config( @@ -340,14 +358,21 @@ def setup(config_options): # generating config files and shouldn't try to continue. sys.exit(0) -    events.USE_FROZEN_DICTS = config.use_frozen_dicts +    if config.worker.worker_app: +        raise ConfigError( +            "You have specified `worker_app` in the config but are attempting to start a non-worker " +            "instance. Please use `python -m synapse.app.generic_worker` instead (or remove the option if this is the main process)."
+        ) + +    events.USE_FROZEN_DICTS = config.server.use_frozen_dicts synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage if config.server.gc_seconds: synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds hs = SynapseHomeServer( -        config.server_name, +        config.server.server_name, config=config, version_string="Synapse/" + get_version_string(synapse), ) @@ -361,9 +386,9 @@ def setup(config_options): except Exception as e: handle_startup_exception(e) -    async def start(): +    async def start() -> None: # Load the OIDC provider metadatas, if OIDC is enabled. -        if hs.config.oidc_enabled: +        if hs.config.oidc.oidc_enabled: oidc = hs.get_oidc_handler() # Loading the provider metadata also ensures the provider config is valid. await oidc.load_metadata() @@ -401,58 +426,34 @@ def format_config_error(e: ConfigError) -> Iterator[str]: yield ":\n %s" % (e.msg,) -    e = e.__cause__ +    parent_e = e.__cause__ indent = 1 -    while e: +    while parent_e: indent += 1 -        yield ":\n%s%s" % (" " * indent, str(e)) -        e = e.__cause__ - - -def run(hs): -    PROFILE_SYNAPSE = False -    if PROFILE_SYNAPSE: - -        def profile(func): -            from cProfile import Profile -            from threading import current_thread - -            def profiled(*args, **kargs): -                profile = Profile() -                profile.enable() -                func(*args, **kargs) -                profile.disable() -                ident = current_thread().ident -                profile.dump_stats( -                    "/tmp/%s.%s.%i.pstat" % (hs.hostname, func.__name__, ident) -                ) - -            return profiled - -        from twisted.python.threadpool import ThreadPool +        yield ":\n%s%s" % (" " * indent, str(parent_e)) +        parent_e = parent_e.__cause__ -        ThreadPool._worker = profile(ThreadPool._worker) -        reactor.run = profile(reactor.run) +def run(hs: HomeServer) -> None: _base.start_reactor( "synapse-homeserver", -        soft_file_limit=hs.config.soft_file_limit, -        gc_thresholds=hs.config.gc_thresholds, -        pid_file=hs.config.pid_file, -        daemonize=hs.config.daemonize, -        print_pidfile=hs.config.print_pidfile, +        soft_file_limit=hs.config.server.soft_file_limit, +        gc_thresholds=hs.config.server.gc_thresholds, +        pid_file=hs.config.server.pid_file, +        daemonize=hs.config.server.daemonize, +        print_pidfile=hs.config.server.print_pidfile, logger=logger, ) -def main(): +def main() -> None: with LoggingContext("main"): # check base requirements check_requirements() hs = setup(sys.argv[1:]) # redirect stdio to the logs, if configured.
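Aside on the `format_config_error` rework above: renaming the loop variable to `parent_e` keeps the original exception `e` intact while walking its `__cause__` chain. A minimal runnable sketch of the same pattern (editor's illustration, not part of the patch):

```python
from typing import Iterator

def format_cause_chain(e: BaseException) -> Iterator[str]:
    # Walk __cause__ with a separate variable so `e` is never clobbered.
    indent = 1
    parent_e = e.__cause__
    while parent_e:
        indent += 1
        yield ":\n%s%s" % (" " * indent, str(parent_e))
        parent_e = parent_e.__cause__

try:
    try:
        raise ValueError("inner detail")
    except ValueError as inner:
        raise RuntimeError("outer failure") from inner
except RuntimeError as outer:
    print("".join(format_cause_chain(outer)))
```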
- if not hs.config.no_redirect_stdio: + if not hs.config.logging.no_redirect_stdio: redirect_stdio_to_logs() run(hs) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 86ad7337a995..899dba5c3d76 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -15,16 +15,21 @@ import math import resource import sys +from typing import TYPE_CHECKING, List, Sized, Tuple from prometheus_client import Gauge from synapse.metrics.background_process_metrics import wrap_as_background_process +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger("synapse.app.homeserver") # Contains the list of processes we will be monitoring # currently either 0 or 1 -_stats_process = [] +_stats_process: List[Tuple[int, "resource.struct_rusage"]] = [] # Gauges to expose monthly active user control metrics current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU") @@ -41,9 +46,15 @@ @wrap_as_background_process("phone_stats_home") -async def phone_stats_home(hs, stats, stats_process=_stats_process): +async def phone_stats_home( + hs: "HomeServer", + stats: JsonDict, + stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process, +) -> None: logger.info("Gathering stats for reporting") now = int(hs.get_clock().time()) + # Ensure the homeserver has started. + assert hs.start_time is not None uptime = int(now - hs.start_time) if uptime < 0: uptime = 0 @@ -73,8 +84,8 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process): store = hs.get_datastore() - stats["homeserver"] = hs.config.server_name - stats["server_context"] = hs.config.server_context + stats["homeserver"] = hs.config.server.server_name + stats["server_context"] = hs.config.server.server_context stats["timestamp"] = now stats["uptime_seconds"] = uptime version = sys.version_info @@ -131,24 +142,26 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process): log_level = synapse_logger.getEffectiveLevel() stats["log_level"] = logging.getLevelName(log_level) - logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)) + logger.info( + "Reporting stats to %s: %s" % (hs.config.metrics.report_stats_endpoint, stats) + ) try: await hs.get_proxied_http_client().put_json( - hs.config.report_stats_endpoint, stats + hs.config.metrics.report_stats_endpoint, stats ) except Exception as e: logger.warning("Error reporting stats: %s", e) -def start_phone_stats_home(hs): +def start_phone_stats_home(hs: "HomeServer") -> None: """ Start the background tasks which report phone home stats. 
""" clock = hs.get_clock() - stats = {} + stats: JsonDict = {} - def performance_stats_init(): + def performance_stats_init() -> None: _stats_process.clear() _stats_process.append( (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF)) @@ -164,12 +177,12 @@ def performance_stats_init(): hs.get_datastore().reap_monthly_active_users() @wrap_as_background_process("generate_monthly_active_users") - async def generate_monthly_active_users(): + async def generate_monthly_active_users() -> None: current_mau_count = 0 current_mau_count_by_service = {} - reserved_users = () + reserved_users: Sized = () store = hs.get_datastore() - if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: + if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only: current_mau_count = await store.get_monthly_active_count() current_mau_count_by_service = ( await store.get_monthly_active_count_by_service() @@ -181,14 +194,14 @@ async def generate_monthly_active_users(): current_mau_by_service_gauge.labels(app_service).set(float(count)) registered_reserved_users_mau_gauge.set(float(len(reserved_users))) - max_mau_gauge.set(float(hs.config.max_mau_value)) + max_mau_gauge.set(float(hs.config.server.max_mau_value)) - if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: + if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only: generate_monthly_active_users() clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000) # End of monthly active user settings - if hs.config.report_stats: + if hs.config.metrics.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats) diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 6504c6bd3f59..f9d3bd337d3b 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. import logging import re +from enum import Enum from typing import TYPE_CHECKING, Iterable, List, Match, Optional from synapse.api.constants import EventTypes @@ -27,7 +28,7 @@ logger = logging.getLogger(__name__) -class ApplicationServiceState: +class ApplicationServiceState(Enum): DOWN = "down" UP = "up" diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 935f24263c98..f51b636417bb 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -27,6 +27,7 @@ if TYPE_CHECKING: from synapse.appservice import ApplicationService + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -84,7 +85,7 @@ class ApplicationServiceApi(SimpleHttpClient): pushing. """ - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.clock = hs.get_clock() @@ -230,13 +231,32 @@ async def push_bulk( json_body=body, args={"access_token": service.hs_token}, ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "push_bulk to %s succeeded! 
events=%s", + uri, + [event.get("event_id") for event in events], + ) sent_transactions_counter.labels(service.id).inc() sent_events_counter.labels(service.id).inc(len(events)) return True except CodeMessageException as e: - logger.warning("push_bulk to %s received %s", uri, e.code) + logger.warning( + "push_bulk to %s received code=%s msg=%s", + uri, + e.code, + e.msg, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) except Exception as ex: - logger.warning("push_bulk to %s threw exception %s", uri, ex) + logger.warning( + "push_bulk to %s threw exception(%s) %s args=%s", + uri, + type(ex).__name__, + ex, + ex.args, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) failed_transactions_counter.labels(service.id).inc() return False diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py index b5b6735a8faa..b2a7a89a3563 100644 --- a/synapse/config/__main__.py +++ b/synapse/config/__main__.py @@ -1,4 +1,5 @@ # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,25 +12,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sys +from typing import List + from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig -if __name__ == "__main__": - import sys - from synapse.config.homeserver import HomeServerConfig +def main(args: List[str]) -> None: + action = args[1] if len(args) > 1 and args[1] == "read" else None + # If we're reading a key in the config file, then `args[1]` will be `read` and `args[2]` + # will be the key to read. + # We'll want to rework this code if we want to support more actions than just `read`. + load_config_args = args[3:] if action else args[1:] - action = sys.argv[1] + try: + config = HomeServerConfig.load_config("", load_config_args) + except ConfigError as e: + sys.stderr.write("\n" + str(e) + "\n") + sys.exit(1) + + print("Config parses OK!") if action == "read": - key = sys.argv[2] + key = args[2] + key_parts = key.split(".") + + value = config try: - config = HomeServerConfig.load_config("", sys.argv[3:]) - except ConfigError as e: - sys.stderr.write("\n" + str(e) + "\n") + while len(key_parts): + value = getattr(value, key_parts[0]) + key_parts.pop(0) + + print(f"\n{key}: {value}") + except AttributeError: + print( + f"\nNo '{key}' key could be found in the provided configuration file." 
+ ) sys.exit(1) - print(getattr(config, key)) - sys.exit(0) - else: - sys.stderr.write("Unknown command %r\n" % (action,)) - sys.exit(1) + +if __name__ == "__main__": + main(sys.argv) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index d6ec618f8f52..1265738dc12e 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -20,7 +20,18 @@ from collections import OrderedDict from hashlib import sha256 from textwrap import dedent -from typing import Any, Iterable, List, MutableMapping, Optional, Union +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import attr import jinja2 @@ -78,7 +89,7 @@ def __init__(self, msg: str, path: Optional[Iterable[str]] = None): """ -def path_exists(file_path): +def path_exists(file_path: str) -> bool: """Check if a file exists Unlike os.path.exists, this throws an exception if there is an error @@ -86,7 +97,7 @@ def path_exists(file_path): the parent dir). Returns: - bool: True if the file exists; False if not. + True if the file exists; False if not. """ try: os.stat(file_path) @@ -102,15 +113,15 @@ class Config: A configuration section, containing configuration keys and values. Attributes: - section (str): The section title of this config object, such as + section: The section title of this config object, such as "tls" or "logger". This is used to refer to it on the root logger (for example, `config.tls.some_option`). Must be defined in subclasses. """ - section = None + section: str - def __init__(self, root_config=None): + def __init__(self, root_config: "RootConfig" = None): self.root = root_config # Get the path to the default Synapse template directory @@ -118,23 +129,8 @@ def __init__(self, root_config=None): "synapse", "res/templates" ) - def __getattr__(self, item: str) -> Any: - """ - Try and fetch a configuration option that does not exist on this class. - - This is so that existing configs that rely on `self.value`, where value - is actually from a different config section, continue to work. - """ - if item in ["generate_config_section", "read_config"]: - raise AttributeError(item) - - if self.root is None: - raise AttributeError(item) - else: - return self.root._get_unclassed_config(self.section, item) - @staticmethod - def parse_size(value): + def parse_size(value: Union[str, int]) -> int: if isinstance(value, int): return value sizes = {"K": 1024, "M": 1024 * 1024} @@ -177,15 +173,15 @@ def parse_duration(value: Union[str, int]) -> int: return int(value) * size @staticmethod - def abspath(file_path): + def abspath(file_path: str) -> str: return os.path.abspath(file_path) if file_path else file_path @classmethod - def path_exists(cls, file_path): + def path_exists(cls, file_path: str) -> bool: return path_exists(file_path) @classmethod - def check_file(cls, file_path, config_name): + def check_file(cls, file_path: Optional[str], config_name: str) -> str: if file_path is None: raise ConfigError("Missing config for %s." 
% (config_name,)) try: @@ -198,19 +194,15 @@ def check_file(cls, file_path, config_name): return cls.abspath(file_path) @classmethod -    def ensure_directory(cls, dir_path): +    def ensure_directory(cls, dir_path: str) -> str: dir_path = cls.abspath(dir_path) -        try: -            os.makedirs(dir_path) -        except OSError as e: -            if e.errno != errno.EEXIST: -                raise +        os.makedirs(dir_path, exist_ok=True) if not os.path.isdir(dir_path): raise ConfigError("%s is not a directory" % (dir_path,)) return dir_path @classmethod -    def read_file(cls, file_path, config_name): +    def read_file(cls, file_path: Any, config_name: str) -> str: """Deprecated: call read_file directly""" return read_file(file_path, (config_name,)) @@ -237,13 +229,14 @@ def read_template(self, filename: str) -> jinja2.Template: def read_templates( self, filenames: List[str], -        custom_template_directory: Optional[str] = None, +        custom_template_directories: Optional[Iterable[str]] = None, ) -> List[jinja2.Template]: """Load a list of template files from disk using the given variables. This function will attempt to load the given templates from the default Synapse -        template directory. If `custom_template_directory` is supplied, that directory -        is tried first. +        template directory. If `custom_template_directories` is supplied, any directory +        in this list is tried (in the order they appear in the list) before trying +        Synapse's default directory. Files read are treated as Jinja templates. The templates are not rendered yet and have autoescape enabled. @@ -251,8 +244,8 @@ def read_templates( Args: filenames: A list of template filenames to read. -            custom_template_directory: A directory to try to look for the templates -                before using the default Synapse template directory instead. +            custom_template_directories: A list of directories to search for the +                templates before falling back to the default Synapse template directory. Raises: ConfigError: if the file's path is incorrect or otherwise cannot be read. @@ -260,20 +253,26 @@ def read_templates( Returns: A list of jinja2 templates. """ -        search_directories = [self.default_template_dir] - -        # The loader will first look in the custom template directory (if specified) for the -        # given filename. If it doesn't find it, it will use the default template dir instead -        if custom_template_directory: -            # Check that the given template directory exists -            if not self.path_exists(custom_template_directory): -                raise ConfigError( -                    "Configured template directory does not exist: %s" -                    % (custom_template_directory,) -                ) +        search_directories = [] + +        # The loader will first look in the custom template directories (if specified) +        # for the given filename. If it doesn't find it, it will use the default +        # template dir instead. +        if custom_template_directories is not None: +            for custom_template_directory in custom_template_directories: +                # Check that the given template directory exists +                if not self.path_exists(custom_template_directory): +                    raise ConfigError( +                        "Configured template directory does not exist: %s" +                        % (custom_template_directory,) +                    ) + +                # Search the custom template directory as well +                search_directories.append(custom_template_directory) -            # Search the custom template directory as well -            search_directories.insert(0, custom_template_directory) +        # Append the default directory at the end of the list so Jinja can fall back on it +        # if a template is missing from any custom directory.
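To illustrate the search order being introduced here: custom directories come first in the loader's search path and the packaged default directory last, so a custom template shadows the default of the same name. A hedged sketch using plain Jinja (the directory paths are hypothetical):

```python
import jinja2

# Hypothetical directories: custom dirs first, Synapse's default dir last.
search_directories = [
    "/etc/synapse/custom_templates",     # a custom_template_directories entry
    "/usr/share/synapse/res/templates",  # stand-in for the default template dir
]

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(search_directories),
    autoescape=True,
)
# get_template() tries the directories in list order and returns the first hit.
template = env.get_template("notice_expiry.html")
```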
+        search_directories.append(self.default_template_dir) # TODO: switch to synapse.util.templates.build_jinja_env loader = jinja2.FileSystemLoader(search_directories) @@ -286,7 +285,9 @@ def read_templates( env.filters.update( { "format_ts": _format_ts_filter, -                "mxc_to_http": _create_mxc_to_http_filter(self.public_baseurl), +                "mxc_to_http": _create_mxc_to_http_filter( +                    self.root.server.public_baseurl +                ), } ) @@ -294,6 +295,9 @@ def read_templates( return [env.get_template(filename) for filename in filenames] +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") + + class RootConfig: """ Holder of an application's configuration. @@ -308,8 +312,6 @@ class RootConfig: config_classes = [] def __init__(self): -        self._configs = OrderedDict() - for config_class in self.config_classes: if config_class.section is None: raise ValueError("%r requires a section name" % (config_class,)) @@ -318,44 +320,11 @@ def __init__(self): conf = config_class(self) except Exception as e: raise Exception("Failed making %s: %r" % (config_class.section, e)) -            self._configs[config_class.section] = conf +            setattr(self, config_class.section, conf) -    def __getattr__(self, item: str) -> Any: -        """ -        Redirect lookups on this object either to config objects, or values on -        config objects, so that `config.tls.blah` works, as well as legacy uses -        of things like `config.server_name`. It will first look up the config -        section name, and then values on those config classes. -        """ -        if item in self._configs.keys(): -            return self._configs[item] - -        return self._get_unclassed_config(None, item) - -    def _get_unclassed_config(self, asking_section: Optional[str], item: str): -        """ -        Fetch a config value from one of the instantiated config classes that -        has not been fetched directly. - -        Args: -            asking_section: If this check is coming from a Config child, which -                one? This section will not be asked if it has the value. -            item: The configuration value key. - -        Raises: -            AttributeError if no config classes have the config key. The body -            will contain what sections were checked. -        """ -        for key, val in self._configs.items(): -            if key == asking_section: -                continue - -            if item in dir(val): -                return getattr(val, item) - -        raise AttributeError(item, "not found in %s" % (list(self._configs.keys()),)) - -    def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: +    def invoke_all( +        self, func_name: str, *args: Any, **kwargs: Any +    ) -> MutableMapping[str, Any]: """ Invoke a function on all instantiated config objects this RootConfig is configured to use. @@ -364,20 +333,23 @@ def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. """ res = OrderedDict() -        for name, config in self._configs.items(): +        for config_class in self.config_classes: +            config = getattr(self, config_class.section) + if hasattr(config, func_name): -                res[name] = getattr(config, func_name)(*args, **kwargs) +                res[config_class.section] = getattr(config, func_name)(*args, **kwargs) return res @classmethod -    def invoke_all_static(cls, func_name: str, *args, **kwargs): +    def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: """ Invoke a static function on config objects this RootConfig is configured to use.
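The shape of the new section handling is easier to see in isolation: sections become real attributes via `setattr`, and `invoke_all` iterates `config_classes` rather than a private dict. A toy, runnable sketch (editor's illustration, not Synapse's actual classes):

```python
from collections import OrderedDict
from typing import Any, MutableMapping

class FooConfig:
    section = "foo"

    def read_config(self, config: dict) -> Any:
        return config.get("foo", "default")

class MiniRootConfig:
    # Toy stand-in for RootConfig: each section becomes a real attribute
    # (root.foo) that type checkers can see, replacing __getattr__ indirection.
    config_classes = [FooConfig]

    def __init__(self) -> None:
        for config_class in self.config_classes:
            setattr(self, config_class.section, config_class())

    def invoke_all(
        self, func_name: str, *args: Any, **kwargs: Any
    ) -> MutableMapping[str, Any]:
        res: MutableMapping[str, Any] = OrderedDict()
        for config_class in self.config_classes:
            config = getattr(self, config_class.section)
            if hasattr(config, func_name):
                res[config_class.section] = getattr(config, func_name)(*args, **kwargs)
        return res

root = MiniRootConfig()
print(root.invoke_all("read_config", {"foo": "bar"}))  # OrderedDict([('foo', 'bar')])
```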
@@ -386,6 +358,7 @@ def invoke_all_static(cls, func_name: str, *args, **kwargs): func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. @@ -396,16 +369,16 @@ def invoke_all_static(cls, func_name: str, *args, **kwargs): def generate_config( self, -        config_dir_path, -        data_dir_path, -        server_name, -        generate_secrets=False, -        report_stats=None, -        open_private_ports=False, -        listeners=None, -        tls_certificate_path=None, -        tls_private_key_path=None, -    ): +        config_dir_path: str, +        data_dir_path: str, +        server_name: str, +        generate_secrets: bool = False, +        report_stats: Optional[bool] = None, +        open_private_ports: bool = False, +        listeners: Optional[List[dict]] = None, +        tls_certificate_path: Optional[str] = None, +        tls_private_key_path: Optional[str] = None, +    ) -> str: """ Build a default configuration file @@ -413,27 +386,27 @@ def generate_config( (eg with --generate_config). Args: -        config_dir_path (str): The path where the config files are kept. Used to +        config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. -        data_dir_path (str): The path where the data files are kept. Used to create +        data_dir_path: The path where the data files are kept. Used to create filenames for things like the database and media store. -        server_name (str): The server name. Used to initialise the server_name +        server_name: The server name. Used to initialise the server_name config param, but also used in the names of some of the config files. -        generate_secrets (bool): True if we should generate new secrets for things +        generate_secrets: True if we should generate new secrets for things like the macaroon_secret_key. If False, these parameters will be left unset. -        report_stats (bool|None): Initial setting for the report_stats setting. +        report_stats: Initial setting for the report_stats setting. If None, report_stats will be left unset. -        open_private_ports (bool): True to leave private ports (such as the non-TLS +        open_private_ports: True to leave private ports (such as the non-TLS HTTP listener) open to the internet. -        listeners (list(dict)|None): A list of descriptions of the listeners -            synapse should start with each of which specifies a port (str), a list of +        listeners: A list of descriptions of the listeners synapse should +            start, each of which specifies a port (int), a list of resources (list(str)), tls (bool) and type (str). For example: [{ "port": 8448, @@ -448,16 +421,12 @@ def generate_config( "type": "http", }], +        tls_certificate_path: The path to the tls certificate. -        database (str|None): The database type to configure, either `psycog2` -            or `sqlite3`. - -        tls_certificate_path (str|None): The path to the tls certificate. - -        tls_private_key_path (str|None): The path to the tls private key. +        tls_private_key_path: The path to the tls private key. Returns: -            str: the yaml config file +            The yaml config file """ return CONFIG_FILE_HEADER + "\n\n".join( @@ -477,12 +446,15 @@ def generate_config( ) @classmethod -    def load_config(cls, description, argv): +    def load_config( +        cls: Type[TRootConfig], description: str, argv: List[str] +    ) -> TRootConfig: """Parse the commandline and config files Doesn't support config-file-generation: used by the worker apps. -        Returns: Config object. +        Returns: +            Config object.
""" config_parser = argparse.ArgumentParser(description=description) cls.add_arguments_to_parser(config_parser) @@ -491,7 +463,7 @@ def load_config(cls, description, argv): return obj @classmethod - def add_arguments_to_parser(cls, config_parser): + def add_arguments_to_parser(cls, config_parser: argparse.ArgumentParser) -> None: """Adds all the config flags to an ArgumentParser. Doesn't support config-file-generation: used by the worker apps. @@ -499,7 +471,7 @@ def add_arguments_to_parser(cls, config_parser): Used for workers where we want to add extra flags/subcommands. Args: - config_parser (ArgumentParser): App description + config_parser: App description """ config_parser.add_argument( @@ -522,7 +494,9 @@ def add_arguments_to_parser(cls, config_parser): cls.invoke_all_static("add_arguments", config_parser) @classmethod - def load_config_with_parser(cls, parser, argv): + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: """Parse the commandline and config files with the given parser Doesn't support config-file-generation: used by the worker apps. @@ -530,13 +504,12 @@ def load_config_with_parser(cls, parser, argv): Used for workers where we want to add extra flags/subcommands. Args: - parser (ArgumentParser) - argv (list[str]) + parser + argv Returns: - tuple[HomeServerConfig, argparse.Namespace]: Returns the parsed - config object and the parsed argparse.Namespace object from - `parser.parse_args(..)` + Returns the parsed config object and the parsed argparse.Namespace + object from parser.parse_args(..)` """ obj = cls() @@ -565,12 +538,15 @@ def load_config_with_parser(cls, parser, argv): return obj, config_args @classmethod - def load_or_generate_config(cls, description, argv): + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: """Parse the commandline and config files Supports generation of config files, so is used for the main homeserver app. - Returns: Config object, or None if --generate-config or --generate-keys was set + Returns: + Config object, or None if --generate-config or --generate-keys was set """ parser = argparse.ArgumentParser(description=description) parser.add_argument( @@ -686,8 +662,7 @@ def load_or_generate_config(cls, description, argv): open_private_ports=config_args.open_private_ports, ) - if not path_exists(config_dir_path): - os.makedirs(config_dir_path) + os.makedirs(config_dir_path, exist_ok=True) with open(config_path, "w") as config_file: config_file.write(config_str) config_file.write("\n\n# vim:ft=yaml") @@ -726,16 +701,21 @@ def load_or_generate_config(cls, description, argv): return obj - def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): + def parse_config_dict( + self, + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = None, + data_dir_path: Optional[str] = None, + ) -> None: """Read the information from the config dict into this Config object. Args: - config_dict (dict): Configuration data, as read from the yaml + config_dict: Configuration data, as read from the yaml - config_dir_path (str): The path where the config files are kept. Used to + config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. - data_dir_path (str): The path where the data files are kept. Used to create + data_dir_path: The path where the data files are kept. 
Used to create filenames for things like the database and media store. """ self.invoke_all( @@ -745,17 +725,20 @@ def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=Non data_dir_path=data_dir_path, ) - def generate_missing_files(self, config_dict, config_dir_path): + def generate_missing_files( + self, config_dict: Dict[str, Any], config_dir_path: str + ) -> None: self.invoke_all("generate_files", config_dict, config_dir_path) -def read_config_files(config_files): +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: """Read the config files into a dict Args: - config_files (iterable[str]): A list of the config files to read + config_files: A list of the config files to read - Returns: dict + Returns: + The configuration dictionary. """ specified_config = {} for config_file in config_files: @@ -779,17 +762,17 @@ def read_config_files(config_files): return specified_config -def find_config_files(search_paths): +def find_config_files(search_paths: List[str]) -> List[str]: """Finds config files using a list of search paths. If a path is a file then that file path is added to the list. If a search path is a directory then all the "*.yaml" files in that directory are added to the list in sorted order. Args: - search_paths(list(str)): A list of paths to search. + search_paths: A list of paths to search. Returns: - list(str): A list of file paths. + A list of file paths. """ config_files = [] @@ -823,7 +806,7 @@ def find_config_files(search_paths): return config_files -@attr.s +@attr.s(auto_attribs=True) class ShardedWorkerHandlingConfig: """Algorithm for choosing which instance is responsible for handling some sharded work. @@ -833,7 +816,7 @@ class ShardedWorkerHandlingConfig: below). """ - instances = attr.ib(type=List[str]) + instances: List[str] def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key.""" diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 06fbd1166b88..1eb5f5a68cf8 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -1,4 +1,18 @@ -from typing import Any, Iterable, List, Optional +import argparse +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) + +import jinja2 from synapse.config import ( account_validity, @@ -19,6 +33,7 @@ from synapse.config import ( logger, metrics, modules, + oembed, oidc, password_auth_providers, push, @@ -26,6 +41,8 @@ from synapse.config import ( redis, registration, repository, + retention, + room, room_directory, saml2, server, @@ -50,7 +67,9 @@ MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_SPIEL: str MISSING_SERVER_NAME: str -def path_exists(file_path: str): ... +def path_exists(file_path: str) -> bool: ... 
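The `TRootConfig` bound TypeVar used throughout these signatures lets classmethod constructors return the subclass type instead of the base class. A small sketch of the idea (the `load_config` body here is a toy, not Synapse's):

```python
from typing import Type, TypeVar

TConfig = TypeVar("TConfig", bound="BaseConfig")

class BaseConfig:
    @classmethod
    def load_config(cls: Type[TConfig], raw: dict) -> TConfig:
        # Annotating cls as Type[TConfig] means the return type follows the
        # subclass: HomeServerConfig.load_config(...) -> HomeServerConfig.
        obj = cls()
        obj.__dict__.update(raw)
        return obj

class HomeServerConfig(BaseConfig):
    pass

hs_config: HomeServerConfig = HomeServerConfig.load_config({"server_name": "example.org"})
print(type(hs_config).__name__)  # HomeServerConfig
```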
+ +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") class RootConfig: server: server.ServerConfig @@ -60,6 +79,7 @@ class RootConfig: logging: logger.LoggingConfig ratelimiting: ratelimiting.RatelimitConfig media: repository.ContentRepositoryConfig + oembed: oembed.OembedConfig captcha: captcha.CaptchaConfig voip: voip.VoipConfig registration: registration.RegistrationConfig @@ -79,6 +99,7 @@ class RootConfig: authproviders: password_auth_providers.PasswordAuthProviderConfig push: push.PushConfig spamchecker: spam_checker.SpamCheckerConfig + room: room.RoomConfig groups: groups.GroupsConfig userdirectory: user_directory.UserDirectoryConfig consent: consent.ConsentConfig @@ -86,71 +107,85 @@ class RootConfig: servernotices: server_notices.ServerNoticesConfig roomdirectory: room_directory.RoomDirectoryConfig thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig - tracer: tracer.TracerConfig + tracing: tracer.TracerConfig redis: redis.RedisConfig modules: modules.ModulesConfig caches: cache.CacheConfig federation: federation.FederationConfig + retention: retention.RetentionConfig - config_classes: List = ... + config_classes: List[Type["Config"]] = ... def __init__(self) -> None: ... - def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + def invoke_all( + self, func_name: str, *args: Any, **kwargs: Any + ) -> MutableMapping[str, Any]: ... @classmethod def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... - def __getattr__(self, item: str): ... def parse_config_dict( self, - config_dict: Any, - config_dir_path: Optional[Any] = ..., - data_dir_path: Optional[Any] = ..., + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = ..., + data_dir_path: Optional[str] = ..., ) -> None: ... - read_config: Any = ... def generate_config( self, config_dir_path: str, data_dir_path: str, server_name: str, generate_secrets: bool = ..., - report_stats: Optional[str] = ..., + report_stats: Optional[bool] = ..., open_private_ports: bool = ..., listeners: Optional[Any] = ..., - database_conf: Optional[Any] = ..., tls_certificate_path: Optional[str] = ..., tls_private_key_path: Optional[str] = ..., - ): ... + ) -> str: ... @classmethod - def load_or_generate_config(cls, description: Any, argv: Any): ... + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: ... @classmethod - def load_config(cls, description: Any, argv: Any): ... + def load_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> TRootConfig: ... @classmethod - def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + def add_arguments_to_parser( + cls, config_parser: argparse.ArgumentParser + ) -> None: ... @classmethod - def load_config_with_parser(cls, parser: Any, argv: Any): ... + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: ... def generate_missing_files( self, config_dict: dict, config_dir_path: str ) -> None: ... class Config: root: RootConfig + default_template_dir: str def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... - def __getattr__(self, item: str, from_root: bool = ...): ... @staticmethod - def parse_size(value: Any): ... + def parse_size(value: Union[str, int]) -> int: ... @staticmethod - def parse_duration(value: Any): ... + def parse_duration(value: Union[str, int]) -> int: ... @staticmethod - def abspath(file_path: Optional[str]): ... 
+    def abspath(file_path: Optional[str]) -> str: ... @classmethod -    def path_exists(cls, file_path: str): ... +    def path_exists(cls, file_path: str) -> bool: ... @classmethod -    def check_file(cls, file_path: str, config_name: str): ... +    def check_file(cls, file_path: str, config_name: str) -> str: ... @classmethod -    def ensure_directory(cls, dir_path: str): ... +    def ensure_directory(cls, dir_path: str) -> str: ... @classmethod -    def read_file(cls, file_path: str, config_name: str): ... +    def read_file(cls, file_path: str, config_name: str) -> str: ... +    def read_template(self, filename: str) -> jinja2.Template: ... +    def read_templates( +        self, +        filenames: List[str], +        custom_template_directories: Optional[Iterable[str]] = None, +    ) -> List[jinja2.Template]: ... -def read_config_files(config_files: List[str]): ... -def find_config_files(search_paths: List[str]): ... +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: ... +def find_config_files(search_paths: List[str]) -> List[str]: ... class ShardedWorkerHandlingConfig: instances: List[str] diff --git a/synapse/config/account_validity.py b/synapse/config/account_validity.py index 6be4eafe5582..c533452cabb5 100644 --- a/synapse/config/account_validity.py +++ b/synapse/config/account_validity.py @@ -11,8 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging + from synapse.config._base import Config, ConfigError +logger = logging.getLogger(__name__) + +LEGACY_TEMPLATE_DIR_WARNING = """ +This server's configuration file is using the deprecated 'template_dir' setting in the +'account_validity' section. Support for this setting has been deprecated and will be +removed in a future version of Synapse. Server admins should instead use the new +'custom_templates_directory' setting documented here: +https://matrix-org.github.io/synapse/latest/templates.html +---------------------------------------------------------------------------------------""" + class AccountValidityConfig(Config): section = "account_validity" @@ -63,12 +75,10 @@ def read_config(self, config, **kwargs): self.account_validity_period * 10.0 / 100.0 ) -        if self.account_validity_renew_by_email_enabled: -            if not self.public_baseurl: -                raise ConfigError("Can't send renewal emails without 'public_baseurl'") - # Load account validity templates.
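The template-loading pattern used below (and mirrored in the email config later in this diff) combines the global `custom_template_directory` with the deprecated per-section `template_dir`, dropping unset entries before calling `read_templates`. A tiny sketch with hypothetical values:

```python
from typing import Iterable, Optional

def combine_template_dirs(
    global_custom_dir: Optional[str], legacy_section_dir: Optional[str]
) -> Iterable[str]:
    # Both directories are optional; read_templates() should only ever
    # see the ones that are actually configured.
    candidates = (global_custom_dir, legacy_section_dir)
    return (td for td in candidates if td)

print(list(combine_template_dirs("/etc/synapse/custom_templates", None)))
# ['/etc/synapse/custom_templates']
```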
account_validity_template_dir = account_validity_config.get("template_dir") + if account_validity_template_dir is not None: + logger.warning(LEGACY_TEMPLATE_DIR_WARNING) account_renewed_template_filename = account_validity_config.get( "account_renewed_html_path", "account_renewed.html" @@ -78,6 +88,11 @@ def read_config(self, config, **kwargs): ) # Read and store template content + custom_template_directories = ( + self.root.server.custom_template_directory, + account_validity_template_dir, + ) + ( self.account_validity_account_renewed_template, self.account_validity_account_previously_renewed_template, @@ -88,5 +103,5 @@ def read_config(self, config, **kwargs): "account_previously_renewed.html", invalid_token_template_filename, ], - account_validity_template_dir, + (td for td in custom_template_directories if td), ) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 1ebea88db2a2..e4bb7224a410 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -1,4 +1,5 @@ # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +14,14 @@ # limitations under the License. import logging -from typing import Dict +from typing import Dict, List from urllib import parse as urlparse import yaml from netaddr import IPSet from synapse.appservice import ApplicationService -from synapse.types import UserID +from synapse.types import JsonDict, UserID from ._base import Config, ConfigError @@ -30,12 +31,12 @@ class AppServiceConfig(Config): section = "appservice" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.app_service_config_files = config.get("app_service_config_files", []) self.notify_appservices = config.get("notify_appservices", True) self.track_appservice_user_ips = config.get("track_appservice_user_ips", False) - def generate_config_section(cls, **kwargs): + def generate_config_section(cls, **kwargs) -> str: return """\ # A list of application service config files to use # @@ -50,7 +51,9 @@ def generate_config_section(cls, **kwargs): """ -def load_appservices(hostname, config_files): +def load_appservices( + hostname: str, config_files: List[str] +) -> List[ApplicationService]: """Returns a list of Application Services from the config files.""" if not isinstance(config_files, list): logger.warning("Expected %s to be a list of AS config files.", config_files) @@ -93,7 +96,9 @@ def load_appservices(hostname, config_files): return appservices -def _load_appservice(hostname, as_info, config_filename): +def _load_appservice( + hostname: str, as_info: JsonDict, config_filename: str +) -> ApplicationService: required_string_fields = ["id", "as_token", "hs_token", "sender_localpart"] for field in required_string_fields: if not isinstance(as_info.get(field), str): @@ -115,9 +120,9 @@ def _load_appservice(hostname, as_info, config_filename): user_id = user.to_string() # Rate limiting for users of this AS is on by default (excludes sender) - rate_limited = True - if isinstance(as_info.get("rate_limited"), bool): - rate_limited = as_info.get("rate_limited") + rate_limited = as_info.get("rate_limited") + if not isinstance(rate_limited, bool): + rate_limited = True # namespace checks if not isinstance(as_info.get("namespaces"), dict): diff --git a/synapse/config/auth.py b/synapse/config/auth.py index 53809cee2ecd..ba8bf9cbe7d4 100644 --- 
a/synapse/config/auth.py +++ b/synapse/config/auth.py @@ -88,7 +88,7 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # #require_lowercase: true - # Whether a password must contain at least one lowercase letter. + # Whether a password must contain at least one uppercase letter. # Defaults to 'false'. # #require_uppercase: true diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 8d5f38b5d934..d9d85f98e155 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -1,4 +1,4 @@ -# Copyright 2019 Matrix.org Foundation C.I.C. +# Copyright 2019-2021 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,9 @@ import os import re import threading -from typing import Callable, Dict +from typing import Callable, Dict, Optional + +import attr from synapse.python_dependencies import DependencyException, check_requirements @@ -34,13 +36,13 @@ _DEFAULT_EVENT_CACHE_SIZE = "10K" +@attr.s(slots=True, auto_attribs=True) class CacheProperties: - def __init__(self): - # The default factor size for all caches - self.default_factor_size = float( - os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE) - ) - self.resize_all_caches_func = None + # The default factor size for all caches + default_factor_size: float = float( + os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE) + ) + resize_all_caches_func: Optional[Callable[[], None]] = None properties = CacheProperties() @@ -62,7 +64,7 @@ def _canonicalise_cache_name(cache_name: str) -> str: def add_resizable_cache( cache_name: str, cache_resize_callback: Callable[[float], None] -): +) -> None: """Register a cache that's size can dynamically change Args: @@ -91,7 +93,7 @@ class CacheConfig(Config): _environ = os.environ @staticmethod - def reset(): + def reset() -> None: """Resets the caches to their defaults. Used for tests.""" properties.default_factor_size = float( os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE) @@ -100,7 +102,7 @@ def reset(): with _CACHES_LOCK: _CACHES.clear() - def generate_config_section(self, **kwargs): + def generate_config_section(self, **kwargs) -> str: return """\ ## Caching ## @@ -151,9 +153,18 @@ def generate_config_section(self, **kwargs): # entries are never evicted based on time. # #expiry_time: 30m + + # Controls how long the results of a /sync request are cached for after + # a successful response is returned. A higher duration can help clients with + # intermittent connections, at the cost of higher memory usage. + # + # By default, this is zero, which means that sync responses are not cached + # at all. 
+ # + #sync_response_cache_duration: 2m """ - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.event_cache_size = self.parse_size( config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE) ) @@ -208,10 +219,14 @@ def read_config(self, config, **kwargs): expiry_time = cache_config.get("expiry_time") if expiry_time: - self.expiry_time_msec = self.parse_duration(expiry_time) + self.expiry_time_msec: Optional[int] = self.parse_duration(expiry_time) else: self.expiry_time_msec = None + self.sync_response_cache_duration = self.parse_duration( + cache_config.get("sync_response_cache_duration", 0) + ) + # Resize all caches (if necessary) with the new factors we've loaded self.resize_all_caches() @@ -219,7 +234,7 @@ def read_config(self, config, **kwargs): # needing an instance of Config properties.resize_all_caches_func = self.resize_all_caches - def resize_all_caches(self): + def resize_all_caches(self) -> None: """Ensure all cache sizes are up to date For each cache, run the mapped callback function with either diff --git a/synapse/config/cas.py b/synapse/config/cas.py index 901f4123e187..6f2754092e76 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -1,4 +1,5 @@ # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +17,7 @@ from synapse.config.sso import SsoAttributeRequirement -from ._base import Config, ConfigError +from ._base import Config from ._util import validate_config @@ -28,21 +29,17 @@ class CasConfig(Config): section = "cas" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: cas_config = config.get("cas_config", None) self.cas_enabled = cas_config and cas_config.get("enabled", True) if self.cas_enabled: self.cas_server_url = cas_config["server_url"] - # The public baseurl is required because it is used by the redirect - # template. - public_baseurl = self.public_baseurl - if not public_baseurl: - raise ConfigError("cas_config requires a public_baseurl to be set") - # TODO Update this to a _synapse URL. + public_baseurl = self.root.server.public_baseurl self.cas_service_url = public_baseurl + "_matrix/client/r0/login/cas/ticket" + self.cas_displayname_attribute = cas_config.get("displayname_attribute") required_attributes = cas_config.get("required_attributes") or {} self.cas_required_attributes = _parsed_required_attributes_def( @@ -55,7 +52,7 @@ def read_config(self, config, **kwargs): self.cas_displayname_attribute = None self.cas_required_attributes = [] - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: return """\ # Enable Central Authentication Service (CAS) for registration and login. # diff --git a/synapse/config/consent.py b/synapse/config/consent.py index b05a9bd97f3b..ecc43b08b9fe 100644 --- a/synapse/config/consent.py +++ b/synapse/config/consent.py @@ -13,6 +13,7 @@ # limitations under the License. 
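For the new `sync_response_cache_duration` option above, values like `2m` pass through `Config.parse_duration`, which normalises to milliseconds. An illustrative sketch of that behaviour (editor's simplification; Synapse's exact unit table may differ):

```python
from typing import Union

def parse_duration(value: Union[str, int]) -> int:
    # Accept ints (already milliseconds) or strings with a unit suffix.
    if isinstance(value, int):
        return value
    second = 1000
    units = {"s": second, "m": 60 * second, "h": 3600 * second, "d": 86400 * second}
    suffix = value[-1]
    if suffix in units:
        return int(value[:-1]) * units[suffix]
    return int(value)

assert parse_duration("2m") == 120_000  # the suggested sync cache duration
assert parse_duration(0) == 0           # the default: no sync response caching
```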
from os import path +from typing import Optional from synapse.config import ConfigError @@ -78,8 +79,8 @@ class ConsentConfig(Config): def __init__(self, *args): super().__init__(*args) - self.user_consent_version = None - self.user_consent_template_dir = None + self.user_consent_version: Optional[str] = None + self.user_consent_template_dir: Optional[str] = None self.user_consent_server_notice_content = None self.user_consent_server_notice_to_guests = False self.block_events_without_consent_error = None @@ -94,7 +95,9 @@ def read_config(self, config, **kwargs): return self.user_consent_version = str(consent_config["version"]) self.user_consent_template_dir = self.abspath(consent_config["template_dir"]) - if not path.isdir(self.user_consent_template_dir): + if not isinstance(self.user_consent_template_dir, str) or not path.isdir( + self.user_consent_template_dir + ): raise ConfigError( "Could not find template directory '%s'" % (self.user_consent_template_dir,) diff --git a/synapse/config/database.py b/synapse/config/database.py index 651e31b57621..06ccf15cd9f8 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -1,5 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2020 The Matrix.org Foundation C.I.C. +# Copyright 2020-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import argparse import logging import os @@ -119,7 +120,7 @@ def __init__(self, *args, **kwargs): self.databases = [] - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: # We *experimentally* support specifying multiple databases via the # `databases` key. This is a map from a label to database config in the # same format as the `database` config option, plus an extra @@ -163,12 +164,12 @@ def read_config(self, config, **kwargs): self.databases = [DatabaseConnectionConfig("master", database_config)] self.set_databasepath(database_path) - def generate_config_section(self, data_dir_path, **kwargs): + def generate_config_section(self, data_dir_path, **kwargs) -> str: return DEFAULT_CONFIG % { "database_path": os.path.join(data_dir_path, "homeserver.db") } - def read_arguments(self, args): + def read_arguments(self, args: argparse.Namespace) -> None: """ Cases for the cli input: - If no databases are configured and no database_path is set, raise. 
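The `add_arguments` hook annotated in the next hunk follows a simple pattern: each config section contributes its own flag group to the shared CLI parser. A minimal sketch (the help text is illustrative, not copied from Synapse):

```python
import argparse

parser = argparse.ArgumentParser(description="synapse homeserver")
db_group = parser.add_argument_group("database")
db_group.add_argument("-d", "--database-path", help="path to a sqlite database")

args = parser.parse_args(["-d", "/data/homeserver.db"])
print(args.database_path)  # /data/homeserver.db
```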
@@ -194,7 +195,7 @@ def read_arguments(self, args): else: logger.warning(NON_SQLITE_DATABASE_PATH_WARNING) - def set_databasepath(self, database_path): + def set_databasepath(self, database_path: str) -> None: if database_path != ":memory:": database_path = self.abspath(database_path) @@ -202,7 +203,7 @@ def set_databasepath(self, database_path): self.databases[0].config["args"]["database"] = database_path @staticmethod - def add_arguments(parser): + def add_arguments(parser: argparse.ArgumentParser) -> None: db_group = parser.add_argument_group("database") db_group.add_argument( "-d", diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 8d8f166e9bfb..510b647c6343 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -16,14 +16,16 @@ # This file can't be called email.py because if it is, we cannot: import email.utils +import logging import os from enum import Enum -from typing import Optional import attr from ._base import Config, ConfigError +logger = logging.getLogger(__name__) + MISSING_PASSWORD_RESET_CONFIG_ERROR = """\ Password reset emails are enabled on this homeserver due to a partial 'email' block. However, the following required keys are missing: @@ -44,6 +46,14 @@ "email_validation": "[%(server_name)s] Validate your email", } +LEGACY_TEMPLATE_DIR_WARNING = """ +This server's configuration file is using the deprecated 'template_dir' setting in the +'email' section. Support for this setting has been deprecated and will be removed in a +future version of Synapse. Server admins should instead use the new +'custom_templates_directory' setting documented here: +https://matrix-org.github.io/synapse/latest/templates.html +---------------------------------------------------------------------------------------""" + @attr.s(slots=True, frozen=True) class EmailSubjectConfig: @@ -80,6 +90,12 @@ def read_config(self, config, **kwargs): self.require_transport_security = email_config.get( "require_transport_security", False ) + self.enable_smtp_tls = email_config.get("enable_tls", True) + if self.require_transport_security and not self.enable_smtp_tls: + raise ConfigError( + "email.require_transport_security requires email.enable_tls to be true" + ) + if "app_name" in email_config: self.email_app_name = email_config["app_name"] else: @@ -99,6 +115,9 @@ def read_config(self, config, **kwargs): # A user-configurable template directory template_dir = email_config.get("template_dir") + if template_dir is not None: + logger.warning(LEGACY_TEMPLATE_DIR_WARNING) + if isinstance(template_dir, str): # We need an absolute path, because we change directory after starting (and # we don't yet know what auxiliary templates like mail.css we will need). @@ -115,36 +134,17 @@ def read_config(self, config, **kwargs): # msisdn is currently always remote while Synapse does not support any method of # sending SMS messages ThreepidBehaviour.REMOTE - if self.account_threepid_delegate_email + if self.root.registration.account_threepid_delegate_email else ThreepidBehaviour.LOCAL ) - # Prior to Synapse v1.4.0, there was another option that defined whether Synapse would - # use an identity server to password reset tokens on its behalf. We now warn the user - # if they have this set and tell them to use the updated option, while using a default - # identity server in the process. 
- self.using_identity_server_from_trusted_list = False - if ( - not self.account_threepid_delegate_email - and config.get("trust_identity_server_for_password_resets", False) is True - ): - # Use the first entry in self.trusted_third_party_id_servers instead - if self.trusted_third_party_id_servers: - # XXX: It's a little confusing that account_threepid_delegate_email is modified - # both in RegistrationConfig and here. We should factor this bit out - - first_trusted_identity_server = self.trusted_third_party_id_servers[0] - # trusted_third_party_id_servers does not contain a scheme whereas - # account_threepid_delegate_email is expected to. Presume https - self.account_threepid_delegate_email: Optional[str] = ( - "https://" + first_trusted_identity_server - ) - self.using_identity_server_from_trusted_list = True - else: - raise ConfigError( - "Attempted to use an identity server from" - '"trusted_third_party_id_servers" but it is empty.' - ) + if config.get("trust_identity_server_for_password_resets"): + raise ConfigError( + 'The config option "trust_identity_server_for_password_resets" ' + 'has been replaced by "account_threepid_delegate". ' + "Please consult the sample config at docs/sample_config.yaml for " + "details and update your config file." + ) self.local_threepid_handling_disabled_due_to_email_config = False if ( @@ -167,11 +167,6 @@ def read_config(self, config, **kwargs): if not self.email_notif_from: missing.append("email.notif_from") - # public_baseurl is required to build password reset and validation links that - # will be emailed to users - if config.get("public_baseurl") is None: - missing.append("public_baseurl") - if missing: raise ConfigError( MISSING_PASSWORD_RESET_CONFIG_ERROR % (", ".join(missing),) @@ -251,7 +246,14 @@ def read_config(self, config, **kwargs): registration_template_success_html, add_threepid_template_success_html, ], - template_dir, + ( + td + for td in ( + self.root.server.custom_template_directory, + template_dir, + ) + if td + ), # Filter out template_dir if not provided ) # Render templates that do not contain any placeholders @@ -270,9 +272,6 @@ def read_config(self, config, **kwargs): if not self.email_notif_from: missing.append("email.notif_from") - if config.get("public_baseurl") is None: - missing.append("public_baseurl") - if missing: raise ConfigError( "email.enable_notifs is True but required keys are missing: %s" @@ -291,7 +290,14 @@ def read_config(self, config, **kwargs): self.email_notif_template_text, ) = self.read_templates( [notif_template_html, notif_template_text], - template_dir, + ( + td + for td in ( + self.root.server.custom_template_directory, + template_dir, + ) + if td + ), # Filter out template_dir if not provided ) self.email_notif_for_new_users = email_config.get( @@ -301,7 +307,7 @@ def read_config(self, config, **kwargs): "client_base_url", email_config.get("riot_base_url", None) ) - if self.account_validity_renew_by_email_enabled: + if self.root.account_validity.account_validity_renew_by_email_enabled: expiry_template_html = email_config.get( "expiry_template_html", "notice_expiry.html" ) @@ -314,7 +320,14 @@ def read_config(self, config, **kwargs): self.account_validity_template_text, ) = self.read_templates( [expiry_template_html, expiry_template_text], - template_dir, + ( + td + for td in ( + self.root.server.custom_template_directory, + template_dir, + ) + if td + ), # Filter out template_dir if not provided ) subjects_config = email_config.get("subjects", {}) @@ -346,6 +359,9 @@ def 
generate_config_section(self, config_dir_path, server_name, **kwargs): """\ # Configuration for sending emails from Synapse. # + # Server admins can configure custom templates for email content. See + # https://matrix-org.github.io/synapse/latest/templates.html for more information. + # email: # The hostname of the outgoing SMTP server to use. Defaults to 'localhost'. # @@ -368,6 +384,14 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # #require_transport_security: true + # Uncomment the following to disable TLS for SMTP. + # + # By default, if the server supports TLS, it will be used, and the server + # must present a certificate that is valid for 'smtp_host'. If this option + # is set to false, TLS will not be used. + # + #enable_tls: false + # notif_from defines the "From" address to use when sending emails. # It must be set if email sending is enabled. # @@ -414,49 +438,6 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # #invite_client_location: https://app.element.io - # Directory in which Synapse will try to find the template files below. - # If not set, or the files named below are not found within the template - # directory, default templates from within the Synapse package will be used. - # - # Synapse will look for the following templates in this directory: - # - # * The contents of email notifications of missed events: 'notif_mail.html' and - # 'notif_mail.txt'. - # - # * The contents of account expiry notice emails: 'notice_expiry.html' and - # 'notice_expiry.txt'. - # - # * The contents of password reset emails sent by the homeserver: - # 'password_reset.html' and 'password_reset.txt' - # - # * An HTML page that a user will see when they follow the link in the password - # reset email. The user will be asked to confirm the action before their - # password is reset: 'password_reset_confirmation.html' - # - # * HTML pages for success and failure that a user will see when they confirm - # the password reset flow using the page above: 'password_reset_success.html' - # and 'password_reset_failure.html' - # - # * The contents of address verification emails sent during registration: - # 'registration.html' and 'registration.txt' - # - # * HTML pages for success and failure that a user will see when they follow - # the link in an address verification email sent during registration: - # 'registration_success.html' and 'registration_failure.html' - # - # * The contents of address verification emails sent when an address is added - # to a Matrix account: 'add_threepid.html' and 'add_threepid.txt' - # - # * HTML pages for success and failure that a user will see when they follow - # the link in an address verification email sent when an address is added - # to a Matrix account: 'add_threepid_success.html' and - # 'add_threepid_failure.html' - # - # You can see the default templates at: - # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates - # - #template_dir: "res/templates" - # Subjects to use when sending emails from Synapse. 
# # The placeholder '%%(app)s' will be replaced with the value of the 'app_name' diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 4c60ee8c2859..d78a15097c87 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -24,8 +24,10 @@ class ExperimentalConfig(Config): def read_config(self, config: JsonDict, **kwargs): experimental = config.get("experimental_features") or {} - # MSC2858 (multiple SSO identity providers) - self.msc2858_enabled: bool = experimental.get("msc2858_enabled", False) + # Whether to enable experimental MSC1849 (aka relations) support + self.msc1849_enabled = config.get("experimental_msc1849_support_enabled", True) + # MSC3440 (thread relation) + self.msc3440_enabled: bool = experimental.get("msc3440_enabled", False) # MSC3026 (busy presence state) self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False) @@ -37,4 +39,13 @@ def read_config(self, config: JsonDict, **kwargs): self.msc2285_enabled: bool = experimental.get("msc2285_enabled", False) # MSC3244 (room version capabilities) - self.msc3244_enabled: bool = experimental.get("msc3244_enabled", False) + self.msc3244_enabled: bool = experimental.get("msc3244_enabled", True) + + # MSC3283 (set displayname, avatar_url and change 3pid capabilities) + self.msc3283_enabled: bool = experimental.get("msc3283_enabled", False) + + # MSC3266 (room summary api) + self.msc3266_enabled: bool = experimental.get("msc3266_enabled", False) + + # MSC3030 (Jump to date API endpoint) + self.msc3030_enabled: bool = experimental.get("msc3030_enabled", False) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 1f42a51857c6..001605c265fb 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -30,6 +30,7 @@ from .logger import LoggingConfig from .metrics import MetricsConfig from .modules import ModulesConfig +from .oembed import OembedConfig from .oidc import OIDCConfig from .password_auth_providers import PasswordAuthProviderConfig from .push import PushConfig @@ -37,6 +38,7 @@ from .redis import RedisConfig from .registration import RegistrationConfig from .repository import ContentRepositoryConfig +from .retention import RetentionConfig from .room import RoomConfig from .room_directory import RoomDirectoryConfig from .saml2 import SAML2Config @@ -58,6 +60,7 @@ class HomeServerConfig(RootConfig): config_classes = [ ModulesConfig, ServerConfig, + RetentionConfig, TlsConfig, FederationConfig, CacheConfig, @@ -65,6 +68,7 @@ class HomeServerConfig(RootConfig): LoggingConfig, RatelimitConfig, ContentRepositoryConfig, + OembedConfig, CaptchaConfig, VoipConfig, RegistrationConfig, diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 9d295f5856e6..24c3ef01fc4c 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -31,6 +31,8 @@ def read_config(self, config, **kwargs): self.jwt_secret = jwt_config["secret"] self.jwt_algorithm = jwt_config["algorithm"] + self.jwt_subject_claim = jwt_config.get("subject_claim", "sub") + # The issuer and audiences are optional, if provided, it is asserted # that the claims exist on the JWT. 
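# A hedged illustration of the new 'subject_claim' option read just above: the
# configured claim, rather than the hard-coded "sub", supplies the user
# identifier. PyJWT is used here purely for demonstration; this is not
# Synapse's actual login code.
import jwt  # PyJWT

secret, algorithm, subject_claim = "shared-secret", "HS256", "preferred_username"
token = jwt.encode({"preferred_username": "alice"}, secret, algorithm=algorithm)
claims = jwt.decode(token, secret, algorithms=[algorithm])
print(claims[subject_claim])  # "alice", taken from the configured claim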
        self.jwt_issuer = jwt_config.get("issuer")
@@ -46,6 +48,7 @@ def read_config(self, config, **kwargs):
             self.jwt_enabled = False
             self.jwt_secret = None
             self.jwt_algorithm = None
+            self.jwt_subject_claim = None
             self.jwt_issuer = None
             self.jwt_audiences = None
@@ -88,6 +91,12 @@ def generate_config_section(self, **kwargs):
         #
         #algorithm: "provided-by-your-issuer"
 
+        # Name of the claim containing a unique identifier for the user.
+        #
+        # Optional, defaults to `sub`.
+        #
+        #subject_claim: "sub"
+
         # The issuer to validate the "iss" claim against.
         #
         # Optional, if provided the "iss" claim will be required and
diff --git a/synapse/config/key.py b/synapse/config/key.py
index 94a9063043a2..035ee2416bd6 100644
--- a/synapse/config/key.py
+++ b/synapse/config/key.py
@@ -16,6 +16,7 @@
 import hashlib
 import logging
 import os
+from typing import Any, Dict
 
 import attr
 import jsonschema
@@ -145,11 +146,13 @@ def read_config(self, config, config_dir_path, **kwargs):
         # list of TrustedKeyServer objects
         self.key_servers = list(
-            _parse_key_servers(key_servers, self.federation_verify_certificates)
+            _parse_key_servers(
+                key_servers, self.root.tls.federation_verify_certificates
+            )
         )
 
         self.macaroon_secret_key = config.get(
-            "macaroon_secret_key", self.registration_shared_secret
+            "macaroon_secret_key", self.root.registration.registration_shared_secret
         )
 
         if not self.macaroon_secret_key:
@@ -310,7 +313,7 @@ def read_old_signing_keys(self, old_signing_keys):
             )
         return keys
 
-    def generate_files(self, config, config_dir_path):
+    def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None:
         if "signing_key" in config:
             return
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index ad4e6e61c3bf..ea69b9bd9b50 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -1,4 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,6 +19,7 @@
 import sys
 import threading
 from string import Template
+from typing import TYPE_CHECKING, Any, Dict, Optional
 
 import yaml
 from zope.interface import implementer
@@ -38,6 +40,10 @@
 
 from ._base import Config, ConfigError
 
+if TYPE_CHECKING:
+    from synapse.config.homeserver import HomeServerConfig
+    from synapse.server import HomeServer
+
 DEFAULT_LOG_CONFIG = Template(
     """\
 # Log configuration for Synapse.
@@ -67,18 +73,31 @@
         backupCount: 3  # Does not include the current log file.
         encoding: utf8
 
-    # Default to buffering writes to log file for efficiency. This means that
-    # will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR
-    # logs will still be flushed immediately.
+    # Default to buffering writes to log file for efficiency.
+    # WARNING/ERROR logs will still be flushed immediately, but there will be a
+    # delay (of up to `period` seconds, or until the buffer is full with
+    # `capacity` messages) before INFO/DEBUG logs get written.
     buffer:
-        class: logging.handlers.MemoryHandler
+        class: synapse.logging.handlers.PeriodicallyFlushingMemoryHandler
         target: file
-        # The capacity is the number of log lines that are buffered before
-        # being written to disk. Increasing this will lead to better
+
+        # The capacity is the maximum number of log lines that are buffered
+        # before being written to disk. Increasing this will lead to better
        # performance, at the expense of it taking longer for log lines to
        # be written to disk.
+        # This parameter is required.
capacity: 10 - flushLevel: 30 # Flush for WARNING logs as well + + # Logs with a level at or above the flush level will cause the buffer to + # be flushed immediately. + # Default value: 40 (ERROR) + # Other values: 50 (CRITICAL), 30 (WARNING), 20 (INFO), 10 (DEBUG) + flushLevel: 30 # Flush immediately for WARNING logs and higher + + # The period of time, in seconds, between forced flushes. + # Messages will not be delayed for longer than this time. + # Default value: 5 seconds + period: 5 # A handler that writes logs to stderr. Unused by default, but can be used # instead of "buffer" and "file" in the logger handlers. @@ -124,13 +143,13 @@ class LoggingConfig(Config): section = "logging" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: if config.get("log_file"): raise ConfigError(LOG_FILE_ERROR) self.log_config = self.abspath(config.get("log_config")) self.no_redirect_stdio = config.get("no_redirect_stdio", False) - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: log_config = os.path.join(config_dir_path, server_name + ".log.config") return ( """\ @@ -144,14 +163,14 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): % locals() ) - def read_arguments(self, args): + def read_arguments(self, args: argparse.Namespace) -> None: if args.no_redirect_stdio is not None: self.no_redirect_stdio = args.no_redirect_stdio if args.log_file is not None: raise ConfigError(LOG_FILE_ERROR) @staticmethod - def add_arguments(parser): + def add_arguments(parser: argparse.ArgumentParser) -> None: logging_group = parser.add_argument_group("logging") logging_group.add_argument( "-n", @@ -168,7 +187,7 @@ def add_arguments(parser): help=argparse.SUPPRESS, ) - def generate_files(self, config, config_dir_path): + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: log_config = config.get("log_config") if log_config and not os.path.exists(log_config): log_file = self.abspath("homeserver.log") @@ -180,7 +199,9 @@ def generate_files(self, config, config_dir_path): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(config, log_config_path, logBeginner: LogBeginner) -> None: +def _setup_stdlib_logging( + config: "HomeServerConfig", log_config_path: Optional[str], logBeginner: LogBeginner +) -> None: """ Set up Python standard library logging. """ @@ -210,10 +231,10 @@ def _setup_stdlib_logging(config, log_config_path, logBeginner: LogBeginner) -> # writes. log_context_filter = LoggingContextFilter() - log_metadata_filter = MetadataFilter({"server_name": config.server_name}) + log_metadata_filter = MetadataFilter({"server_name": config.server.server_name}) old_factory = logging.getLogRecordFactory() - def factory(*args, **kwargs): + def factory(*args: Any, **kwargs: Any) -> logging.LogRecord: record = old_factory(*args, **kwargs) log_context_filter.filter(record) log_metadata_filter.filter(record) @@ -280,7 +301,7 @@ def _load_logging_config(log_config_path: str) -> None: logging.config.dictConfig(log_config) -def _reload_logging_config(log_config_path): +def _reload_logging_config(log_config_path: Optional[str]) -> None: """ Reload the log configuration from the file and apply it. 
""" @@ -293,7 +314,10 @@ def _reload_logging_config(log_config_path): def setup_logging( - hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner + hs: "HomeServer", + config: "HomeServerConfig", + use_worker_options: bool = False, + logBeginner: LogBeginner = globalLogBeginner, ) -> None: """ Set up the logging subsystem. @@ -309,7 +333,9 @@ def setup_logging( """ log_config_path = ( - config.worker_log_config if use_worker_options else config.log_config + config.worker.worker_log_config + if use_worker_options + else config.logging.log_config ) # Perform one-time logging configuration. @@ -322,5 +348,5 @@ def setup_logging( # Log immediately so we can grep backwards. logging.warning("***** STARTING SERVER *****") logging.warning("Server %s version %s", sys.argv[0], get_version_string(synapse)) - logging.info("Server hostname: %s", config.server_name) + logging.info("Server hostname: %s", config.server.server_name) logging.info("Instance name: %s", hs.get_instance_name()) diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py new file mode 100644 index 000000000000..ea6ace4729ba --- /dev/null +++ b/synapse/config/oembed.py @@ -0,0 +1,196 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import re +from typing import Any, Dict, Iterable, List, Optional, Pattern +from urllib import parse as urlparse + +import attr +import pkg_resources + +from synapse.types import JsonDict + +from ._base import Config, ConfigError +from ._util import validate_config + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class OEmbedEndpointConfig: + # The API endpoint to fetch. + api_endpoint: str + # The patterns to match. + url_patterns: List[Pattern] + # The supported formats. + formats: Optional[List[str]] + + +class OembedConfig(Config): + """oEmbed Configuration""" + + section = "oembed" + + def read_config(self, config, **kwargs): + oembed_config: Dict[str, Any] = config.get("oembed") or {} + + # A list of patterns which will be used. + self.oembed_patterns: List[OEmbedEndpointConfig] = list( + self._parse_and_validate_providers(oembed_config) + ) + + def _parse_and_validate_providers( + self, oembed_config: dict + ) -> Iterable[OEmbedEndpointConfig]: + """Extract and parse the oEmbed providers from the given JSON file. + + Returns a generator which yields the OidcProviderConfig objects + """ + # Whether to use the packaged providers.json file. + if not oembed_config.get("disable_default_providers") or False: + providers = json.load( + pkg_resources.resource_stream("synapse", "res/providers.json") + ) + yield from self._parse_and_validate_provider( + providers, config_path=("oembed",) + ) + + # The JSON files which includes additional provider information. + for i, file in enumerate(oembed_config.get("additional_providers") or []): + # TODO Error checking. 
+    def _parse_and_validate_provider(
+        self, providers: List[JsonDict], config_path: Iterable[str]
+    ) -> Iterable[OEmbedEndpointConfig]:
+        # Ensure it is the proper form.
+        validate_config(
+            _OEMBED_PROVIDER_SCHEMA,
+            providers,
+            config_path=config_path,
+        )
+
+        # Parse it and yield each result.
+        for provider in providers:
+            # Each provider might have multiple API endpoints, each of which
+            # might have multiple patterns to match.
+            for endpoint in provider["endpoints"]:
+                api_endpoint = endpoint["url"]
+
+                # The API endpoint must be an HTTP(S) URL.
+                results = urlparse.urlparse(api_endpoint)
+                if results.scheme not in {"http", "https"}:
+                    raise ConfigError(
+                        f"Unsupported oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
+                        config_path,
+                    )
+
+                patterns = [
+                    self._glob_to_pattern(glob, config_path)
+                    for glob in endpoint["schemes"]
+                ]
+                yield OEmbedEndpointConfig(
+                    api_endpoint, patterns, endpoint.get("formats")
+                )
+
+    def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
+        """
+        Convert the glob into a sane regular expression to match against. The
+        rules followed will be slightly different for the domain portion vs.
+        the rest.
+
+        1. The scheme must be one of HTTP / HTTPS (and have no globs).
+        2. The domain can have globs, but we limit it to characters that can
+           reasonably be a domain part.
+           TODO: This does not attempt to handle Unicode domain names.
+           TODO: The domain should not allow wildcard TLDs.
+        3. Other parts allow a glob to be any one, or more, characters.
+        """
+        results = urlparse.urlparse(glob)
+
+        # The scheme must be HTTP(S) (and cannot contain wildcards).
+        if results.scheme not in {"http", "https"}:
+            raise ConfigError(
+                f"Unsupported oEmbed scheme ({results.scheme}) for pattern: {glob}",
+                config_path,
+            )
+
+        pattern = urlparse.urlunparse(
+            [
+                results.scheme,
+                re.escape(results.netloc).replace("\\*", "[a-zA-Z0-9_-]+"),
+            ]
+            + [re.escape(part).replace("\\*", ".+") for part in results[2:]]
+        )
+        return re.compile(pattern)
+
+    def generate_config_section(self, **kwargs):
+        return """\
+        # oEmbed allows for easier embedding of content from a website. It can be
+        # used for generating URL previews of services which support it.
+        #
+        oembed:
+          # A default list of oEmbed providers is included with Synapse.
+          #
+          # Uncomment the following to disable using these default oEmbed URLs.
+          # Defaults to 'false'.
+          #
+          #disable_default_providers: true
+
+          # Additional files with oEmbed configuration (each should be in the
+          # form of providers.json).
+          #
+          # By default, this list is empty (so only the default providers.json
+          # is used).
+ # + #additional_providers: + # - oembed/my_providers.json + """ + + +_OEMBED_PROVIDER_SCHEMA = { + "type": "array", + "items": { + "type": "object", + "properties": { + "provider_name": {"type": "string"}, + "provider_url": {"type": "string"}, + "endpoints": { + "type": "array", + "items": { + "type": "object", + "properties": { + "schemes": { + "type": "array", + "items": {"type": "string"}, + }, + "url": {"type": "string"}, + "formats": {"type": "array", "items": {"type": "string"}}, + "discovery": {"type": "boolean"}, + }, + "required": ["schemes", "url"], + }, + }, + }, + "required": ["provider_name", "provider_url", "endpoints"], + }, +} diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index ba89d11cf016..79c400fe30b8 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -14,7 +14,7 @@ # limitations under the License. from collections import Counter -from typing import Collection, Iterable, List, Mapping, Optional, Tuple, Type +from typing import Any, Collection, Iterable, List, Mapping, Optional, Tuple, Type import attr @@ -36,7 +36,7 @@ class OIDCConfig(Config): section = "oidc" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.oidc_providers = tuple(_parse_oidc_provider_configs(config)) if not self.oidc_providers: return @@ -58,9 +58,7 @@ def read_config(self, config, **kwargs): "Multiple OIDC providers have the idp_id %r." % idp_id ) - public_baseurl = self.public_baseurl - if public_baseurl is None: - raise ConfigError("oidc_config requires a public_baseurl to be set") + public_baseurl = self.root.server.public_baseurl self.oidc_callback_url = public_baseurl + "_synapse/client/oidc/callback" @property @@ -68,7 +66,7 @@ def oidc_enabled(self) -> bool: # OIDC is enabled if we have a provider return bool(self.oidc_providers) - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: return """\ # List of OpenID Connect (OIDC) / OAuth 2.0 identity providers, for registration # and login. @@ -277,12 +275,6 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): "maxLength": 255, "pattern": "^[a-z][a-z0-9_.-]*$", }, - "idp_unstable_brand": { - "type": "string", - "minLength": 1, - "maxLength": 255, - "pattern": "^[a-z][a-z0-9_.-]*$", - }, "discover": {"type": "boolean"}, "issuer": {"type": "string"}, "client_id": {"type": "string"}, @@ -483,7 +475,6 @@ def _parse_oidc_config_dict( idp_name=oidc_config.get("idp_name", "OIDC"), idp_icon=idp_icon, idp_brand=oidc_config.get("idp_brand"), - unstable_idp_brand=oidc_config.get("unstable_idp_brand"), discover=oidc_config.get("discover", True), issuer=oidc_config["issuer"], client_id=oidc_config["client_id"], @@ -504,92 +495,89 @@ def _parse_oidc_config_dict( ) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class OidcProviderClientSecretJwtKey: # a pem-encoded signing key - key = attr.ib(type=str) + key: str # properties to include in the JWT header - jwt_header = attr.ib(type=Mapping[str, str]) + jwt_header: Mapping[str, str] # properties to include in the JWT payload. - jwt_payload = attr.ib(type=Mapping[str, str]) + jwt_payload: Mapping[str, str] -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class OidcProviderConfig: # a unique identifier for this identity provider. 
Used in the 'user_external_ids' # table, as well as the query/path parameter used in the login protocol. - idp_id = attr.ib(type=str) + idp_id: str # user-facing name for this identity provider. - idp_name = attr.ib(type=str) + idp_name: str # Optional MXC URI for icon for this IdP. - idp_icon = attr.ib(type=Optional[str]) + idp_icon: Optional[str] # Optional brand identifier for this IdP. - idp_brand = attr.ib(type=Optional[str]) - - # Optional brand identifier for the unstable API (see MSC2858). - unstable_idp_brand = attr.ib(type=Optional[str]) + idp_brand: Optional[str] # whether the OIDC discovery mechanism is used to discover endpoints - discover = attr.ib(type=bool) + discover: bool # the OIDC issuer. Used to validate tokens and (if discovery is enabled) to # discover the provider's endpoints. - issuer = attr.ib(type=str) + issuer: str # oauth2 client id to use - client_id = attr.ib(type=str) + client_id: str # oauth2 client secret to use. if `None`, use client_secret_jwt_key to generate # a secret. - client_secret = attr.ib(type=Optional[str]) + client_secret: Optional[str] # key to use to construct a JWT to use as a client secret. May be `None` if # `client_secret` is set. - client_secret_jwt_key = attr.ib(type=Optional[OidcProviderClientSecretJwtKey]) + client_secret_jwt_key: Optional[OidcProviderClientSecretJwtKey] # auth method to use when exchanging the token. # Valid values are 'client_secret_basic', 'client_secret_post' and # 'none'. - client_auth_method = attr.ib(type=str) + client_auth_method: str # list of scopes to request - scopes = attr.ib(type=Collection[str]) + scopes: Collection[str] # the oauth2 authorization endpoint. Required if discovery is disabled. - authorization_endpoint = attr.ib(type=Optional[str]) + authorization_endpoint: Optional[str] # the oauth2 token endpoint. Required if discovery is disabled. - token_endpoint = attr.ib(type=Optional[str]) + token_endpoint: Optional[str] # the OIDC userinfo endpoint. Required if discovery is disabled and the # "openid" scope is not requested. - userinfo_endpoint = attr.ib(type=Optional[str]) + userinfo_endpoint: Optional[str] # URI where to fetch the JWKS. Required if discovery is disabled and the # "openid" scope is used. - jwks_uri = attr.ib(type=Optional[str]) + jwks_uri: Optional[str] # Whether to skip metadata verification - skip_verification = attr.ib(type=bool) + skip_verification: bool # Whether to fetch the user profile from the userinfo endpoint. Valid # values are: "auto" or "userinfo_endpoint". 
- user_profile_method = attr.ib(type=str) + user_profile_method: str # whether to allow a user logging in via OIDC to match a pre-existing account # instead of failing - allow_existing_users = attr.ib(type=bool) + allow_existing_users: bool # the class of the user mapping provider - user_mapping_provider_class = attr.ib(type=Type) + user_mapping_provider_class: Type # the config of the user mapping provider - user_mapping_provider_config = attr.ib() + user_mapping_provider_config: Any # required attributes to require in userinfo to allow login/registration - attribute_requirements = attr.ib(type=List[SsoAttributeRequirement]) + attribute_requirements: List[SsoAttributeRequirement] diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 0f5b2b397754..f980102b45e2 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, List +from typing import Any, List, Tuple, Type from synapse.util.module_loader import load_module @@ -25,7 +25,30 @@ class PasswordAuthProviderConfig(Config): section = "authproviders" def read_config(self, config, **kwargs): - self.password_providers: List[Any] = [] + """Parses the old password auth providers config. The config format looks like this: + + password_providers: + # Example config for an LDAP auth provider + - module: "ldap_auth_provider.LdapAuthProvider" + config: + enabled: true + uri: "ldap://ldap.example.com:389" + start_tls: true + base: "ou=users,dc=example,dc=com" + attributes: + uid: "cn" + mail: "email" + name: "givenName" + #bind_dn: + #bind_password: + #filter: "(objectClass=posixAccount)" + + We expect admins to use modules for this feature (which is why it doesn't appear + in the sample config file), but we want to keep support for it around for a bit + for backwards compatibility. + """ + + self.password_providers: List[Tuple[Type, Any]] = [] providers = [] # We want to be backwards compatible with the old `ldap_config` @@ -49,33 +72,3 @@ def read_config(self, config, **kwargs): ) self.password_providers.append((provider_class, provider_config)) - - def generate_config_section(self, **kwargs): - return """\ - # Password providers allow homeserver administrators to integrate - # their Synapse installation with existing authentication methods - # ex. LDAP, external tokens, etc. - # - # For more information and known implementations, please see - # https://matrix-org.github.io/synapse/latest/password_auth_providers.html - # - # Note: instances wishing to use SAML or CAS authentication should - # instead use the `saml2_config` or `cas_config` options, - # respectively. 
- # - password_providers: - # # Example config for an LDAP auth provider - # - module: "ldap_auth_provider.LdapAuthProvider" - # config: - # enabled: true - # uri: "ldap://ldap.example.com:389" - # start_tls: true - # base: "ou=users,dc=example,dc=com" - # attributes: - # uid: "cn" - # mail: "email" - # name: "givenName" - # #bind_dn: - # #bind_password: - # #filter: "(objectClass=posixAccount)" - """ diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 7a8d5851c40b..36636ab07e40 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -14,6 +14,8 @@ from typing import Dict, Optional +import attr + from ._base import Config @@ -29,18 +31,13 @@ def __init__( self.burst_count = int(config.get("burst_count", defaults["burst_count"])) +@attr.s(auto_attribs=True) class FederationRateLimitConfig: - _items_and_default = { - "window_size": 1000, - "sleep_limit": 10, - "sleep_delay": 500, - "reject_limit": 50, - "concurrent": 3, - } - - def __init__(self, **kwargs): - for i in self._items_and_default.keys(): - setattr(self, i, kwargs.get(i) or self._items_and_default[i]) + window_size: int = 1000 + sleep_limit: int = 10 + sleep_delay: int = 500 + reject_limit: int = 50 + concurrent: int = 3 class RatelimitConfig(Config): @@ -69,16 +66,25 @@ def read_config(self, config, **kwargs): else: self.rc_federation = FederationRateLimitConfig( **{ - "window_size": config.get("federation_rc_window_size"), - "sleep_limit": config.get("federation_rc_sleep_limit"), - "sleep_delay": config.get("federation_rc_sleep_delay"), - "reject_limit": config.get("federation_rc_reject_limit"), - "concurrent": config.get("federation_rc_concurrent"), + k: v + for k, v in { + "window_size": config.get("federation_rc_window_size"), + "sleep_limit": config.get("federation_rc_sleep_limit"), + "sleep_delay": config.get("federation_rc_sleep_delay"), + "reject_limit": config.get("federation_rc_reject_limit"), + "concurrent": config.get("federation_rc_concurrent"), + }.items() + if v is not None } ) self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) + self.rc_registration_token_validity = RateLimitConfig( + config.get("rc_registration_token_validity", {}), + defaults={"per_second": 0.1, "burst_count": 5}, + ) + rc_login_config = config.get("rc_login", {}) self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) @@ -143,6 +149,8 @@ def generate_config_section(self, **kwargs): # is using # - one for registration that ratelimits registration requests based on the # client's IP address. + # - one for checking the validity of registration tokens that ratelimits + # requests based on the client's IP address. # - one for login that ratelimits login requests based on the client's IP # address. # - one for login that ratelimits login requests based on the account the @@ -171,6 +179,10 @@ def generate_config_section(self, **kwargs): # per_second: 0.17 # burst_count: 3 # + #rc_registration_token_validity: + # per_second: 0.1 + # burst_count: 5 + # #rc_login: # address: # per_second: 0.17 diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 0ad919b1394b..7a059c6decc0 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -1,4 +1,5 @@ # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. 
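# A minimal sketch of the attrs conversion above: the class now carries the
# defaults, and the dict comprehension only forwards legacy keys that were
# actually set, so unset options fall back to the attrs defaults. The class
# name and values here are illustrative.
import attr

@attr.s(auto_attribs=True)
class FederationRateLimitDefaults:
    window_size: int = 1000
    sleep_limit: int = 10

legacy = {"window_size": 2000, "sleep_limit": None}
cfg = FederationRateLimitDefaults(
    **{k: v for k, v in legacy.items() if v is not None}
)
assert (cfg.window_size, cfg.sleep_limit) == (2000, 10)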
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Optional from synapse.api.constants import RoomCreationPreset from synapse.config._base import Config, ConfigError @@ -33,23 +36,16 @@ def read_config(self, config, **kwargs): self.registrations_require_3pid = config.get("registrations_require_3pid", []) self.allowed_local_3pids = config.get("allowed_local_3pids", []) self.enable_3pid_lookup = config.get("enable_3pid_lookup", True) + self.registration_requires_token = config.get( + "registration_requires_token", False + ) self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) - self.trusted_third_party_id_servers = config.get( - "trusted_third_party_id_servers", ["matrix.org", "vector.im"] - ) + account_threepid_delegates = config.get("account_threepid_delegates") or {} self.account_threepid_delegate_email = account_threepid_delegates.get("email") self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") - if self.account_threepid_delegate_msisdn and not self.public_baseurl: - raise ConfigError( - "The configuration option `public_baseurl` is required if " - "`account_threepid_delegate.msisdn` is set, such that " - "clients know where to submit validation tokens to. Please " - "configure `public_baseurl`." - ) - self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) @@ -82,7 +78,7 @@ def read_config(self, config, **kwargs): if mxid_localpart: # Convert the localpart to a full mxid. self.auto_join_user_id = UserID( - mxid_localpart, self.server_name + mxid_localpart, self.root.server.server_name ).to_string() if self.autocreate_auto_join_rooms: @@ -119,26 +115,77 @@ def read_config(self, config, **kwargs): session_lifetime = self.parse_duration(session_lifetime) self.session_lifetime = session_lifetime - # The `access_token_lifetime` applies for tokens that can be renewed - # using a refresh token, as per MSC2918. If it is `None`, the refresh - # token mechanism is disabled. - # - # Since it is incompatible with the `session_lifetime` mechanism, it is set to - # `None` by default if a `session_lifetime` is set. - access_token_lifetime = config.get( - "access_token_lifetime", "5m" if session_lifetime is None else None + # The `refreshable_access_token_lifetime` applies for tokens that can be renewed + # using a refresh token, as per MSC2918. + # If it is `None`, the refresh token mechanism is disabled. + refreshable_access_token_lifetime = config.get( + "refreshable_access_token_lifetime", + "5m", ) - if access_token_lifetime is not None: - access_token_lifetime = self.parse_duration(access_token_lifetime) - self.access_token_lifetime = access_token_lifetime - - if session_lifetime is not None and access_token_lifetime is not None: - raise ConfigError( - "The refresh token mechanism is incompatible with the " - "`session_lifetime` option. Consider disabling the " - "`session_lifetime` option or disabling the refresh token " - "mechanism by removing the `access_token_lifetime` option." 
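# A rough sketch (simplified: Synapse's Config.parse_duration also accepts
# bare millisecond integers and more units) of how duration strings such as
# the "5m" default above become milliseconds before the lifetime comparisons
# that follow.
def parse_duration_ms(value: str) -> int:
    units = {"s": 1_000, "m": 60_000, "h": 3_600_000, "d": 86_400_000}
    return int(value[:-1]) * units[value[-1]]

assert parse_duration_ms("5m") == 300_000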
+ if refreshable_access_token_lifetime is not None: + refreshable_access_token_lifetime = self.parse_duration( + refreshable_access_token_lifetime ) + self.refreshable_access_token_lifetime: Optional[ + int + ] = refreshable_access_token_lifetime + + if ( + self.session_lifetime is not None + and "refreshable_access_token_lifetime" in config + ): + if self.session_lifetime < self.refreshable_access_token_lifetime: + raise ConfigError( + "Both `session_lifetime` and `refreshable_access_token_lifetime` " + "configuration options have been set, but `refreshable_access_token_lifetime` " + " exceeds `session_lifetime`!" + ) + + # The `nonrefreshable_access_token_lifetime` applies for tokens that can NOT be + # refreshed using a refresh token. + # If it is None, then these tokens last for the entire length of the session, + # which is infinite by default. + # The intention behind this configuration option is to help with requiring + # all clients to use refresh tokens, if the homeserver administrator requires. + nonrefreshable_access_token_lifetime = config.get( + "nonrefreshable_access_token_lifetime", + None, + ) + if nonrefreshable_access_token_lifetime is not None: + nonrefreshable_access_token_lifetime = self.parse_duration( + nonrefreshable_access_token_lifetime + ) + self.nonrefreshable_access_token_lifetime = nonrefreshable_access_token_lifetime + + if ( + self.session_lifetime is not None + and self.nonrefreshable_access_token_lifetime is not None + ): + if self.session_lifetime < self.nonrefreshable_access_token_lifetime: + raise ConfigError( + "Both `session_lifetime` and `nonrefreshable_access_token_lifetime` " + "configuration options have been set, but `nonrefreshable_access_token_lifetime` " + " exceeds `session_lifetime`!" + ) + + refresh_token_lifetime = config.get("refresh_token_lifetime") + if refresh_token_lifetime is not None: + refresh_token_lifetime = self.parse_duration(refresh_token_lifetime) + self.refresh_token_lifetime: Optional[int] = refresh_token_lifetime + + if ( + self.session_lifetime is not None + and self.refresh_token_lifetime is not None + ): + if self.session_lifetime < self.refresh_token_lifetime: + raise ConfigError( + "Both `session_lifetime` and `refresh_token_lifetime` " + "configuration options have been set, but `refresh_token_lifetime` " + " exceeds `session_lifetime`!" + ) + + # The fallback template used for authenticating using a registration token + self.registration_token_template = self.read_template("registration_token.html") # The success template used during fallback auth. self.fallback_success_template = self.read_template("auth_success.html") @@ -173,6 +220,44 @@ def generate_config_section(self, generate_secrets=False, **kwargs): # #session_lifetime: 24h + # Time that an access token remains valid for, if the session is + # using refresh tokens. + # For more information about refresh tokens, please see the manual. + # Note that this only applies to clients which advertise support for + # refresh tokens. + # + # Note also that this is calculated at login time and refresh time: + # changes are not applied to existing sessions until they are refreshed. + # + # By default, this is 5 minutes. + # + #refreshable_access_token_lifetime: 5m + + # Time that a refresh token remains valid for (provided that it is not + # exchanged for another one first). + # This option can be used to automatically log-out inactive sessions. + # Please see the manual for more information. 
+ # + # Note also that this is calculated at login time and refresh time: + # changes are not applied to existing sessions until they are refreshed. + # + # By default, this is infinite. + # + #refresh_token_lifetime: 24h + + # Time that an access token remains valid for, if the session is NOT + # using refresh tokens. + # Please note that not all clients support refresh tokens, so setting + # this to a short value may be inconvenient for some users who will + # then be logged out frequently. + # + # Note also that this is calculated at login time: changes are not applied + # retrospectively to existing sessions for users that have already logged in. + # + # By default, this is infinite. + # + #nonrefreshable_access_token_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: @@ -199,6 +284,15 @@ def generate_config_section(self, generate_secrets=False, **kwargs): # #enable_3pid_lookup: true + # Require users to submit a token during registration. + # Tokens can be managed using the admin API: + # https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/registration_tokens.html + # Note that `enable_registration` must be set to `true`. + # Disabling this option will not delete any tokens previously generated. + # Defaults to false. Uncomment the following to require tokens: + # + #registration_requires_token: true + # If set, allows registration of standard or admin accounts by anyone who # has the shared secret, even if registration is otherwise disabled. # @@ -222,7 +316,7 @@ def generate_config_section(self, generate_secrets=False, **kwargs): # in on this server. # # (By default, no suggestion is made, so it is left up to the client. - # This setting is ignored unless public_baseurl is also set.) + # This setting is ignored unless public_baseurl is also explicitly set.) # #default_identity_server: https://matrix.org @@ -247,8 +341,6 @@ def generate_config_section(self, generate_secrets=False, **kwargs): # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # - # If a delegate is specified, the config option public_baseurl must also be filled out. - # account_threepid_delegates: #email: https://example.com # Delegate email sending to example.com #msisdn: http://localhost:8090 # Delegate SMS sending to this local process @@ -359,7 +451,7 @@ def generate_config_section(self, generate_secrets=False, **kwargs): ) @staticmethod - def add_arguments(parser): + def add_arguments(parser: argparse.ArgumentParser) -> None: reg_group = parser.add_argument_group("registration") reg_group.add_argument( "--enable-registration", @@ -368,6 +460,6 @@ def add_arguments(parser): help="Enable registration for new users.", ) - def read_arguments(self, args): + def read_arguments(self, args: argparse.Namespace) -> None: if args.enable_registration is not None: self.enable_registration = strtobool(str(args.enable_registration)) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 0dfb3a227a3b..b129b9dd681c 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
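# An illustrative summary (hypothetical helper, not Synapse code) of the rule
# the lifetime checks in read_config above enforce: no configured token
# lifetime may exceed session_lifetime.
from typing import Optional

def check_lifetimes(session_lifetime: Optional[int], **lifetimes: Optional[int]) -> None:
    if session_lifetime is None:
        return
    for name, lifetime in lifetimes.items():
        if lifetime is not None and session_lifetime < lifetime:
            raise ValueError(f"{name} exceeds session_lifetime!")

check_lifetimes(86_400_000, refreshable_access_token_lifetime=300_000)  # passes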
+import logging import os from collections import namedtuple -from typing import Dict, List +from typing import Dict, List, Tuple +from urllib.request import getproxies_environment # type: ignore from synapse.config.server import DEFAULT_IP_RANGE_BLACKLIST, generate_ip_set from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import JsonDict from synapse.util.module_loader import load_module from ._base import Config, ConfigError +logger = logging.getLogger(__name__) + DEFAULT_THUMBNAIL_SIZES = [ {"width": 32, "height": 32, "method": "crop"}, {"width": 96, "height": 96, "method": "crop"}, @@ -36,6 +41,9 @@ # method: %(method)s """ +HTTP_PROXY_SET_WARNING = """\ +The Synapse config url_preview_ip_range_blacklist will be ignored as an HTTP(s) proxy is configured.""" + ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) @@ -50,7 +58,9 @@ ) -def parse_thumbnail_requirements(thumbnail_sizes): +def parse_thumbnail_requirements( + thumbnail_sizes: List[JsonDict], +) -> Dict[str, Tuple[ThumbnailRequirement, ...]]: """Takes a list of dictionaries with "width", "height", and "method" keys and creates a map from image media types to the thumbnail size, thumbnailing method, and thumbnail media type to precalculate @@ -62,7 +72,7 @@ def parse_thumbnail_requirements(thumbnail_sizes): Dictionary mapping from media type string to list of ThumbnailRequirement tuples. """ - requirements: Dict[str, List] = {} + requirements: Dict[str, List[ThumbnailRequirement]] = {} for size in thumbnail_sizes: width = size["width"] height = size["height"] @@ -87,7 +97,7 @@ def read_config(self, config, **kwargs): # Only enable the media repo if either the media repo is enabled or the # current worker app is the media repo. if ( - self.enable_media_repo is False + self.root.server.enable_media_repo is False and config.get("worker_app") != "synapse.app.media_repository" ): self.can_load_media_repo = False @@ -180,12 +190,17 @@ def read_config(self, config, **kwargs): e.message # noqa: B306, DependencyException.message is a property ) + proxy_env = getproxies_environment() if "url_preview_ip_range_blacklist" not in config: - raise ConfigError( - "For security, you must specify an explicit target IP address " - "blacklist in url_preview_ip_range_blacklist for url previewing " - "to work" - ) + if "http" not in proxy_env or "https" not in proxy_env: + raise ConfigError( + "For security, you must specify an explicit target IP address " + "blacklist in url_preview_ip_range_blacklist for url previewing " + "to work" + ) + else: + if "http" in proxy_env or "https" in proxy_env: + logger.warning("".join(HTTP_PROXY_SET_WARNING)) # we always blacklist '0.0.0.0' and '::', which are supposed to be # unroutable addresses. @@ -292,6 +307,8 @@ def generate_config_section(self, data_dir_path, **kwargs): # This must be specified if url_preview_enabled is set. It is recommended that # you uncomment the following list as a starting point. # + # Note: The value is ignored when an HTTP proxy is in use + # #url_preview_ip_range_blacklist: %(ip_range_blacklist)s diff --git a/synapse/config/retention.py b/synapse/config/retention.py new file mode 100644 index 000000000000..aed9bf458f0c --- /dev/null +++ b/synapse/config/retention.py @@ -0,0 +1,226 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
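# A hedged sketch of the proxy check introduced in repository.py above: when
# an HTTP(S) proxy is configured in the environment, preview traffic leaves
# via the proxy, so the blacklist requirement is relaxed with a warning
# instead of a hard ConfigError. The proxy URL below is an example value.
import os
from urllib.request import getproxies_environment

os.environ["https_proxy"] = "http://proxy.example.com:8888"
proxy_env = getproxies_environment()
if "http" in proxy_env or "https" in proxy_env:
    print("HTTP(S) proxy set; url_preview_ip_range_blacklist will be ignored")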
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import List, Optional
+
+import attr
+
+from synapse.config._base import Config, ConfigError
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class RetentionPurgeJob:
+    """Object describing the configuration of a single retention purge job"""
+
+    interval: int
+    shortest_max_lifetime: Optional[int]
+    longest_max_lifetime: Optional[int]
+
+
+class RetentionConfig(Config):
+    section = "retention"
+
+    def read_config(self, config, **kwargs):
+        retention_config = config.get("retention")
+        if retention_config is None:
+            retention_config = {}
+
+        self.retention_enabled = retention_config.get("enabled", False)
+
+        retention_default_policy = retention_config.get("default_policy")
+
+        if retention_default_policy is not None:
+            self.retention_default_min_lifetime = retention_default_policy.get(
+                "min_lifetime"
+            )
+            if self.retention_default_min_lifetime is not None:
+                self.retention_default_min_lifetime = self.parse_duration(
+                    self.retention_default_min_lifetime
+                )
+
+            self.retention_default_max_lifetime = retention_default_policy.get(
+                "max_lifetime"
+            )
+            if self.retention_default_max_lifetime is not None:
+                self.retention_default_max_lifetime = self.parse_duration(
+                    self.retention_default_max_lifetime
+                )
+
+            if (
+                self.retention_default_min_lifetime is not None
+                and self.retention_default_max_lifetime is not None
+                and (
+                    self.retention_default_min_lifetime
+                    > self.retention_default_max_lifetime
+                )
+            ):
+                raise ConfigError(
+                    "The default retention policy's 'min_lifetime' can not be greater"
+                    " than its 'max_lifetime'"
+                )
+        else:
+            self.retention_default_min_lifetime = None
+            self.retention_default_max_lifetime = None
+
+        if self.retention_enabled:
+            logger.info(
+                "Message retention policies support enabled with the following default"
+                " policy: min_lifetime = %s ; max_lifetime = %s",
+                self.retention_default_min_lifetime,
+                self.retention_default_max_lifetime,
+            )
+
+        self.retention_allowed_lifetime_min = retention_config.get(
+            "allowed_lifetime_min"
+        )
+        if self.retention_allowed_lifetime_min is not None:
+            self.retention_allowed_lifetime_min = self.parse_duration(
+                self.retention_allowed_lifetime_min
+            )
+
+        self.retention_allowed_lifetime_max = retention_config.get(
+            "allowed_lifetime_max"
+        )
+        if self.retention_allowed_lifetime_max is not None:
+            self.retention_allowed_lifetime_max = self.parse_duration(
+                self.retention_allowed_lifetime_max
+            )
+
+        if (
+            self.retention_allowed_lifetime_min is not None
+            and self.retention_allowed_lifetime_max is not None
+            and self.retention_allowed_lifetime_min
+            > self.retention_allowed_lifetime_max
+        ):
+            raise ConfigError(
+                "Invalid retention policy limits: 'allowed_lifetime_min' can not be"
+                " greater than 'allowed_lifetime_max'"
+            )
+
+        self.retention_purge_jobs: List[RetentionPurgeJob] = []
+        for purge_job_config in retention_config.get("purge_jobs", []):
+            interval_config = purge_job_config.get("interval")
+
+            if interval_config is None:
+                raise ConfigError(
+                    "A retention policy's purge jobs configuration must have the"
+                    " 'interval' key set."
+                )
+
+            interval = self.parse_duration(interval_config)
+
+            shortest_max_lifetime = purge_job_config.get("shortest_max_lifetime")
+
+            if shortest_max_lifetime is not None:
+                shortest_max_lifetime = self.parse_duration(shortest_max_lifetime)
+
+            longest_max_lifetime = purge_job_config.get("longest_max_lifetime")
+
+            if longest_max_lifetime is not None:
+                longest_max_lifetime = self.parse_duration(longest_max_lifetime)
+
+            if (
+                shortest_max_lifetime is not None
+                and longest_max_lifetime is not None
+                and shortest_max_lifetime > longest_max_lifetime
+            ):
+                raise ConfigError(
+                    "A retention policy's purge jobs configuration's"
+                    " 'shortest_max_lifetime' value can not be greater than its"
+                    " 'longest_max_lifetime' value."
+                )
+
+            self.retention_purge_jobs.append(
+                RetentionPurgeJob(interval, shortest_max_lifetime, longest_max_lifetime)
+            )
+
+        if not self.retention_purge_jobs:
+            self.retention_purge_jobs = [
+                RetentionPurgeJob(self.parse_duration("1d"), None, None)
+            ]
+
+    def generate_config_section(self, config_dir_path, server_name, **kwargs):
+        return """\
+        # Message retention policy at the server level.
+        #
+        # Room admins and mods can define a retention period for their rooms using the
+        # 'm.room.retention' state event, and server admins can cap this period by setting
+        # the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options.
+        #
+        # If this feature is enabled, Synapse will regularly look for and purge events
+        # which are older than the room's maximum retention period. Synapse will also
+        # filter events received over federation so that events that should have been
+        # purged are ignored and not stored again.
+        #
+        retention:
+          # The message retention policies feature is disabled by default. Uncomment the
+          # following line to enable it.
+          #
+          #enabled: true
+
+          # Default retention policy. If set, Synapse will apply it to rooms that lack the
+          # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't
+          # matter much because Synapse doesn't take it into account yet.
+          #
+          #default_policy:
+          #  min_lifetime: 1d
+          #  max_lifetime: 1y
+
+          # Retention policy limits. If set, and a room's state contains a
+          # 'm.room.retention' event with a 'min_lifetime' or a 'max_lifetime'
+          # that's out of these bounds, Synapse will cap the room's policy
+          # to these limits when running purge jobs.
+          #
+          #allowed_lifetime_min: 1d
+          #allowed_lifetime_max: 1y
+
+          # Server admins can define the settings of the background jobs purging the
+          # events whose lifetime has expired under the 'purge_jobs' section.
+          #
+          # If no configuration is provided, a single job will be set up to delete expired
+          # events in every room daily.
+          #
+          # Each job's configuration defines which range of message lifetimes the job
+          # takes care of. For example, if 'shortest_max_lifetime' is '2d' and
+          # 'longest_max_lifetime' is '3d', the job will handle purging expired events in
+          # rooms whose state defines a 'max_lifetime' that's both higher than 2 days, and
+          # lower than or equal to 3 days. Both the minimum and the maximum value of a
+          # range are optional, e.g. a job with no 'shortest_max_lifetime' and a
+          # 'longest_max_lifetime' of '3d' will handle every room with a retention policy
+          # whose 'max_lifetime' is lower than or equal to three days.
+ # + # The rationale for this per-job configuration is that some rooms might have a + # retention policy with a low 'max_lifetime', where history needs to be purged + # of outdated messages on a more frequent basis than for the rest of the rooms + # (e.g. every 12h), but not want that purge to be performed by a job that's + # iterating over every room it knows, which could be heavy on the server. + # + # If any purge job is configured, it is strongly recommended to have at least + # a single job with neither 'shortest_max_lifetime' nor 'longest_max_lifetime' + # set, or one job without 'shortest_max_lifetime' and one job without + # 'longest_max_lifetime' set. Otherwise some rooms might be ignored, even if + # 'allowed_lifetime_min' and 'allowed_lifetime_max' are set, because capping a + # room's policy to these values is done after the policies are retrieved from + # Synapse's database (which is done using the range specified in a purge job's + # configuration). + # + #purge_jobs: + # - longest_max_lifetime: 3d + # interval: 12h + # - shortest_max_lifetime: 3d + # interval: 1d + """ diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 56981cac79c2..57316c59b6a0 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -1,4 +1,5 @@ # Copyright 2018 New Vector Ltd +# Copyright 2021 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + +from synapse.types import JsonDict from synapse.util import glob_to_regex from ._base import Config, ConfigError @@ -20,7 +24,7 @@ class RoomDirectoryConfig(Config): section = "roomdirectory" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.enable_room_list_search = config.get("enable_room_list_search", True) alias_creation_rules = config.get("alias_creation_rules") @@ -47,7 +51,7 @@ def read_config(self, config, **kwargs): _RoomDirectoryRule("room_list_publication_rules", {"action": "allow"}) ] - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: return """ # Uncomment to disable searching the public room list. When disabled # blocks searching local and remote room lists for local and remote @@ -113,16 +117,16 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # action: allow """ - def is_alias_creation_allowed(self, user_id, room_id, alias): + def is_alias_creation_allowed(self, user_id: str, room_id: str, alias: str) -> bool: """Checks if the given user is allowed to create the given alias Args: - user_id (str) - room_id (str) - alias (str) + user_id: The user to check. + room_id: The room ID for the alias. + alias: The alias being created. 
Returns: - boolean: True if user is allowed to create the alias + True if user is allowed to create the alias """ for rule in self._alias_creation_rules: if rule.matches(user_id, room_id, [alias]): @@ -130,16 +134,18 @@ def is_alias_creation_allowed(self, user_id, room_id, alias): return False - def is_publishing_room_allowed(self, user_id, room_id, aliases): + def is_publishing_room_allowed( + self, user_id: str, room_id: str, aliases: List[str] + ) -> bool: """Checks if the given user is allowed to publish the room Args: - user_id (str) - room_id (str) - aliases (list[str]): any local aliases associated with the room + user_id: The user ID publishing the room. + room_id: The room being published. + aliases: any local aliases associated with the room Returns: - boolean: True if user can publish room + True if user can publish room """ for rule in self._room_list_publication_rules: if rule.matches(user_id, room_id, aliases): @@ -153,11 +159,11 @@ class _RoomDirectoryRule: creating an alias or publishing a room. """ - def __init__(self, option_name, rule): + def __init__(self, option_name: str, rule: JsonDict): """ Args: - option_name (str): Name of the config option this rule belongs to - rule (dict): The rule as specified in the config + option_name: Name of the config option this rule belongs to + rule: The rule as specified in the config """ action = rule["action"] @@ -181,18 +187,18 @@ def __init__(self, option_name, rule): except Exception as e: raise ConfigError("Failed to parse glob into regex") from e - def matches(self, user_id, room_id, aliases): + def matches(self, user_id: str, room_id: str, aliases: List[str]) -> bool: """Tests if this rule matches the given user_id, room_id and aliases. Args: - user_id (str) - room_id (str) - aliases (list[str]): The associated aliases to the room. Will be a - single element for testing alias creation, and can be empty for - testing room publishing. + user_id: The user ID to check. + room_id: The room ID to check. + aliases: The associated aliases to the room. Will be a single element + for testing alias creation, and can be empty for testing room + publishing. Returns: - boolean + True if the rule matches. """ # Note: The regexes are anchored at both ends diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index 05e983625dc8..ec9d9f65e7b5 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -1,5 +1,5 @@ # Copyright 2018 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,10 +14,11 @@ # limitations under the License. import logging -from typing import Any, List +from typing import Any, List, Set from synapse.config.sso import SsoAttributeRequirement from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import JsonDict from synapse.util.module_loader import load_module, load_python_module from ._base import Config, ConfigError @@ -33,7 +34,7 @@ ) -def _dict_merge(merge_dict, into_dict): +def _dict_merge(merge_dict: dict, into_dict: dict) -> None: """Do a deep merge of two dicts Recursively merges `merge_dict` into `into_dict`: @@ -43,8 +44,8 @@ def _dict_merge(merge_dict, into_dict): the value from `merge_dict`. 
Args: - merge_dict (dict): dict to merge - into_dict (dict): target dict + merge_dict: dict to merge + into_dict: target dict to be modified """ for k, v in merge_dict.items(): if k not in into_dict: @@ -64,7 +65,7 @@ def _dict_merge(merge_dict, into_dict): class SAML2Config(Config): section = "saml2" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: self.saml2_enabled = False saml2_config = config.get("saml2_config") @@ -183,8 +184,8 @@ def read_config(self, config, **kwargs): ) def _default_saml_config_dict( - self, required_attributes: set, optional_attributes: set - ): + self, required_attributes: Set[str], optional_attributes: Set[str] + ) -> JsonDict: """Generate a configuration dictionary with required and optional attributes that will be needed to process new user registration @@ -195,18 +196,15 @@ def _default_saml_config_dict( additional information to Synapse user accounts, but are not required Returns: - dict: A SAML configuration dictionary + A SAML configuration dictionary """ import saml2 - public_baseurl = self.public_baseurl - if public_baseurl is None: - raise ConfigError("saml2_config requires a public_baseurl to be set") - if self.saml2_grandfathered_mxid_source_attribute: optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute) optional_attributes -= required_attributes + public_baseurl = self.root.server.public_baseurl metadata_url = public_baseurl + "_synapse/client/saml2/metadata.xml" response_url = public_baseurl + "_synapse/client/saml2/authn_response" return { @@ -225,7 +223,7 @@ def _default_saml_config_dict( }, } - def generate_config_section(self, config_dir_path, server_name, **kwargs): + def generate_config_section(self, config_dir_path, server_name, **kwargs) -> str: return """\ ## Single sign-on integration ## diff --git a/synapse/config/server.py b/synapse/config/server.py index b9e0c0b30093..ba5b95426338 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -1,6 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017-2018 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,22 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
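The `_dict_merge` helper above is what lets a deployment's `saml2_config` overrides be layered onto the generated pysaml2 defaults. A minimal, self-contained sketch of the same merge semantics (illustrative only; not code from this changeset):

```python
from typing import Any, Dict


def dict_merge(merge_dict: Dict[str, Any], into_dict: Dict[str, Any]) -> None:
    """Recursively merge `merge_dict` into `into_dict`, modifying it in place.

    Keys whose values are dicts on both sides are merged recursively; any
    other value in `merge_dict` overwrites the existing entry.
    """
    for k, v in merge_dict.items():
        current = into_dict.get(k)
        if isinstance(v, dict) and isinstance(current, dict):
            dict_merge(v, current)
        else:
            into_dict[k] = v


# Hypothetical example: an override replaces one nested key, keeping the rest.
defaults = {"sp": {"allow_unsolicited": True, "name": "Synapse"}}
overrides = {"sp": {"allow_unsolicited": False}}
dict_merge(overrides, defaults)
assert defaults == {"sp": {"allow_unsolicited": False, "name": "Synapse"}}
```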
+import argparse import itertools import logging import os.path import re +import urllib.parse from textwrap import indent -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union import attr import yaml from netaddr import AddrFormatError, IPNetwork, IPSet +from twisted.conch.ssh.keys import Key + from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.types import JsonDict from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_server_name from ._base import Config, ConfigError +from ._util import validate_config logger = logging.Logger(__name__) @@ -181,39 +185,65 @@ def generate_ip_set( @attr.s(frozen=True) class HttpResourceConfig: - names = attr.ib( - type=List[str], + names: List[str] = attr.ib( factory=list, validator=attr.validators.deep_iterable(attr.validators.in_(KNOWN_RESOURCES)), # type: ignore ) - compress = attr.ib( - type=bool, + compress: bool = attr.ib( default=False, validator=attr.validators.optional(attr.validators.instance_of(bool)), # type: ignore[arg-type] ) -@attr.s(frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class HttpListenerConfig: """Object describing the http-specific parts of the config of a listener""" - x_forwarded = attr.ib(type=bool, default=False) - resources = attr.ib(type=List[HttpResourceConfig], factory=list) - additional_resources = attr.ib(type=Dict[str, dict], factory=dict) - tag = attr.ib(type=str, default=None) + x_forwarded: bool = False + resources: List[HttpResourceConfig] = attr.ib(factory=list) + additional_resources: Dict[str, dict] = attr.ib(factory=dict) + tag: Optional[str] = None -@attr.s(frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class ListenerConfig: """Object describing the configuration of a single listener.""" - port = attr.ib(type=int, validator=attr.validators.instance_of(int)) - bind_addresses = attr.ib(type=List[str]) - type = attr.ib(type=str, validator=attr.validators.in_(KNOWN_LISTENER_TYPES)) - tls = attr.ib(type=bool, default=False) + port: int = attr.ib(validator=attr.validators.instance_of(int)) + bind_addresses: List[str] + type: str = attr.ib(validator=attr.validators.in_(KNOWN_LISTENER_TYPES)) + tls: bool = False # http_options is only populated if type=http - http_options = attr.ib(type=Optional[HttpListenerConfig], default=None) + http_options: Optional[HttpListenerConfig] = None + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ManholeConfig: + """Object describing the configuration of the manhole""" + + username: str = attr.ib(validator=attr.validators.instance_of(str)) + password: str = attr.ib(validator=attr.validators.instance_of(str)) + priv_key: Optional[Key] + pub_key: Optional[Key] + + +@attr.s(frozen=True) +class LimitRemoteRoomsConfig: + enabled: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + complexity: Union[float, int] = attr.ib( + validator=attr.validators.instance_of( + (float, int) # type: ignore[arg-type] # noqa + ), + default=1.0, + ) + complexity_error: str = attr.ib( + validator=attr.validators.instance_of(str), + default=ROOM_COMPLEXITY_TOO_GREAT, + ) + admins_can_join: bool = attr.ib( + validator=attr.validators.instance_of(bool), default=False + ) class ServerConfig(Config): @@ -235,11 +265,46 @@ def read_config(self, config, **kwargs): self.print_pidfile = config.get("print_pidfile") self.user_agent_suffix = config.get("user_agent_suffix") 
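The attrs changes above move these config classes from `attr.ib(type=...)` declarations to the annotation-driven `auto_attribs=True` style. A small sketch of the resulting pattern (class and field names are illustrative, not from Synapse):

```python
from typing import List, Optional

import attr


@attr.s(slots=True, frozen=True, auto_attribs=True)
class ExampleListener:
    # Mandatory field, with a runtime type check supplied by a validator.
    port: int = attr.ib(validator=attr.validators.instance_of(int))
    # Mutable default built by a factory, so instances don't share one list.
    bind_addresses: List[str] = attr.ib(factory=list)
    # Plain annotated defaults need no attr.ib() call at all.
    tls: bool = False
    tag: Optional[str] = None


listener = ExampleListener(port=8448)
assert listener.bind_addresses == []
# listener.tls = True  # would raise attr.exceptions.FrozenInstanceError
```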
self.use_frozen_dicts = config.get("use_frozen_dicts", False) + self.serve_server_wellknown = config.get("serve_server_wellknown", False) + + # Whether we should serve a "client well-known": + # (a) at .well-known/matrix/client on our client HTTP listener + # (b) in the response to /login + # + # ... which together help ensure that clients use our public_baseurl instead of + # whatever they were told by the user. + # + # For the sake of backwards compatibility with existing installations, this is + # True if public_baseurl is specified explicitly, and otherwise False. (The + # reasoning here is that we have no way of knowing that the default + # public_baseurl is actually correct for existing installations - many things + # will not work correctly, but that's (probably?) better than sending clients + # to a completely broken URL.) + self.serve_client_wellknown = False + + public_baseurl = config.get("public_baseurl") + if public_baseurl is None: + public_baseurl = f"https://{self.server_name}/" + logger.info("Using default public_baseurl %s", public_baseurl) + else: + self.serve_client_wellknown = True + if public_baseurl[-1] != "/": + public_baseurl += "/" + self.public_baseurl = public_baseurl - self.public_baseurl = config.get("public_baseurl") - if self.public_baseurl is not None: - if self.public_baseurl[-1] != "/": - self.public_baseurl += "/" + # check that public_baseurl is valid + try: + splits = urllib.parse.urlsplit(self.public_baseurl) + except Exception as e: + raise ConfigError(f"Unable to parse URL: {e}", ("public_baseurl",)) + if splits.scheme not in ("https", "http"): + raise ConfigError( + f"Invalid scheme '{splits.scheme}': only https and http are supported" + ) + if splits.query or splits.fragment: + raise ConfigError( + "public_baseurl cannot contain query parameters or a #-fragment" + ) # Whether to enable user presence. presence_config = config.get("presence") or {} @@ -248,6 +313,7 @@ def read_config(self, config, **kwargs): self.use_presence = config.get("use_presence", True) # Custom presence router module + # This is the legacy way of configuring it (the config should now be put in the modules section) self.presence_router_module_class = None self.presence_router_config = None presence_router_config = presence_config.get("presence_router") @@ -339,11 +405,6 @@ def read_config(self, config, **kwargs): # (other than those sent by local server admins) self.block_non_admin_invites = config.get("block_non_admin_invites", False) - # Whether to enable experimental MSC1849 (aka relations) support - self.experimental_msc1849_support_enabled = config.get( - "experimental_msc1849_support_enabled", True - ) - # Options to control access by tracking MAU self.limit_usage_by_mau = config.get("limit_usage_by_mau", False) self.max_mau_value = 0 @@ -362,7 +423,7 @@ def read_config(self, config, **kwargs): # before redacting them. redaction_retention_period = config.get("redaction_retention_period", "7d") if redaction_retention_period is not None: - self.redaction_retention_period = self.parse_duration( + self.redaction_retention_period: Optional[int] = self.parse_duration( redaction_retention_period ) else: @@ -371,7 +432,7 @@ def read_config(self, config, **kwargs): # How long to keep entries in the `users_ips` table.
user_ips_max_age = config.get("user_ips_max_age", "28d") if user_ips_max_age is not None: - self.user_ips_max_age = self.parse_duration(user_ips_max_age) + self.user_ips_max_age: Optional[int] = self.parse_duration(user_ips_max_age) else: self.user_ips_max_age = None @@ -429,132 +490,6 @@ def read_config(self, config, **kwargs): # events with profile information that differ from the target's global profile. self.allow_per_room_profiles = config.get("allow_per_room_profiles", True) - retention_config = config.get("retention") - if retention_config is None: - retention_config = {} - - self.retention_enabled = retention_config.get("enabled", False) - - retention_default_policy = retention_config.get("default_policy") - - if retention_default_policy is not None: - self.retention_default_min_lifetime = retention_default_policy.get( - "min_lifetime" - ) - if self.retention_default_min_lifetime is not None: - self.retention_default_min_lifetime = self.parse_duration( - self.retention_default_min_lifetime - ) - - self.retention_default_max_lifetime = retention_default_policy.get( - "max_lifetime" - ) - if self.retention_default_max_lifetime is not None: - self.retention_default_max_lifetime = self.parse_duration( - self.retention_default_max_lifetime - ) - - if ( - self.retention_default_min_lifetime is not None - and self.retention_default_max_lifetime is not None - and ( - self.retention_default_min_lifetime - > self.retention_default_max_lifetime - ) - ): - raise ConfigError( - "The default retention policy's 'min_lifetime' can not be greater" - " than its 'max_lifetime'" - ) - else: - self.retention_default_min_lifetime = None - self.retention_default_max_lifetime = None - - if self.retention_enabled: - logger.info( - "Message retention policies support enabled with the following default" - " policy: min_lifetime = %s ; max_lifetime = %s", - self.retention_default_min_lifetime, - self.retention_default_max_lifetime, - ) - - self.retention_allowed_lifetime_min = retention_config.get( - "allowed_lifetime_min" - ) - if self.retention_allowed_lifetime_min is not None: - self.retention_allowed_lifetime_min = self.parse_duration( - self.retention_allowed_lifetime_min - ) - - self.retention_allowed_lifetime_max = retention_config.get( - "allowed_lifetime_max" - ) - if self.retention_allowed_lifetime_max is not None: - self.retention_allowed_lifetime_max = self.parse_duration( - self.retention_allowed_lifetime_max - ) - - if ( - self.retention_allowed_lifetime_min is not None - and self.retention_allowed_lifetime_max is not None - and self.retention_allowed_lifetime_min - > self.retention_allowed_lifetime_max - ): - raise ConfigError( - "Invalid retention policy limits: 'allowed_lifetime_min' can not be" - " greater than 'allowed_lifetime_max'" - ) - - self.retention_purge_jobs: List[Dict[str, Optional[int]]] = [] - for purge_job_config in retention_config.get("purge_jobs", []): - interval_config = purge_job_config.get("interval") - - if interval_config is None: - raise ConfigError( - "A retention policy's purge jobs configuration must have the" - " 'interval' key set." 
- ) - - interval = self.parse_duration(interval_config) - - shortest_max_lifetime = purge_job_config.get("shortest_max_lifetime") - - if shortest_max_lifetime is not None: - shortest_max_lifetime = self.parse_duration(shortest_max_lifetime) - - longest_max_lifetime = purge_job_config.get("longest_max_lifetime") - - if longest_max_lifetime is not None: - longest_max_lifetime = self.parse_duration(longest_max_lifetime) - - if ( - shortest_max_lifetime is not None - and longest_max_lifetime is not None - and shortest_max_lifetime > longest_max_lifetime - ): - raise ConfigError( - "A retention policy's purge jobs configuration's" - " 'shortest_max_lifetime' value can not be greater than its" - " 'longest_max_lifetime' value." - ) - - self.retention_purge_jobs.append( - { - "interval": interval, - "shortest_max_lifetime": shortest_max_lifetime, - "longest_max_lifetime": longest_max_lifetime, - } - ) - - if not self.retention_purge_jobs: - self.retention_purge_jobs = [ - { - "interval": self.parse_duration("1d"), - "shortest_max_lifetime": None, - "longest_max_lifetime": None, - } - ] - self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])] # no_tls is not really supported any more, but let's grandfather it in @@ -577,25 +512,6 @@ def read_config(self, config, **kwargs): self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None)) self.gc_seconds = self.read_gc_intervals(config.get("gc_min_interval", None)) - @attr.s - class LimitRemoteRoomsConfig: - enabled = attr.ib( - validator=attr.validators.instance_of(bool), default=False - ) - complexity = attr.ib( - validator=attr.validators.instance_of( - (float, int) # type: ignore[arg-type] # noqa - ), - default=1.0, - ) - complexity_error = attr.ib( - validator=attr.validators.instance_of(str), - default=ROOM_COMPLEXITY_TOO_GREAT, - ) - admins_can_join = attr.ib( - validator=attr.validators.instance_of(bool), default=False - ) - self.limit_remote_rooms = LimitRemoteRoomsConfig( **(config.get("limit_remote_rooms") or {}) ) @@ -648,6 +564,41 @@ class LimitRemoteRoomsConfig: ) ) + manhole_settings = config.get("manhole_settings") or {} + validate_config( + _MANHOLE_SETTINGS_SCHEMA, manhole_settings, ("manhole_settings",) + ) + + manhole_username = manhole_settings.get("username", "matrix") + manhole_password = manhole_settings.get("password", "rabbithole") + manhole_priv_key_path = manhole_settings.get("ssh_priv_key_path") + manhole_pub_key_path = manhole_settings.get("ssh_pub_key_path") + + manhole_priv_key = None + if manhole_priv_key_path is not None: + try: + manhole_priv_key = Key.fromFile(manhole_priv_key_path) + except Exception as e: + raise ConfigError( + f"Failed to read manhole private key file {manhole_priv_key_path}" + ) from e + + manhole_pub_key = None + if manhole_pub_key_path is not None: + try: + manhole_pub_key = Key.fromFile(manhole_pub_key_path) + except Exception as e: + raise ConfigError( + f"Failed to read manhole public key file {manhole_pub_key_path}" + ) from e + + self.manhole_settings = ManholeConfig( + username=manhole_username, + password=manhole_password, + priv_key=manhole_priv_key, + pub_key=manhole_pub_key, + ) + metrics_port = config.get("metrics_port") if metrics_port: logger.warning(METRICS_PORT_WARNING) @@ -710,11 +661,29 @@ class LimitRemoteRoomsConfig: # Turn the list into a set to improve lookup speed. 
self.next_link_domain_whitelist = set(next_link_domain_whitelist) + templates_config = config.get("templates") or {} + if not isinstance(templates_config, dict): + raise ConfigError("The 'templates' section must be a dictionary") + + self.custom_template_directory: Optional[str] = templates_config.get( + "custom_template_directory" + ) + if self.custom_template_directory is not None and not isinstance( + self.custom_template_directory, str + ): + raise ConfigError("'custom_template_directory' must be a string") + def has_tls_listener(self) -> bool: return any(listener.tls for listener in self.listeners) def generate_config_section( - self, server_name, data_dir_path, open_private_ports, listeners, **kwargs + self, + server_name, + data_dir_path, + open_private_ports, + listeners, + config_dir_path, + **kwargs, ): ip_range_blacklist = "\n".join( " # - '%s'" % ip for ip in DEFAULT_IP_RANGE_BLACKLIST @@ -841,8 +810,28 @@ def generate_config_section( # Otherwise, it should be the URL to reach Synapse's client HTTP listener (see # 'listeners' below). # + # Defaults to 'https://<server_name>/'. + # #public_baseurl: https://example.com/ + # Uncomment the following to tell other servers to send federation traffic on + # port 443. + # + # By default, other servers will try to reach our server on port 8448, which can + # be inconvenient in some environments. + # + # Provided 'https://<server_name>/' on port 443 is routed to Synapse, this + # option configures Synapse to serve a file at + # 'https://<server_name>/.well-known/matrix/server'. This will tell other + # servers to send traffic to port 443 instead. + # + # See https://matrix-org.github.io/synapse/latest/delegate.html for more + # information. + # + # Defaults to 'false'. + # + #serve_server_wellknown: true + # Set the soft limit on the number of file descriptors synapse can use # Zero is used to indicate synapse should set the soft limit to the # hard limit. @@ -858,20 +847,6 @@ def generate_config_section( # #enabled: false - # Presence routers are third-party modules that can specify additional logic - # to where presence updates from users are routed. - # - presence_router: - # The custom module's class. Uncomment to use a custom presence router module. - # - #module: "my_custom_router.PresenceRouter" - - # Configuration options of the custom module. Refer to your module's - # documentation for available options. - # - #config: - # example_option: 'something' - # Whether to require authentication to retrieve profile data (avatars, # display names) of other users through the client API. Defaults to # 'false'. Note that profile data is also available via the federation @@ -960,6 +935,8 @@ def generate_config_section( # # This option replaces federation_ip_range_blacklist in Synapse v1.25.0. # + # Note: The value is ignored when an HTTP proxy is in use + # #ip_range_blacklist: %(ip_range_blacklist)s @@ -1067,6 +1044,24 @@ def generate_config_section( # bind_addresses: ['::1', '127.0.0.1'] # type: manhole + # Connection settings for the manhole + # + manhole_settings: + # The username for the manhole. This defaults to 'matrix'. + # + #username: manhole + + # The password for the manhole. This defaults to 'rabbithole'. + # + #password: mypassword + + # The private and public SSH key pair used to encrypt the manhole traffic. + # If these are left unset, then hardcoded and non-secret keys are used, + # which could allow traffic to be intercepted if sent over a public network.
+ # + #ssh_priv_key_path: %(config_dir_path)s/id_rsa + #ssh_pub_key_path: %(config_dir_path)s/id_rsa.pub + # Forward extremities can build up in a room due to networking delays between # homeservers. Once this happens in a large room, calculation of the state of # that room can become quite expensive. To mitigate this, once the number of @@ -1186,75 +1181,6 @@ def generate_config_section( # #user_ips_max_age: 14d - # Message retention policy at the server level. - # - # Room admins and mods can define a retention period for their rooms using the - # 'm.room.retention' state event, and server admins can cap this period by setting - # the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options. - # - # If this feature is enabled, Synapse will regularly look for and purge events - # which are older than the room's maximum retention period. Synapse will also - # filter events received over federation so that events that should have been - # purged are ignored and not stored again. - # - retention: - # The message retention policies feature is disabled by default. Uncomment the - # following line to enable it. - # - #enabled: true - - # Default retention policy. If set, Synapse will apply it to rooms that lack the - # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't - # matter much because Synapse doesn't take it into account yet. - # - #default_policy: - # min_lifetime: 1d - # max_lifetime: 1y - - # Retention policy limits. If set, and the state of a room contains a - # 'm.room.retention' event in its state which contains a 'min_lifetime' or a - # 'max_lifetime' that's out of these bounds, Synapse will cap the room's policy - # to these limits when running purge jobs. - # - #allowed_lifetime_min: 1d - #allowed_lifetime_max: 1y - - # Server admins can define the settings of the background jobs purging the - # events which lifetime has expired under the 'purge_jobs' section. - # - # If no configuration is provided, a single job will be set up to delete expired - # events in every room daily. - # - # Each job's configuration defines which range of message lifetimes the job - # takes care of. For example, if 'shortest_max_lifetime' is '2d' and - # 'longest_max_lifetime' is '3d', the job will handle purging expired events in - # rooms whose state defines a 'max_lifetime' that's both higher than 2 days, and - # lower than or equal to 3 days. Both the minimum and the maximum value of a - # range are optional, e.g. a job with no 'shortest_max_lifetime' and a - # 'longest_max_lifetime' of '3d' will handle every room with a retention policy - # which 'max_lifetime' is lower than or equal to three days. - # - # The rationale for this per-job configuration is that some rooms might have a - # retention policy with a low 'max_lifetime', where history needs to be purged - # of outdated messages on a more frequent basis than for the rest of the rooms - # (e.g. every 12h), but not want that purge to be performed by a job that's - # iterating over every room it knows, which could be heavy on the server. - # - # If any purge job is configured, it is strongly recommended to have at least - # a single job with neither 'shortest_max_lifetime' nor 'longest_max_lifetime' - # set, or one job without 'shortest_max_lifetime' and one job without - # 'longest_max_lifetime' set. 
Otherwise some rooms might be ignored, even if - # 'allowed_lifetime_min' and 'allowed_lifetime_max' are set, because capping a - # room's policy to these values is done after the policies are retrieved from - # Synapse's database (which is done using the range specified in a purge job's - # configuration). - # - #purge_jobs: - # - longest_max_lifetime: 3d - # interval: 12h - # - shortest_max_lifetime: 3d - # interval: 1d - # Inhibits the /requestToken endpoints from returning an error that might leak # information about whether an e-mail address is in use or not on this # homeserver. @@ -1282,11 +1208,24 @@ def generate_config_section( # all domains. # #next_link_domain_whitelist: ["matrix.org"] + + # Templates to use when generating email or HTML page contents. + # + templates: + # Directory in which Synapse will try to find template files to use to generate + # email or HTML page contents. + # If not set, or a file is not found within the template directory, a default + # template from within the Synapse package will be used. + # + # See https://matrix-org.github.io/synapse/latest/templates.html for more + # information about using custom templates. + # + #custom_template_directory: /path/to/custom/templates/ """ % locals() ) - def read_arguments(self, args): + def read_arguments(self, args: argparse.Namespace) -> None: if args.manhole is not None: self.manhole = args.manhole if args.daemonize is not None: @@ -1295,7 +1234,7 @@ def read_arguments(self, args): self.print_pidfile = args.print_pidfile @staticmethod - def add_arguments(parser): + def add_arguments(parser: argparse.ArgumentParser) -> None: server_group = parser.add_argument_group("server") server_group.add_argument( "-D", @@ -1337,14 +1276,16 @@ def read_gc_intervals(self, durations) -> Optional[Tuple[float, float, float]]: ) -def is_threepid_reserved(reserved_threepids, threepid): +def is_threepid_reserved( + reserved_threepids: List[JsonDict], threepid: JsonDict +) -> bool: """Check the threepid against the reserved threepid config Args: - reserved_threepids([dict]) - list of reserved threepids - threepid(dict) - The threepid to test for + reserved_threepids: List of reserved threepids + threepid: The threepid to test for Returns: - boolean Is the threepid undertest reserved_user + True if the threepid under test is a reserved user """ for tp in reserved_threepids: @@ -1353,7 +1294,9 @@ def is_threepid_reserved(reserved_threepids, threepid): return False -def read_gc_thresholds(thresholds): +def read_gc_thresholds( + thresholds: Optional[List[Any]], +) -> Optional[Tuple[int, int, int]]: """Reads the three integer thresholds for garbage collection. Ensures that the thresholds are integers if thresholds are supplied. """ @@ -1361,7 +1304,7 @@ def read_gc_thresholds(thresholds): return None try: assert len(thresholds) == 3 - return (int(thresholds[0]), int(thresholds[1]), int(thresholds[2])) + return int(thresholds[0]), int(thresholds[1]), int(thresholds[2]) except Exception: raise ConfigError( "Value of `gc_threshold` must be a list of three integers if set" @@ -1422,3 +1365,14 @@ def _warn_if_webclient_configured(listeners: Iterable[ListenerConfig]) -> None: if name == "webclient": logger.warning(NO_MORE_WEB_CLIENT_WARNING) return + + +_MANHOLE_SETTINGS_SCHEMA = { + "type": "object", + "properties": { + "username": {"type": "string"}, + "password": {"type": "string"}, + "ssh_priv_key_path": {"type": "string"}, + "ssh_pub_key_path": {"type": "string"}, + }, +} diff --git a/synapse/config/server_notices.py b/synapse/config/server_notices.py index 48bf3241b659..bde4e879d9e9 100644 --- a/synapse/config/server_notices.py +++ b/synapse/config/server_notices.py @@ -73,7 +73,9 @@ def read_config(self, config, **kwargs): return mxid_localpart = c["system_mxid_localpart"] - self.server_notices_mxid = UserID(mxid_localpart, self.server_name).to_string() + self.server_notices_mxid = UserID( + mxid_localpart, self.root.server.server_name + ).to_string() self.server_notices_mxid_display_name = c.get("system_mxid_display_name", None) self.server_notices_mxid_avatar_url = c.get("system_mxid_avatar_url", None) # todo: i18n diff --git a/synapse/config/sso.py b/synapse/config/sso.py index d0f04cf8e6b2..e4a424326124 100644 --- a/synapse/config/sso.py +++ b/synapse/config/sso.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Matrix.org Foundation C.I.C. +# Copyright 2020-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,20 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from typing import Any, Dict, Optional import attr from ._base import Config +logger = logging.getLogger(__name__) -@attr.s(frozen=True) +LEGACY_TEMPLATE_DIR_WARNING = """ +This server's configuration file is using the deprecated 'template_dir' setting in the +'sso' section. Support for this setting has been deprecated and will be removed in a +future version of Synapse. Server admins should instead use the new +'custom_template_directory' setting documented here: +https://matrix-org.github.io/synapse/latest/templates.html +---------------------------------------------------------------------------------------""" + + +@attr.s(frozen=True, auto_attribs=True) class SsoAttributeRequirement: """Object describing a single requirement for SSO attributes.""" - attribute = attr.ib(type=str) + attribute: str # If a value is not given, then the attribute must simply exist.
- value = attr.ib(type=Optional[str]) + value: Optional[str] JSON_SCHEMA = { "type": "object", @@ -38,13 +49,20 @@ class SSOConfig(Config): section = "sso" - def read_config(self, config, **kwargs): + def read_config(self, config, **kwargs) -> None: sso_config: Dict[str, Any] = config.get("sso") or {} # The sso-specific template_dir self.sso_template_dir = sso_config.get("template_dir") + if self.sso_template_dir is not None: + logger.warning(LEGACY_TEMPLATE_DIR_WARNING) # Read templates from disk + custom_template_directories = ( + self.root.server.custom_template_directory, + self.sso_template_dir, + ) + ( self.sso_login_idp_picker_template, self.sso_redirect_confirm_template, @@ -63,7 +81,7 @@ def read_config(self, config, **kwargs): "sso_auth_success.html", "sso_auth_bad_user.html", ], - self.sso_template_dir, + (td for td in custom_template_directories if td), ) # These templates have no placeholders, so render them here @@ -83,17 +101,19 @@ def read_config(self, config, **kwargs): # gracefully to the client). This would make it pointless to ask the user for # confirmation, since the URL the confirmation page would be showing wouldn't be # the client's. - # public_baseurl is an optional setting, so we only add the fallback's URL to the - # list if it's provided (because we can't figure out what that URL is otherwise). - if self.public_baseurl: - login_fallback_url = self.public_baseurl + "_matrix/static/client/login" - self.sso_client_whitelist.append(login_fallback_url) + login_fallback_url = ( + self.root.server.public_baseurl + "_matrix/static/client/login" + ) + self.sso_client_whitelist.append(login_fallback_url) - def generate_config_section(self, **kwargs): + def generate_config_section(self, **kwargs) -> str: return """\ # Additional settings to use with single-sign on systems such as OpenID Connect, # SAML2 and CAS. # + # Server admins can configure custom templates for pages related to SSO. See + # https://matrix-org.github.io/synapse/latest/templates.html for more information. + # sso: # A list of client URLs which are whitelisted so that the user does not # have to confirm giving access to their account to the URL. Any client @@ -105,11 +125,10 @@ def generate_config_section(self, **kwargs): # phishing attacks from evil.site. To avoid this, include a slash after the # hostname: "https://my.client/". # - # If public_baseurl is set, then the login fallback page (used by clients - # that don't natively support the required login flows) is whitelisted in - # addition to any URLs in this list. + # The login fallback page (used by clients that don't natively support the + # required login flows) is whitelisted in addition to any URLs in this list. # - # By default, this list is empty. + # By default, this list contains only the login fallback page. # #client_whitelist: # - https://riot.im/develop @@ -125,167 +144,4 @@ def generate_config_section(self, **kwargs): # information when first signing in. Defaults to false. # #update_profile_information: true - - # Directory in which Synapse will try to find the template files below. - # If not set, or the files named below are not found within the template - # directory, default templates from within the Synapse package will be used. - # - # Synapse will look for the following templates in this directory: - # - # * HTML page to prompt the user to choose an Identity Provider during - # login: 'sso_login_idp_picker.html'. - # - # This is only used if multiple SSO Identity Providers are configured. 
- # - # When rendering, this template is given the following variables: - # * redirect_url: the URL that the user will be redirected to after - # login. - # - # * server_name: the homeserver's name. - # - # * providers: a list of available Identity Providers. Each element is - # an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # The rendered HTML page should contain a form which submits its results - # back as a GET request, with the following query parameters: - # - # * redirectUrl: the client redirect URI (ie, the `redirect_url` passed - # to the template) - # - # * idp: the 'idp_id' of the chosen IDP. - # - # * HTML page to prompt new users to enter a userid and confirm other - # details: 'sso_auth_account_details.html'. This is only shown if the - # SSO implementation (with any user_mapping_provider) does not return - # a localpart. - # - # When rendering, this template is given the following variables: - # - # * server_name: the homeserver's name. - # - # * idp: details of the SSO Identity Provider that the user logged in - # with: an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # * user_attributes: an object containing details about the user that - # we received from the IdP. May have the following attributes: - # - # * display_name: the user's display_name - # * emails: a list of email addresses - # - # The template should render a form which submits the following fields: - # - # * username: the localpart of the user's chosen user id - # - # * HTML page allowing the user to consent to the server's terms and - # conditions. This is only shown for new users, and only if - # `user_consent.require_at_registration` is set. - # - # When rendering, this template is given the following variables: - # - # * server_name: the homeserver's name. - # - # * user_id: the user's matrix proposed ID. - # - # * user_profile.display_name: the user's proposed display name, if any. - # - # * consent_version: the version of the terms that the user will be - # shown - # - # * terms_url: a link to the page showing the terms. - # - # The template should render a form which submits the following fields: - # - # * accepted_version: the version of the terms accepted by the user - # (ie, 'consent_version' from the input variables). - # - # * HTML page for a confirmation step before redirecting back to the client - # with the login token: 'sso_redirect_confirm.html'. - # - # When rendering, this template is given the following variables: - # - # * redirect_url: the URL the user is about to be redirected to. - # - # * display_url: the same as `redirect_url`, but with the query - # parameters stripped. The intention is to have a - # human-readable URL to show to users, not to use it as - # the final address to redirect to. - # - # * server_name: the homeserver's name. - # - # * new_user: a boolean indicating whether this is the user's first time - # logging in. - # - # * user_id: the user's matrix ID. 
- # - # * user_profile.avatar_url: an MXC URI for the user's avatar, if any. - # None if the user has not set an avatar. - # - # * user_profile.display_name: the user's display name. None if the user - # has not set a display name. - # - # * HTML page which notifies the user that they are authenticating to confirm - # an operation on their account during the user interactive authentication - # process: 'sso_auth_confirm.html'. - # - # When rendering, this template is given the following variables: - # * redirect_url: the URL the user is about to be redirected to. - # - # * description: the operation which the user is being asked to confirm - # - # * idp: details of the Identity Provider that we will use to confirm - # the user's identity: an object with the following attributes: - # - # * idp_id: unique identifier for the IdP - # * idp_name: user-facing name for the IdP - # * idp_icon: if specified in the IdP config, an MXC URI for an icon - # for the IdP - # * idp_brand: if specified in the IdP config, a textual identifier - # for the brand of the IdP - # - # * HTML page shown after a successful user interactive authentication session: - # 'sso_auth_success.html'. - # - # Note that this page must include the JavaScript which notifies of a successful authentication - # (see https://matrix.org/docs/spec/client_server/r0.6.0#fallback). - # - # This template has no additional variables. - # - # * HTML page shown after a user-interactive authentication session which - # does not map correctly onto the expected user: 'sso_auth_bad_user.html'. - # - # When rendering, this template is given the following variables: - # * server_name: the homeserver's name. - # * user_id_to_verify: the MXID of the user that we are trying to - # validate. - # - # * HTML page shown during single sign-on if a deactivated user (according to Synapse's database) - # attempts to login: 'sso_account_deactivated.html'. - # - # This template has no additional variables. - # - # * HTML page to display to users if something goes wrong during the - # OpenID Connect authentication process: 'sso_error.html'. - # - # When rendering, this template is given two variables: - # * error: the technical name of the error - # * error_description: a human-readable message for the error - # - # You can see the default templates at: - # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates - # - #template_dir: "res/templates" """ diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 5679f05e4270..4ca111618fe9 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -14,7 +14,6 @@ import logging import os -from datetime import datetime from typing import List, Optional, Pattern from OpenSSL import SSL, crypto @@ -133,52 +132,6 @@ def read_config(self, config: dict, config_dir_path: str, **kwargs): self.tls_certificate: Optional[crypto.X509] = None self.tls_private_key: Optional[crypto.PKey] = None - def is_disk_cert_valid(self, allow_self_signed=True): - """ - Is the certificate we have on disk valid, and if so, for how long? - - Args: - allow_self_signed (bool): Should we allow the certificate we - read to be self signed? - - Returns: - int: Days remaining of certificate validity. - None: No certificate exists. 
- """ - if not os.path.exists(self.tls_certificate_file): - return None - - try: - with open(self.tls_certificate_file, "rb") as f: - cert_pem = f.read() - except Exception as e: - raise ConfigError( - "Failed to read existing certificate file %s: %s" - % (self.tls_certificate_file, e) - ) - - try: - tls_certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) - except Exception as e: - raise ConfigError( - "Failed to parse existing certificate file %s: %s" - % (self.tls_certificate_file, e) - ) - - if not allow_self_signed: - if tls_certificate.get_subject() == tls_certificate.get_issuer(): - raise ValueError( - "TLS Certificate is self signed, and this is not permitted" - ) - - # YYYYMMDDhhmmssZ -- in UTC - expires_on = datetime.strptime( - tls_certificate.get_notAfter().decode("ascii"), "%Y%m%d%H%M%SZ" - ) - now = datetime.utcnow() - days_remaining = (expires_on - now).days - return days_remaining - def read_certificate_from_disk(self): """ Read the certificates and private key from disk. @@ -260,8 +213,8 @@ def generate_config_section( # #federation_certificate_verification_whitelist: # - lon.example.com - # - *.domain.com - # - *.onion + # - "*.domain.com" + # - "*.onion" # List of custom certificate authorities for federation traffic. # @@ -292,7 +245,7 @@ def read_tls_certificate(self) -> crypto.X509: cert_path = self.tls_certificate_file logger.info("Loading TLS certificate from %s", cert_path) cert_pem = self.read_file(cert_path, "tls_certificate_path") - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem.encode()) return cert diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index b10df8a232cd..6d6678c7e441 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -45,12 +45,16 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild via API following the instructions at + # https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/background_updates.html#run # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 462630201d2e..576f519188bb 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -1,4 +1,5 @@ # Copyright 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
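The quoting change to `federation_certificate_verification_whitelist` above matters because YAML treats a scalar beginning with `*` as an alias reference, so the unquoted wildcard entries were unparseable if uncommented. A quick PyYAML demonstration of the failure mode (illustrative, not part of this changeset):

```python
import yaml

# A scalar starting with "*" is read as a YAML alias, so the unquoted form
# fails with an "undefined alias" error instead of producing a string.
try:
    yaml.safe_load("whitelist:\n  - *.onion\n")
except yaml.YAMLError as e:
    print("unquoted wildcard rejected:", type(e).__name__)

# Quoting turns the entry into an ordinary string scalar.
assert yaml.safe_load('whitelist:\n  - "*.onion"\n') == {"whitelist": ["*.onion"]}
```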
@@ -12,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse from typing import List, Union import attr @@ -63,7 +65,8 @@ class WriterLocations: Attributes: events: The instances that write to the event and backfill streams. - typing: The instance that writes to the typing stream. + typing: The instances that write to the typing stream. Currently + can only be a single instance. to_device: The instances that write to the to_device stream. Currently can only be a single instance. account_data: The instances that write to the account data streams. Currently @@ -75,9 +78,15 @@ class WriterLocations: """ events = attr.ib( - default=["master"], type=List[str], converter=_instance_to_list_converter + default=["master"], + type=List[str], + converter=_instance_to_list_converter, + ) + typing = attr.ib( + default=["master"], + type=List[str], + converter=_instance_to_list_converter, ) - typing = attr.ib(default="master", type=str) to_device = attr.ib( default=["master"], type=List[str], @@ -217,6 +226,11 @@ def read_config(self, config, **kwargs): % (instance, stream) ) + if len(self.writers.typing) != 1: + raise ConfigError( + "Must only specify one instance to handle `typing` messages." + ) + if len(self.writers.to_device) != 1: raise ConfigError( "Must only specify one instance to handle `to_device` messages." @@ -331,7 +345,7 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): #worker_replication_secret: "" """ - def read_arguments(self, args): + def read_arguments(self, args: argparse.Namespace) -> None: # We support a bunch of command line arguments that override options in # the config. A lot of these options have a worker_* prefix when running # on workers so we also have to override them when command line options diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index c644b4dfc5ed..7855f3498b91 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -29,9 +29,12 @@ TLSVersion, platformTrust, ) +from twisted.protocols.tls import TLSMemoryBIOProtocol from twisted.python.failure import Failure from twisted.web.iweb import IPolicyForHTTPS +from synapse.config.homeserver import HomeServerConfig + logger = logging.getLogger(__name__) @@ -51,7 +54,7 @@ class ServerContextFactory(ContextFactory): per https://github.com/matrix-org/synapse/issues/1691 """ - def __init__(self, config): + def __init__(self, config: HomeServerConfig): # TODO: once pyOpenSSL exposes TLS_METHOD and SSL_CTX_set_min_proto_version, # switch to those (see https://github.com/pyca/cryptography/issues/5379). 
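The TODO above refers to how `configure_context` (below) pins the minimum TLS version today: pyOpenSSL's `SSLv23_METHOD` negotiates the highest version both peers support, and the unwanted older protocols are then masked off individually. A standalone sketch of that approach, assuming nothing beyond pyOpenSSL itself:

```python
from OpenSSL import SSL

# Negotiate the highest mutually supported version, then forbid everything
# older than TLS 1.2 by masking off the individual protocol versions.
context = SSL.Context(SSL.SSLv23_METHOD)
context.set_options(
    SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3 | SSL.OP_NO_TLSv1 | SSL.OP_NO_TLSv1_1
)
```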
# @@ -64,7 +67,7 @@ def __init__(self, config): self.configure_context(self._context, config) @staticmethod - def configure_context(context, config): + def configure_context(context: SSL.Context, config: HomeServerConfig) -> None: try: _ecCurve = crypto.get_elliptic_curve(_defaultCurveName) context.set_tmp_ecdh(_ecCurve) @@ -74,15 +77,16 @@ def configure_context(context, config): context.set_options( SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3 | SSL.OP_NO_TLSv1 | SSL.OP_NO_TLSv1_1 ) - context.use_certificate_chain_file(config.tls_certificate_file) - context.use_privatekey(config.tls_private_key) + context.use_certificate_chain_file(config.tls.tls_certificate_file) + assert config.tls.tls_private_key is not None + context.use_privatekey(config.tls.tls_private_key) # https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ context.set_cipher_list( - "ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES256:ECDH+AES128:!aNULL:!SHA1:!AESCCM" + b"ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES256:ECDH+AES128:!aNULL:!SHA1:!AESCCM" ) - def getContext(self): + def getContext(self) -> SSL.Context: return self._context @@ -98,11 +102,11 @@ class FederationPolicyForHTTPS: constructs an SSLClientConnectionCreator factory accordingly. """ - def __init__(self, config): + def __init__(self, config: HomeServerConfig): self._config = config # Check if we're using a custom list of a CA certificates - trust_root = config.federation_ca_trust_root + trust_root = config.tls.federation_ca_trust_root if trust_root is None: # Use CA root certs provided by OpenSSL trust_root = platformTrust() @@ -113,7 +117,7 @@ def __init__(self, config): # moving to TLS 1.2 by default, we want to respect the config option if # it is set to 1.0 (which the alternate option, raiseMinimumTo, will not # let us do). - minTLS = _TLS_VERSION_MAP[config.federation_client_minimum_tls_version] + minTLS = _TLS_VERSION_MAP[config.tls.federation_client_minimum_tls_version] _verify_ssl = CertificateOptions( trustRoot=trust_root, insecurelyLowerMinimumTo=minTLS @@ -125,13 +129,13 @@ def __init__(self, config): self._no_verify_ssl_context = _no_verify_ssl.getContext() self._no_verify_ssl_context.set_info_callback(_context_info_cb) - self._should_verify = self._config.federation_verify_certificates + self._should_verify = self._config.tls.federation_verify_certificates self._federation_certificate_verification_whitelist = ( - self._config.federation_certificate_verification_whitelist + self._config.tls.federation_certificate_verification_whitelist ) - def get_options(self, host: bytes): + def get_options(self, host: bytes) -> IOpenSSLClientConnectionCreator: # IPolicyForHTTPS.get_options takes bytes, but we want to compare # against the str whitelist. The hostnames in the whitelist are already # IDNA-encoded like the hosts will be here. @@ -153,7 +157,9 @@ def get_options(self, host: bytes): return SSLClientConnectionCreator(host, ssl_context, should_verify) - def creatorForNetloc(self, hostname, port): + def creatorForNetloc( + self, hostname: bytes, port: int + ) -> IOpenSSLClientConnectionCreator: """Implements the IPolicyForHTTPS interface so that this can be passed directly to agents. """ @@ -169,16 +175,18 @@ class RegularPolicyForHTTPS: trust root. 
""" - def __init__(self): + def __init__(self) -> None: trust_root = platformTrust() self._ssl_context = CertificateOptions(trustRoot=trust_root).getContext() self._ssl_context.set_info_callback(_context_info_cb) - def creatorForNetloc(self, hostname, port): + def creatorForNetloc( + self, hostname: bytes, port: int + ) -> IOpenSSLClientConnectionCreator: return SSLClientConnectionCreator(hostname, self._ssl_context, True) -def _context_info_cb(ssl_connection, where, ret): +def _context_info_cb(ssl_connection: SSL.Connection, where: int, ret: int) -> None: """The 'information callback' for our openssl context objects. Note: Once this is set as the info callback on a Context object, the Context should @@ -204,11 +212,13 @@ class SSLClientConnectionCreator: Replaces twisted.internet.ssl.ClientTLSOptions """ - def __init__(self, hostname: bytes, ctx, verify_certs: bool): + def __init__(self, hostname: bytes, ctx: SSL.Context, verify_certs: bool): self._ctx = ctx self._verifier = ConnectionVerifier(hostname, verify_certs) - def clientConnectionForTLS(self, tls_protocol): + def clientConnectionForTLS( + self, tls_protocol: TLSMemoryBIOProtocol + ) -> SSL.Connection: context = self._ctx connection = SSL.Connection(context, None) @@ -219,7 +229,7 @@ def clientConnectionForTLS(self, tls_protocol): # ... and we also gut-wrench a '_synapse_tls_verifier' attribute into the # tls_protocol so that the SSL context's info callback has something to # call to do the cert verification. - tls_protocol._synapse_tls_verifier = self._verifier + tls_protocol._synapse_tls_verifier = self._verifier # type: ignore[attr-defined] return connection @@ -244,7 +254,9 @@ def __init__(self, hostname: bytes, verify_certs: bool): self._hostnameBytes = hostname self._hostnameASCII = self._hostnameBytes.decode("ascii") - def verify_context_info_cb(self, ssl_connection, where): + def verify_context_info_cb( + self, ssl_connection: SSL.Connection, where: int + ) -> None: if where & SSL.SSL_CB_HANDSHAKE_START and not self._is_ip_address: ssl_connection.set_tlsext_host_name(self._hostnameBytes) diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 0f2b632e4738..7520647d1e1d 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -100,7 +100,7 @@ def compute_content_hash( def compute_event_reference_hash( - event, hash_algorithm: Hasher = hashlib.sha256 + event: EventBase, hash_algorithm: Hasher = hashlib.sha256 ) -> Tuple[str, bytes]: """Computes the event reference hash. This is the hash of the redacted event. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 9e9b1c1c8630..993b04099e28 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -1,5 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017, 2018 New Vector Ltd +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -22,6 +21,7 @@ from signedjson.key import ( decode_verify_key_bytes, encode_verify_key_base64, + get_verify_key, is_signing_algorithm_supported, ) from signedjson.sign import ( @@ -30,6 +30,7 @@ signature_ids, verify_signed_json, ) +from signedjson.types import VerifyKey from unpaddedbase64 import decode_base64 from twisted.internet import defer @@ -87,7 +88,7 @@ def from_json_object( server_name: str, json_object: JsonDict, minimum_valid_until_ms: int, - ): + ) -> "VerifyJsonRequest": """Create a VerifyJsonRequest to verify all signatures on a signed JSON object for the given server. """ @@ -104,7 +105,7 @@ def from_event( server_name: str, event: EventBase, minimum_valid_until_ms: int, - ): + ) -> "VerifyJsonRequest": """Create a VerifyJsonRequest to verify all signatures on an event object for the given server. """ @@ -118,16 +119,6 @@ def from_event( key_ids=key_ids, ) - def to_fetch_key_request(self) -> "_FetchKeyRequest": - """Create a key fetch request for all keys needed to satisfy the - verification request. - """ - return _FetchKeyRequest( - server_name=self.server_name, - minimum_valid_until_ts=self.minimum_valid_until_ts, - key_ids=self.key_ids, - ) - class KeyLookupError(ValueError): pass @@ -178,6 +169,22 @@ def __init__( process_batch_callback=self._inner_fetch_key_requests, ) + self._hostname = hs.hostname + + # build a FetchKeyResult for each of our own keys, to shortcircuit the + # fetcher. + self._local_verify_keys: Dict[str, FetchKeyResult] = {} + for key_id, key in hs.config.key.old_signing_keys.items(): + self._local_verify_keys[key_id] = FetchKeyResult( + verify_key=key, valid_until_ts=key.expired_ts + ) + + vk = get_verify_key(hs.signing_key) + self._local_verify_keys[f"{vk.alg}:{vk.version}"] = FetchKeyResult( + verify_key=vk, + valid_until_ts=2 ** 63, # fake future timestamp + ) + async def verify_json_for_server( self, server_name: str, @@ -196,6 +203,7 @@ async def verify_json_for_server( validity_time: timestamp at which we require the signing key to be valid. (0 implies we don't care) """ + request = VerifyJsonRequest.from_json_object( server_name, json_object, @@ -262,17 +270,32 @@ async def process_request(self, verify_request: VerifyJsonRequest) -> None: Codes.UNAUTHORIZED, ) - # Add the keys we need to verify to the queue for retrieval. We queue - # up requests for the same server so we don't end up with many in flight - # requests for the same keys. - key_request = verify_request.to_fetch_key_request() - found_keys_by_server = await self._server_queue.add_to_queue( - key_request, key=verify_request.server_name - ) + found_keys: Dict[str, FetchKeyResult] = {} - # Since we batch up requests the returned set of keys may contain keys - # from other servers, so we pull out only the ones we care about.s - found_keys = found_keys_by_server.get(verify_request.server_name, {}) + # If we are the originating server, short-circuit the key-fetch for any keys + # we already have + if verify_request.server_name == self._hostname: + for key_id in verify_request.key_ids: + if key_id in self._local_verify_keys: + found_keys[key_id] = self._local_verify_keys[key_id] + + key_ids_to_find = set(verify_request.key_ids) - found_keys.keys() + if key_ids_to_find: + # Add the keys we need to verify to the queue for retrieval. We queue + # up requests for the same server so we don't end up with many in flight + # requests for the same keys. 
+ key_request = _FetchKeyRequest( + server_name=verify_request.server_name, + minimum_valid_until_ts=verify_request.minimum_valid_until_ts, + key_ids=list(key_ids_to_find), + ) + found_keys_by_server = await self._server_queue.add_to_queue( + key_request, key=verify_request.server_name + ) + + # Since we batch up requests the returned set of keys may contain keys + # from other servers, so we pull out only the ones we care about. + found_keys.update(found_keys_by_server.get(verify_request.server_name, {})) # Verify each signature we got valid keys for, raising if we can't # verify any of them. @@ -285,35 +308,8 @@ async def process_request(self, verify_request: VerifyJsonRequest) -> None: if key_result.valid_until_ts < verify_request.minimum_valid_until_ts: continue - verify_key = key_result.verify_key - json_object = verify_request.get_json_object() - try: - verify_signed_json( - json_object, - verify_request.server_name, - verify_key, - ) - verified = True - except SignatureVerifyException as e: - logger.debug( - "Error verifying signature for %s:%s:%s with key %s: %s", - verify_request.server_name, - verify_key.alg, - verify_key.version, - encode_verify_key_base64(verify_key), - str(e), - ) - raise SynapseError( - 401, - "Invalid signature for server %s with key %s:%s: %s" - % ( - verify_request.server_name, - verify_key.alg, - verify_key.version, - str(e), - ), - Codes.UNAUTHORIZED, - ) + await self._process_json(key_result.verify_key, verify_request) + verified = True if not verified: raise SynapseError( @@ -322,6 +318,39 @@ async def process_request(self, verify_request: VerifyJsonRequest) -> None: Codes.UNAUTHORIZED, ) + async def _process_json( + self, verify_key: VerifyKey, verify_request: VerifyJsonRequest + ) -> None: + """Processes the `VerifyJsonRequest`. Raises if the signature can't be + verified. + """ + try: + verify_signed_json( + verify_request.get_json_object(), + verify_request.server_name, + verify_key, + ) + except SignatureVerifyException as e: + logger.debug( + "Error verifying signature for %s:%s:%s with key %s: %s", + verify_request.server_name, + verify_key.alg, + verify_key.version, + encode_verify_key_base64(verify_key), + str(e), + ) + raise SynapseError( + 401, + "Invalid signature for server %s with key %s:%s: %s" + % ( + verify_request.server_name, + verify_key.alg, + verify_key.version, + str(e), + ), + Codes.UNAUTHORIZED, + ) + async def _inner_fetch_key_requests( self, requests: List[_FetchKeyRequest] ) -> Dict[str, Dict[str, FetchKeyResult]]: @@ -449,7 +478,9 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - async def _fetch_keys(self, keys_to_fetch: List[_FetchKeyRequest]): + async def _fetch_keys( + self, keys_to_fetch: List[_FetchKeyRequest] + ) -> Dict[str, Dict[str, FetchKeyResult]]: key_ids_to_fetch = ( (queue_value.server_name, key_id) for queue_value in keys_to_fetch @@ -572,7 +603,7 @@ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.clock = hs.get_clock() self.client = hs.get_federation_http_client() - self.key_servers = self.config.key_servers + self.key_servers = self.config.key.key_servers async def _fetch_keys( self, keys_to_fetch: List[_FetchKeyRequest] @@ -636,21 +667,25 @@ async def get_server_verify_key_v2_indirect( perspective_name, ) + request: JsonDict = {} + for queue_value in keys_to_fetch: + # there may be multiple requests for each server, so we have to merge + # them intelligently. 
+ request_for_server = { + key_id: { + "minimum_valid_until_ts": queue_value.minimum_valid_until_ts, + } + for key_id in queue_value.key_ids + } + request.setdefault(queue_value.server_name, {}).update(request_for_server) + + logger.debug("Request to notary server %s: %s", perspective_name, request) + try: query_response = await self.client.post_json( destination=perspective_name, path="/_matrix/key/v2/query", - data={ - "server_keys": { - queue_value.server_name: { - key_id: { - "minimum_valid_until_ts": queue_value.minimum_valid_until_ts, - } - for key_id in queue_value.key_ids - } - for queue_value in keys_to_fetch - } - }, + data={"server_keys": request}, ) except (NotRetryingDestination, RequestSendFailed) as e: # these both have str() representations which we can't really improve upon @@ -658,6 +693,10 @@ async def get_server_verify_key_v2_indirect( except HttpResponseException as e: raise KeyLookupError("Remote server returned an error: %s" % (e,)) + logger.debug( + "Response from notary server %s: %s", perspective_name, query_response + ) + keys: Dict[str, Dict[str, FetchKeyResult]] = {} added_keys: List[Tuple[str, str, FetchKeyResult]] = [] diff --git a/synapse/event_auth.py b/synapse/event_auth.py index c3a0c10499d9..e88596169862 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -14,14 +14,20 @@ # limitations under the License. import logging -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json from unpaddedbase64 import decode_base64 -from synapse.api.constants import MAX_PDU_SIZE, EventTypes, JoinRules, Membership +from synapse.api.constants import ( + MAX_PDU_SIZE, + EventContentFields, + EventTypes, + JoinRules, + Membership, +) from synapse.api.errors import AuthError, EventSizeError, SynapseError from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, @@ -35,43 +41,111 @@ logger = logging.getLogger(__name__) -def check( - room_version_obj: RoomVersion, - event: EventBase, - auth_events: StateMap[EventBase], - do_sig_check: bool = True, - do_size_check: bool = True, +def validate_event_for_room_version( + room_version_obj: RoomVersion, event: EventBase ) -> None: - """Checks if this event is correctly authed. + """Ensure that the event complies with the limits, and has the right signatures + + NB: does not *validate* the signatures - it assumes that any signatures present + have already been checked. + + NB: it does not check that the event satisfies the auth rules (that is done in + check_auth_rules_for_event) - these tests are independent of the rest of the state + in the room. + + NB: This is used to check events that have been received over federation. As such, + it can only enforce the checks specified in the relevant room version, to avoid + a split-brain situation where some servers accept such events, and others reject + them. + + TODO: consider moving this into EventValidator Args: - room_version_obj: the version of the room - event: the event being checked. - auth_events: the existing room state. - do_sig_check: True if it should be verified that the sending server - signed the event. - do_size_check: True if the size of the event fields should be verified. 
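
# Sketch of merging several queued key requests for one server into a single
# notary query body, as in get_server_verify_key_v2_indirect above. The
# FetchKeyRequest dataclass is a stand-in for Synapse's _FetchKeyRequest.
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class FetchKeyRequest:
    server_name: str
    minimum_valid_until_ts: int
    key_ids: List[str]


def build_notary_body(keys_to_fetch: List[FetchKeyRequest]) -> Dict[str, dict]:
    request: Dict[str, dict] = {}
    for queue_value in keys_to_fetch:
        # There may be multiple requests for each server, so merge into the
        # per-server dict rather than overwriting it.
        request_for_server = {
            key_id: {"minimum_valid_until_ts": queue_value.minimum_valid_until_ts}
            for key_id in queue_value.key_ids
        }
        request.setdefault(queue_value.server_name, {}).update(request_for_server)
    return {"server_keys": request}


body = build_notary_body(
    [
        FetchKeyRequest("example.org", 1000, ["ed25519:abc"]),
        FetchKeyRequest("example.org", 2000, ["ed25519:def"]),
    ]
)
assert body["server_keys"]["example.org"].keys() == {"ed25519:abc", "ed25519:def"}
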
+ room_version_obj: the version of the room which contains this event + event: the event to be checked Raises: - AuthError if the checks fail - - Returns: - if the auth checks pass. + SynapseError if there is a problem with the event """ - assert isinstance(auth_events, dict) - - if do_size_check: - _check_size_limits(event) + _check_size_limits(event) if not hasattr(event, "room_id"): raise AuthError(500, "Event has no room_id: %s" % event) - room_id = event.room_id + # check that the event has the correct signatures + sender_domain = get_domain_from_id(event.sender) + + is_invite_via_3pid = ( + event.type == EventTypes.Member + and event.membership == Membership.INVITE + and "third_party_invite" in event.content + ) + + # Check the sender's domain has signed the event + if not event.signatures.get(sender_domain): + # We allow invites via 3pid to have a sender from a different + # HS, as the sender must match the sender of the original + # 3pid invite. This is checked further down with the + # other dedicated membership checks. + if not is_invite_via_3pid: + raise AuthError(403, "Event not signed by sender's server") + + if event.format_version in (EventFormatVersions.V1,): + # Only older room versions have event IDs to check. + event_id_domain = get_domain_from_id(event.event_id) + + # Check the origin domain has signed the event + if not event.signatures.get(event_id_domain): + raise AuthError(403, "Event not signed by sending server") + + is_invite_via_allow_rule = ( + room_version_obj.msc3083_join_rules + and event.type == EventTypes.Member + and event.membership == Membership.JOIN + and EventContentFields.AUTHORISING_USER in event.content + ) + if is_invite_via_allow_rule: + authoriser_domain = get_domain_from_id( + event.content[EventContentFields.AUTHORISING_USER] + ) + if not event.signatures.get(authoriser_domain): + raise AuthError(403, "Event not signed by authorising server") + + +def check_auth_rules_for_event( + room_version_obj: RoomVersion, event: EventBase, auth_events: Iterable[EventBase] +) -> None: + """Check that an event complies with the auth rules + + Checks whether an event passes the auth rules with a given set of state events + + Assumes that we have already checked that the event is the right shape (it has + enough signatures, has a room ID, etc). In other words: + - it's fine for use in state resolution, when we have already decided whether to + accept the event or not, and are now trying to decide whether it should make it + into the room state + + - when we're doing the initial event auth, it is only suitable in combination with + a bunch of other tests. + + Args: + room_version_obj: the version of the room + event: the event being checked. + auth_events: the room state to check the events against. + + Raises: + AuthError if the checks fail + """ # We need to ensure that the auth events are actually for the same room, to # stop people from using powers they've been granted in other rooms for # example. - for auth_event in auth_events.values(): + # + # Arguably we don't need to do this when we're just doing state res, as presumably + # the state res algorithm isn't silly enough to give us events from different rooms. + # Still, it's easier to do it anyway. 
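
# Toy illustration of the guard at the top of check_auth_rules_for_event: auth
# events must belong to the same room as the event being checked, and rejected
# events may not be used. `Event` here is a minimal stand-in for Synapse's
# EventBase, and PermissionError stands in for AuthError.
from dataclasses import dataclass
from typing import Iterable, Optional


@dataclass
class Event:
    event_id: str
    room_id: str
    rejected_reason: Optional[str] = None


def check_auth_events_for_room(event: Event, auth_events: Iterable[Event]) -> None:
    for auth_event in auth_events:
        if auth_event.room_id != event.room_id:
            raise PermissionError(
                "During auth for event %s in room %s, found event %s in the auth "
                "events which is in room %s"
                % (event.event_id, event.room_id, auth_event.event_id, auth_event.room_id)
            )
        if auth_event.rejected_reason:
            raise PermissionError(
                "During auth for event %s: found rejected event %s in the state"
                % (event.event_id, auth_event.event_id)
            )


# Passes; an auth event from a different room, or a rejected one, would raise.
check_auth_events_for_room(Event("$e1", "!room:a"), [Event("$create", "!room:a")])
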
+ room_id = event.room_id + for auth_event in auth_events: if auth_event.room_id != room_id: raise AuthError( 403, @@ -79,44 +153,12 @@ def check( "which is in room %s" % (event.event_id, room_id, auth_event.event_id, auth_event.room_id), ) - - if do_sig_check: - sender_domain = get_domain_from_id(event.sender) - - is_invite_via_3pid = ( - event.type == EventTypes.Member - and event.membership == Membership.INVITE - and "third_party_invite" in event.content - ) - - # Check the sender's domain has signed the event - if not event.signatures.get(sender_domain): - # We allow invites via 3pid to have a sender from a different - # HS, as the sender must match the sender of the original - # 3pid invite. This is checked further down with the - # other dedicated membership checks. - if not is_invite_via_3pid: - raise AuthError(403, "Event not signed by sender's server") - - if event.format_version in (EventFormatVersions.V1,): - # Only older room versions have event IDs to check. - event_id_domain = get_domain_from_id(event.event_id) - - # Check the origin domain has signed the event - if not event.signatures.get(event_id_domain): - raise AuthError(403, "Event not signed by sending server") - - is_invite_via_allow_rule = ( - event.type == EventTypes.Member - and event.membership == Membership.JOIN - and "join_authorised_via_users_server" in event.content - ) - if is_invite_via_allow_rule: - authoriser_domain = get_domain_from_id( - event.content["join_authorised_via_users_server"] + if auth_event.rejected_reason: + raise AuthError( + 403, + "During auth for event %s: found rejected event %s in the state" + % (event.event_id, auth_event.event_id), ) - if not event.signatures.get(authoriser_domain): - raise AuthError(403, "Event not signed by authorising server") # Implementation of https://matrix.org/docs/spec/rooms/v1#authorization-rules # @@ -142,8 +184,10 @@ def check( logger.debug("Allowing! %s", event) return + auth_dict = {(e.type, e.state_key): e for e in auth_events} + # 3. If event does not have a m.room.create in its auth_events, reject. - creation_event = auth_events.get((EventTypes.Create, ""), None) + creation_event = auth_dict.get((EventTypes.Create, ""), None) if not creation_event: raise AuthError(403, "No create event in auth events") @@ -151,7 +195,7 @@ def check( creating_domain = get_domain_from_id(event.room_id) originating_domain = get_domain_from_id(event.sender) if creating_domain != originating_domain: - if not _can_federate(event, auth_events): + if not _can_federate(event, auth_dict): raise AuthError(403, "This room has been marked as unfederatable.") # 4. If type is m.room.aliases @@ -173,23 +217,20 @@ def check( logger.debug("Allowing! %s", event) return - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Auth events: %s", [a.event_id for a in auth_events.values()]) - # 5. If type is m.room.membership if event.type == EventTypes.Member: - _is_membership_change_allowed(room_version_obj, event, auth_events) + _is_membership_change_allowed(room_version_obj, event, auth_dict) logger.debug("Allowing! %s", event) return - _check_event_sender_in_room(event, auth_events) + _check_event_sender_in_room(event, auth_dict) # Special case to allow m.room.third_party_invite events wherever # a user is allowed to issue invites. 
    # Fixes https://github.com/vector-im/vector-web/issues/1208 hopefully
    if event.type == EventTypes.ThirdPartyInvite:
-        user_level = get_user_power_level(event.user_id, auth_events)
-        invite_level = get_named_level(auth_events, "invite", 0)
+        user_level = get_user_power_level(event.user_id, auth_dict)
+        invite_level = get_named_level(auth_dict, "invite", 0)

        if user_level < invite_level:
            raise AuthError(403, "You don't have permission to invite users")
@@ -197,40 +238,37 @@ def check(
        logger.debug("Allowing! %s", event)
        return

-    _can_send_event(event, auth_events)
+    _can_send_event(event, auth_dict)

    if event.type == EventTypes.PowerLevels:
-        _check_power_levels(room_version_obj, event, auth_events)
+        _check_power_levels(room_version_obj, event, auth_dict)

    if event.type == EventTypes.Redaction:
-        check_redaction(room_version_obj, event, auth_events)
+        check_redaction(room_version_obj, event, auth_dict)

    if (
        event.type == EventTypes.MSC2716_INSERTION
-        or event.type == EventTypes.MSC2716_CHUNK
+        or event.type == EventTypes.MSC2716_BATCH
        or event.type == EventTypes.MSC2716_MARKER
    ):
-        check_historical(room_version_obj, event, auth_events)
+        check_historical(room_version_obj, event, auth_dict)

    logger.debug("Allowing! %s", event)


 def _check_size_limits(event: EventBase) -> None:
-    def too_big(field):
-        raise EventSizeError("%s too large" % (field,))
-
    if len(event.user_id) > 255:
-        too_big("user_id")
+        raise EventSizeError("'user_id' too large")

    if len(event.room_id) > 255:
-        too_big("room_id")
+        raise EventSizeError("'room_id' too large")

    if event.is_state() and len(event.state_key) > 255:
-        too_big("state_key")
+        raise EventSizeError("'state_key' too large")

    if len(event.type) > 255:
-        too_big("type")
+        raise EventSizeError("'type' too large")

    if len(event.event_id) > 255:
-        too_big("event_id")
+        raise EventSizeError("'event_id' too large")

    if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE:
-        too_big("event")
+        raise EventSizeError("event too large")


 def _can_federate(event: EventBase, auth_events: StateMap[EventBase]) -> bool:
@@ -239,7 +277,7 @@ def _can_federate(event: EventBase, auth_events: StateMap[EventBase]) -> bool:
    if not creation_event:
        return False

-    return creation_event.content.get("m.federate", True) is True
+    return creation_event.content.get(EventContentFields.FEDERATE, True) is True


 def _is_membership_change_allowed(
@@ -378,7 +416,9 @@ def _is_membership_change_allowed(
            # Note that if the caller is in the room or invited, then they do
            # not need to meet the allow rules.
            if not caller_in_room and not caller_invited:
-                authorising_user = event.content.get("join_authorised_via_users_server")
+                authorising_user = event.content.get(
+                    EventContentFields.AUTHORISING_USER
+                )

                if authorising_user is None:
                    raise AuthError(403, "Join event is missing authorising user.")
@@ -549,14 +589,14 @@ def check_historical(
    auth_events: StateMap[EventBase],
) -> None:
    """Check whether the event sender is allowed to send historical related
-    events like "insertion", "chunk", and "marker".
+    events like "insertion", "batch", and "marker".

    Returns:
        None

    Raises:
        AuthError if the event sender is not allowed to send historical related
-        events ("insertion", "chunk", and "marker").
+        events ("insertion", "batch", and "marker").
""" # Ignore the auth checks in room versions that do not support historical # events @@ -570,7 +610,7 @@ def check_historical( if user_level < historical_level: raise AuthError( 403, - 'You don\'t have permission to send send historical related events ("insertion", "chunk", and "marker")', + 'You don\'t have permission to send send historical related events ("insertion", "batch", and "marker")', ) @@ -833,10 +873,10 @@ def auth_types_for_event( auth_types.add(key) if room_version.msc3083_join_rules and membership == Membership.JOIN: - if "join_authorised_via_users_server" in event.content: + if EventContentFields.AUTHORISING_USER in event.content: key = ( EventTypes.Member, - event.content["join_authorised_via_users_server"], + event.content[EventContentFields.AUTHORISING_USER], ) auth_types.add(key) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 0298af4c02d7..38f3cf4d330d 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -16,8 +16,23 @@ import abc import os -from typing import Dict, Optional, Tuple, Type - +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + overload, +) + +from typing_extensions import Literal from unpaddedbase64 import encode_base64 from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions @@ -26,6 +41,9 @@ from synapse.util.frozenutils import freeze from synapse.util.stringutils import strtobool +if TYPE_CHECKING: + from synapse.events.builder import EventBuilder + # Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents # bugs where we accidentally share e.g. signature dicts. However, converting a # dict to frozen_dicts is expensive. @@ -37,7 +55,23 @@ USE_FROZEN_DICTS = strtobool(os.environ.get("SYNAPSE_USE_FROZEN_DICTS", "0")) -class DictProperty: +T = TypeVar("T") + + +# DictProperty (and DefaultDictProperty) require the classes they're used with to +# have a _dict property to pull properties from. +# +# TODO _DictPropertyInstance should not include EventBuilder but due to +# https://github.com/python/mypy/issues/5570 it thinks the DictProperty and +# DefaultDictProperty get applied to EventBuilder when it is in a Union with +# EventBase. This is the least invasive hack to get mypy to comply. +# +# Note that DictProperty/DefaultDictProperty cannot actually be used with +# EventBuilder as it lacks a _dict property. +_DictPropertyInstance = Union["_EventInternalMetadata", "EventBase", "EventBuilder"] + + +class DictProperty(Generic[T]): """An object property which delegates to the `_dict` within its parent object.""" __slots__ = ["key"] @@ -45,12 +79,33 @@ class DictProperty: def __init__(self, key: str): self.key = key - def __get__(self, instance, owner=None): + @overload + def __get__( + self, + instance: Literal[None], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> "DictProperty": + ... + + @overload + def __get__( + self, + instance: _DictPropertyInstance, + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> T: + ... 
+ + def __get__( + self, + instance: Optional[_DictPropertyInstance], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> Union[T, "DictProperty"]: # if the property is accessed as a class property rather than an instance # property, return the property itself rather than the value if instance is None: return self try: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) return instance._dict[self.key] except KeyError as e1: # We want this to look like a regular attribute error (mostly so that @@ -65,10 +120,12 @@ def __get__(self, instance, owner=None): "'%s' has no '%s' property" % (type(instance), self.key) ) from e1.__context__ - def __set__(self, instance, v): + def __set__(self, instance: _DictPropertyInstance, v: T) -> None: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) instance._dict[self.key] = v - def __delete__(self, instance): + def __delete__(self, instance: _DictPropertyInstance) -> None: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) try: del instance._dict[self.key] except KeyError as e1: @@ -77,7 +134,7 @@ def __delete__(self, instance): ) from e1.__context__ -class DefaultDictProperty(DictProperty): +class DefaultDictProperty(DictProperty, Generic[T]): """An extension of DictProperty which provides a default if the property is not present in the parent's _dict. @@ -86,13 +143,34 @@ class DefaultDictProperty(DictProperty): __slots__ = ["default"] - def __init__(self, key, default): + def __init__(self, key: str, default: T): super().__init__(key) self.default = default - def __get__(self, instance, owner=None): + @overload + def __get__( + self, + instance: Literal[None], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> "DefaultDictProperty": + ... + + @overload + def __get__( + self, + instance: _DictPropertyInstance, + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> T: + ... + + def __get__( + self, + instance: Optional[_DictPropertyInstance], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> Union[T, "DefaultDictProperty"]: if instance is None: return self + assert isinstance(instance, (EventBase, _EventInternalMetadata)) return instance._dict.get(self.key, self.default) @@ -111,22 +189,22 @@ def __init__(self, internal_metadata_dict: JsonDict): # in the DAG) self.outlier = False - out_of_band_membership: bool = DictProperty("out_of_band_membership") - send_on_behalf_of: str = DictProperty("send_on_behalf_of") - recheck_redaction: bool = DictProperty("recheck_redaction") - soft_failed: bool = DictProperty("soft_failed") - proactively_send: bool = DictProperty("proactively_send") - redacted: bool = DictProperty("redacted") - txn_id: str = DictProperty("txn_id") - token_id: int = DictProperty("token_id") - historical: bool = DictProperty("historical") + out_of_band_membership: DictProperty[bool] = DictProperty("out_of_band_membership") + send_on_behalf_of: DictProperty[str] = DictProperty("send_on_behalf_of") + recheck_redaction: DictProperty[bool] = DictProperty("recheck_redaction") + soft_failed: DictProperty[bool] = DictProperty("soft_failed") + proactively_send: DictProperty[bool] = DictProperty("proactively_send") + redacted: DictProperty[bool] = DictProperty("redacted") + txn_id: DictProperty[str] = DictProperty("txn_id") + token_id: DictProperty[int] = DictProperty("token_id") + historical: DictProperty[bool] = DictProperty("historical") # XXX: These are set by StreamWorkerStore._set_before_and_after. 
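
# Minimal self-contained version of the generic-descriptor pattern used for
# DictProperty above (simplified: no EventBuilder union and no isinstance
# asserts). Accessing the attribute on the class returns the descriptor
# itself; accessing it on an instance returns the typed value from the
# instance's `_dict`.
from typing import Any, Dict, Generic, Optional, TypeVar, Union, overload

T = TypeVar("T")


class DictProperty(Generic[T]):
    __slots__ = ["key"]

    def __init__(self, key: str):
        self.key = key

    @overload
    def __get__(self, instance: None, owner: Optional[type] = None) -> "DictProperty[T]":
        ...

    @overload
    def __get__(self, instance: Any, owner: Optional[type] = None) -> T:
        ...

    def __get__(
        self, instance: Optional[Any], owner: Optional[type] = None
    ) -> Union[T, "DictProperty[T]"]:
        if instance is None:
            return self  # accessed as a class attribute: return the descriptor
        return instance._dict[self.key]

    def __set__(self, instance: Any, v: T) -> None:
        instance._dict[self.key] = v


class Example:
    depth: DictProperty[int] = DictProperty("depth")

    def __init__(self, d: Dict[str, Any]):
        self._dict = d


e = Example({"depth": 3})
assert e.depth == 3  # typed as int by the second overload
e.depth = 4
assert e._dict["depth"] == 4
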
# I'm pretty sure that these are never persisted to the database, so shouldn't # be here - before: RoomStreamToken = DictProperty("before") - after: RoomStreamToken = DictProperty("after") - order: Tuple[int, int] = DictProperty("order") + before: DictProperty[RoomStreamToken] = DictProperty("before") + after: DictProperty[RoomStreamToken] = DictProperty("after") + order: DictProperty[Tuple[int, int]] = DictProperty("order") def get_dict(self) -> JsonDict: return dict(self._dict) @@ -162,9 +240,6 @@ def need_to_check_redaction(self) -> bool: If the sender of the redaction event is allowed to redact any event due to auth rules, then this will always return false. - - Returns: - bool """ return self._dict.get("recheck_redaction", False) @@ -176,32 +251,23 @@ def is_soft_failed(self) -> bool: sent to clients. 2. They should not be added to the forward extremities (and therefore not to current state). - - Returns: - bool """ return self._dict.get("soft_failed", False) - def should_proactively_send(self): + def should_proactively_send(self) -> bool: """Whether the event, if ours, should be sent to other clients and servers. This is used for sending dummy events internally. Servers and clients can still explicitly fetch the event. - - Returns: - bool """ return self._dict.get("proactively_send", True) - def is_redacted(self): + def is_redacted(self) -> bool: """Whether the event has been redacted. This is used for efficiently checking whether an event has been marked as redacted without needing to make another database call. - - Returns: - bool """ return self._dict.get("redacted", False) @@ -241,29 +307,31 @@ def __init__( self.internal_metadata = _EventInternalMetadata(internal_metadata_dict) - auth_events = DictProperty("auth_events") - depth = DictProperty("depth") - content = DictProperty("content") - hashes = DictProperty("hashes") - origin = DictProperty("origin") - origin_server_ts = DictProperty("origin_server_ts") - prev_events = DictProperty("prev_events") - redacts = DefaultDictProperty("redacts", None) - room_id = DictProperty("room_id") - sender = DictProperty("sender") - state_key = DictProperty("state_key") - type = DictProperty("type") - user_id = DictProperty("sender") + depth: DictProperty[int] = DictProperty("depth") + content: DictProperty[JsonDict] = DictProperty("content") + hashes: DictProperty[Dict[str, str]] = DictProperty("hashes") + origin: DictProperty[str] = DictProperty("origin") + origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts") + redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None) + room_id: DictProperty[str] = DictProperty("room_id") + sender: DictProperty[str] = DictProperty("sender") + # TODO state_key should be Optional[str], this is generally asserted in Synapse + # by calling is_state() first (which ensures this), but it is hard (not possible?) + # to properly annotate that calling is_state() asserts that state_key exists + # and is non-None. 
+ state_key: DictProperty[str] = DictProperty("state_key") + type: DictProperty[str] = DictProperty("type") + user_id: DictProperty[str] = DictProperty("sender") @property def event_id(self) -> str: raise NotImplementedError() @property - def membership(self): + def membership(self) -> str: return self.content["membership"] - def is_state(self): + def is_state(self) -> bool: return hasattr(self, "state_key") and self.state_key is not None def get_dict(self) -> JsonDict: @@ -272,13 +340,13 @@ def get_dict(self) -> JsonDict: return d - def get(self, key, default=None): + def get(self, key: str, default: Optional[Any] = None) -> Any: return self._dict.get(key, default) - def get_internal_metadata_dict(self): + def get_internal_metadata_dict(self) -> JsonDict: return self.internal_metadata.get_dict() - def get_pdu_json(self, time_now=None) -> JsonDict: + def get_pdu_json(self, time_now: Optional[int] = None) -> JsonDict: pdu_json = self.get_dict() if time_now is not None and "age_ts" in pdu_json["unsigned"]: @@ -305,45 +373,58 @@ def get_templated_pdu_json(self) -> JsonDict: return template_json - def __set__(self, instance, value): - raise AttributeError("Unrecognized attribute %s" % (instance,)) - - def __getitem__(self, field): + def __getitem__(self, field: str) -> Optional[Any]: return self._dict[field] - def __contains__(self, field): + def __contains__(self, field: str) -> bool: return field in self._dict - def items(self): + def items(self) -> List[Tuple[str, Optional[Any]]]: return list(self._dict.items()) - def keys(self): + def keys(self) -> Iterable[str]: return self._dict.keys() - def prev_event_ids(self): + def prev_event_ids(self) -> Sequence[str]: """Returns the list of prev event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's prev_events + The list of event IDs of this event's prev_events """ - return [e for e, _ in self.prev_events] + return [e for e, _ in self._dict["prev_events"]] - def auth_event_ids(self): + def auth_event_ids(self) -> Sequence[str]: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's auth_events + The list of event IDs of this event's auth_events """ - return [e for e, _ in self.auth_events] + return [e for e, _ in self._dict["auth_events"]] - def freeze(self): + def freeze(self) -> None: """'Freeze' the event dict, so it cannot be modified by accident""" # this will be a no-op if the event dict is already frozen. 
        self._dict = freeze(self._dict)

+    def __str__(self) -> str:
+        return self.__repr__()
+
+    def __repr__(self) -> str:
+        rejection = f"REJECTED={self.rejected_reason}, " if self.rejected_reason else ""
+
+        return (
+            f"<{self.__class__.__name__} "
+            f"{rejection}"
+            f"event_id={self.event_id}, "
+            f"type={self.get('type')}, "
+            f"state_key={self.get('state_key')}, "
+            f"outlier={self.internal_metadata.is_outlier()}"
+            ">"
+        )
+

 class FrozenEvent(EventBase):
     format_version = EventFormatVersions.V1  # All events of this type are V1

@@ -392,16 +473,6 @@ def __init__(
     def event_id(self) -> str:
         return self._event_id

-    def __str__(self):
-        return self.__repr__()
-
-    def __repr__(self):
-        return "<FrozenEvent event_id=%r, type=%r, state_key=%r>" % (
-            self.get("event_id", None),
-            self.get("type", None),
-            self.get("state_key", None),
-        )
-

 class FrozenEventV2(EventBase):
     format_version = EventFormatVersions.V2  # All events of this type are V2

@@ -437,7 +508,7 @@ def __init__(
         else:
             frozen_dict = event_dict

-        self._event_id = None
+        self._event_id: Optional[str] = None

         super().__init__(
             frozen_dict,
@@ -449,7 +520,7 @@ def __init__(
         )

     @property
-    def event_id(self):
+    def event_id(self) -> str:
         # We have to import this here as otherwise we get an import loop which
         # is hard to break.
         from synapse.crypto.event_signing import compute_event_reference_hash
@@ -459,34 +530,23 @@ def event_id(self):
             self._event_id = "$" + encode_base64(compute_event_reference_hash(self)[1])
         return self._event_id

-    def prev_event_ids(self):
+    def prev_event_ids(self) -> Sequence[str]:
         """Returns the list of prev event IDs. The order matches the order
         specified in the event, though there is no meaning to it.

         Returns:
-            list[str]: The list of event IDs of this event's prev_events
+            The list of event IDs of this event's prev_events
         """
-        return self.prev_events
+        return self._dict["prev_events"]

-    def auth_event_ids(self):
+    def auth_event_ids(self) -> Sequence[str]:
         """Returns the list of auth event IDs. The order matches the order
         specified in the event, though there is no meaning to it.

         Returns:
-            list[str]: The list of event IDs of this event's auth_events
+            The list of event IDs of this event's auth_events
         """
-        return self.auth_events
-
-    def __str__(self):
-        return self.__repr__()
-
-    def __repr__(self):
-        return "<%s event_id=%r, type=%r, state_key=%r>" % (
-            self.__class__.__name__,
-            self.event_id,
-            self.get("type", None),
-            self.get("state_key", None),
-        )
+        return self._dict["auth_events"]


 class FrozenEventV3(FrozenEventV2):
@@ -495,7 +555,7 @@ class FrozenEventV3(FrozenEventV2):
     """FrozenEventV3, which differs from FrozenEventV2 only in the event_id format"""

     format_version = EventFormatVersions.V3  # All events of this type are V3

     @property
-    def event_id(self):
+    def event_id(self) -> str:
         # We have to import this here as otherwise we get an import loop which
         # is hard to break.
         from synapse.crypto.event_signing import compute_event_reference_hash
@@ -508,12 +568,14 @@ def event_id(self):
         return self._event_id


-def _event_type_from_format_version(format_version: int) -> Type[EventBase]:
+def _event_type_from_format_version(
+    format_version: int,
+) -> Type[Union[FrozenEvent, FrozenEventV2, FrozenEventV3]]:
     """Returns the python type to use to construct an Event object for the
     given event format version.
Args: - format_version (int): The event format version + format_version: The event format version Returns: type: A type that can be initialized as per the initializer of diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 87e2bb123b6d..eb39e0ae3291 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -18,10 +18,8 @@ from nacl.signing import SigningKey from synapse.api.constants import MAX_DEPTH -from synapse.api.errors import UnsupportedRoomVersionError from synapse.api.room_versions import ( KNOWN_EVENT_FORMAT_VERSIONS, - KNOWN_ROOM_VERSIONS, EventFormatVersions, RoomVersion, ) @@ -92,13 +90,13 @@ class EventBuilder: ) @property - def state_key(self): + def state_key(self) -> str: if self._state_key is not None: return self._state_key raise AttributeError("state_key") - def is_state(self): + def is_state(self) -> bool: return self._state_key is not None async def build( @@ -130,14 +128,12 @@ async def build( ) format_version = self.room_version.event_format + # The types of auth/prev events changes between event versions. + prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] + auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] if format_version == EventFormatVersions.V1: - # The types of auth/prev events changes between event versions. - auth_events: Union[ - List[str], List[Tuple[str, Dict[str, str]]] - ] = await self._store.add_event_hashes(auth_event_ids) - prev_events: Union[ - List[str], List[Tuple[str, Dict[str, str]]] - ] = await self._store.add_event_hashes(prev_event_ids) + auth_events = await self._store.add_event_hashes(auth_event_ids) + prev_events = await self._store.add_event_hashes(prev_event_ids) else: auth_events = auth_event_ids prev_events = prev_event_ids @@ -197,24 +193,6 @@ def __init__(self, hs: "HomeServer"): self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() - def new(self, room_version: str, key_values: dict) -> EventBuilder: - """Generate an event builder appropriate for the given room version - - Deprecated: use for_room_version with a RoomVersion object instead - - Args: - room_version: Version of the room that we're creating an event builder for - key_values: Fields used as the basis of the new event - - Returns: - EventBuilder - """ - v = KNOWN_ROOM_VERSIONS.get(room_version) - if not v: - # this can happen if support is withdrawn for a room version - raise UnsupportedRoomVersionError() - return self.for_room_version(v, key_values) - def for_room_version( self, room_version: RoomVersion, key_values: dict ) -> EventBuilder: diff --git a/synapse/events/presence_router.py b/synapse/events/presence_router.py index 6c37c8a7a430..a58f313e8b1c 100644 --- a/synapse/events/presence_router.py +++ b/synapse/events/presence_router.py @@ -11,45 +11,116 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
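
# Sketch of the format-version branch in EventBuilder.build above: V1-format
# events reference prev/auth events as (event_id, hash-dict) pairs, while
# later formats use bare event IDs. The store's add_event_hashes lookup is
# faked here; the version constants are stand-ins, not Synapse's.
from typing import Dict, List, Tuple, Union

FORMAT_V1, FORMAT_V2 = 1, 2  # stand-ins for EventFormatVersions constants


def fake_add_event_hashes(event_ids: List[str]) -> List[Tuple[str, Dict[str, str]]]:
    # Stand-in for the store call: pair each event ID with its content hashes.
    return [(event_id, {"sha256": "abc123"}) for event_id in event_ids]


def events_for_format(
    format_version: int, event_ids: List[str]
) -> Union[List[str], List[Tuple[str, Dict[str, str]]]]:
    # The types of auth/prev events change between event versions.
    if format_version == FORMAT_V1:
        return fake_add_event_hashes(event_ids)
    return event_ids


print(events_for_format(FORMAT_V1, ["$a"]))  # [('$a', {'sha256': 'abc123'})]
print(events_for_format(FORMAT_V2, ["$a"]))  # ['$a']
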
-
-from typing import TYPE_CHECKING, Dict, Iterable, Set, Union
+import logging
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Union,
+)

 from synapse.api.presence import UserPresenceState
+from synapse.util.async_helpers import maybe_awaitable

 if TYPE_CHECKING:
     from synapse.server import HomeServer

+GET_USERS_FOR_STATES_CALLBACK = Callable[
+    [Iterable[UserPresenceState]], Awaitable[Dict[str, Set[UserPresenceState]]]
+]
+# This must either return a set of strings or the constant PresenceRouter.ALL_USERS.
+GET_INTERESTED_USERS_CALLBACK = Callable[[str], Awaitable[Union[Set[str], str]]]
+
+logger = logging.getLogger(__name__)
+
+
+def load_legacy_presence_router(hs: "HomeServer") -> None:
+    """Wrapper that loads a presence router module configured using the old
+    configuration, and registers the hooks they implement.
+    """
+
+    if hs.config.server.presence_router_module_class is None:
+        return
+
+    module = hs.config.server.presence_router_module_class
+    config = hs.config.server.presence_router_config
+    api = hs.get_module_api()
+
+    presence_router = module(config=config, module_api=api)
+
+    # The known hooks. If a module implements a method whose name appears in this set,
+    # we'll want to register it.
+    presence_router_methods = {
+        "get_users_for_states",
+        "get_interested_users",
+    }
+
+    # All methods that the module provides should be async, but this wasn't enforced
+    # in the old module system, so we wrap them if needed
+    def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]:
+        # f might be None if the callback isn't implemented by the module. In this
+        # case we don't want to register a callback at all so we return None.
+        if f is None:
+            return None
+
+        def run(*args: Any, **kwargs: Any) -> Awaitable:
+            # Assertion required because mypy can't prove we won't change `f`
+            # back to `None`. See
+            # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+            assert f is not None
+
+            return maybe_awaitable(f(*args, **kwargs))
+
+        return run
+
+    # Register the hooks through the module API.
+    hooks = {
+        hook: async_wrapper(getattr(presence_router, hook, None))
+        for hook in presence_router_methods
+    }
+
+    api.register_presence_router_callbacks(**hooks)
+

 class PresenceRouter:
     """
     A module that the homeserver will call upon to help route user presence updates to
-    additional destinations. If a custom presence router is configured, calls will be
-    passed to that instead.
+    additional destinations.
""" ALL_USERS = "ALL" def __init__(self, hs: "HomeServer"): - self.custom_presence_router = None + # Initially there are no callbacks + self._get_users_for_states_callbacks: List[GET_USERS_FOR_STATES_CALLBACK] = [] + self._get_interested_users_callbacks: List[GET_INTERESTED_USERS_CALLBACK] = [] - # Check whether a custom presence router module has been configured - if hs.config.presence_router_module_class: - # Initialise the module - self.custom_presence_router = hs.config.presence_router_module_class( - config=hs.config.presence_router_config, module_api=hs.get_module_api() + def register_presence_router_callbacks( + self, + get_users_for_states: Optional[GET_USERS_FOR_STATES_CALLBACK] = None, + get_interested_users: Optional[GET_INTERESTED_USERS_CALLBACK] = None, + ) -> None: + # PresenceRouter modules are required to implement both of these methods + # or neither of them as they are assumed to act in a complementary manner + paired_methods = [get_users_for_states, get_interested_users] + if paired_methods.count(None) == 1: + raise RuntimeError( + "PresenceRouter modules must register neither or both of the paired callbacks: " + "[get_users_for_states, get_interested_users]" ) - # Ensure the module has implemented the required methods - required_methods = ["get_users_for_states", "get_interested_users"] - for method_name in required_methods: - if not hasattr(self.custom_presence_router, method_name): - raise Exception( - "PresenceRouter module '%s' must implement all required methods: %s" - % ( - hs.config.presence_router_module_class.__name__, - ", ".join(required_methods), - ) - ) + # Append the methods provided to the lists of callbacks + if get_users_for_states is not None: + self._get_users_for_states_callbacks.append(get_users_for_states) + + if get_interested_users is not None: + self._get_interested_users_callbacks.append(get_interested_users) async def get_users_for_states( self, @@ -66,16 +137,42 @@ async def get_users_for_states( A dictionary of user_id -> set of UserPresenceState, indicating which presence updates each user should receive. """ - if self.custom_presence_router is not None: - # Ask the custom module - return await self.custom_presence_router.get_users_for_states( - state_updates=state_updates - ) - # Don't include any extra destinations for presence updates - return {} + # Bail out early if we don't have any callbacks to run. 
+ if len(self._get_users_for_states_callbacks) == 0: + # Don't include any extra destinations for presence updates + return {} + + users_for_states: Dict[str, Set[UserPresenceState]] = {} + # run all the callbacks for get_users_for_states and combine the results + for callback in self._get_users_for_states_callbacks: + try: + result = await callback(state_updates) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue + + if not isinstance(result, Dict): + logger.warning( + "Wrong type returned by module API callback %s: %s, expected Dict", + callback, + result, + ) + continue + + for key, new_entries in result.items(): + if not isinstance(new_entries, Set): + logger.warning( + "Wrong type returned by module API callback %s: %s, expected Set", + callback, + new_entries, + ) + break + users_for_states.setdefault(key, set()).update(new_entries) - async def get_interested_users(self, user_id: str) -> Union[Set[str], ALL_USERS]: + return users_for_states + + async def get_interested_users(self, user_id: str) -> Union[Set[str], str]: """ Retrieve a list of users that `user_id` is interested in receiving the presence of. This will be in addition to those they share a room with. @@ -92,12 +189,36 @@ async def get_interested_users(self, user_id: str) -> Union[Set[str], ALL_USERS] A set of user IDs to return presence updates for, or ALL_USERS to return all known updates. """ - if self.custom_presence_router is not None: - # Ask the custom module for interested users - return await self.custom_presence_router.get_interested_users( - user_id=user_id - ) - # A custom presence router is not defined. - # Don't report any additional interested users - return set() + # Bail out early if we don't have any callbacks to run. + if len(self._get_interested_users_callbacks) == 0: + # Don't report any additional interested users + return set() + + interested_users = set() + # run all the callbacks for get_interested_users and combine the results + for callback in self._get_interested_users_callbacks: + try: + result = await callback(user_id) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue + + # If one of the callbacks returns ALL_USERS then we can stop calling all + # of the other callbacks, since the set of interested_users is already as + # large as it can possibly be + if result == PresenceRouter.ALL_USERS: + return PresenceRouter.ALL_USERS + + if not isinstance(result, Set): + logger.warning( + "Wrong type returned by module API callback %s: %s, expected set", + callback, + result, + ) + continue + + # Add the new interested users to the set + interested_users.update(result) + + return interested_users diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f8d898c3b1d1..f251402ed8f2 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,17 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
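
# Sketch of the "combine results from many callbacks" pattern used in
# get_interested_users above: any callback returning ALL_USERS wins outright
# (the result can't get any bigger), otherwise the returned sets are unioned
# and badly-typed or failing callbacks are skipped. The callbacks here are
# plain async functions rather than registered module hooks.
import asyncio
from typing import Awaitable, Callable, List, Set, Union

ALL_USERS = "ALL"

Callback = Callable[[str], Awaitable[Union[Set[str], str]]]


async def get_interested_users(user_id: str, callbacks: List[Callback]) -> Union[Set[str], str]:
    if not callbacks:
        return set()

    interested: Set[str] = set()
    for callback in callbacks:
        try:
            result = await callback(user_id)
        except Exception:
            continue  # a misbehaving module shouldn't break the others
        if result == ALL_USERS:
            return ALL_USERS  # stop early: the answer is already maximal
        if not isinstance(result, set):
            continue  # ignore badly-typed results
        interested.update(result)
    return interested


async def module_a(user_id: str) -> Set[str]:
    return {"@alice:example.org"}


async def module_b(user_id: str) -> Union[Set[str], str]:
    return ALL_USERS


print(asyncio.run(get_interested_users("@bob:example.org", [module_a])))
print(asyncio.run(get_interested_users("@bob:example.org", [module_a, module_b])))
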
-from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import attr from frozendict import frozendict +from twisted.internet.defer import Deferred + from synapse.appservice import ApplicationService from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.types import StateMap +from synapse.types import JsonDict, StateMap if TYPE_CHECKING: + from synapse.storage import Storage from synapse.storage.databases.main import DataStore @@ -80,9 +83,7 @@ class EventContext: (type, state_key) -> event_id - FIXME: what is this for an outlier? it seems ill-defined. It seems like - it could be either {}, or the state we were given by the remote - server, depending on $THINGS + For an outlier, this is {} Note that this is a private attribute: it should be accessed via ``get_current_state_ids``. _AsyncEventContext impl calculates this @@ -96,7 +97,7 @@ class EventContext: (type, state_key) -> event_id - FIXME: again, what is this for an outlier? + For an outlier, this is {} As with _current_state_ids, this is a private attribute. It should be accessed via get_prev_state_ids. @@ -114,13 +115,13 @@ class EventContext: @staticmethod def with_state( - state_group, - state_group_before_event, - current_state_ids, - prev_state_ids, - prev_group=None, - delta_ids=None, - ): + state_group: Optional[int], + state_group_before_event: Optional[int], + current_state_ids: Optional[StateMap[str]], + prev_state_ids: Optional[StateMap[str]], + prev_group: Optional[int] = None, + delta_ids: Optional[StateMap[str]] = None, + ) -> "EventContext": return EventContext( current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, @@ -130,15 +131,23 @@ def with_state( delta_ids=delta_ids, ) - async def serialize(self, event: EventBase, store: "DataStore") -> dict: + @staticmethod + def for_outlier() -> "EventContext": + """Return an EventContext instance suitable for persisting an outlier event""" + return EventContext( + current_state_ids={}, + prev_state_ids={}, + ) + + async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` Args: - event (FrozenEvent): The event that this context relates to + event: The event that this context relates to Returns: - dict + The serialized event. """ # We don't serialize the full state dicts, instead they get pulled out @@ -164,17 +173,16 @@ async def serialize(self, event: EventBase, store: "DataStore") -> dict: } @staticmethod - def deserialize(storage, input): + def deserialize(storage: "Storage", input: JsonDict) -> "EventContext": """Converts a dict that was produced by `serialize` back into a EventContext. Args: - storage (Storage): Used to convert AS ID to AS object and fetch - state. - input (dict): A dict produced by `serialize` + storage: Used to convert AS ID to AS object and fetch state. + input: A dict produced by `serialize` Returns: - EventContext + The event context. """ context = _AsyncEventContextImpl( # We use the state_group and prev_state_id stuff to pull the @@ -235,22 +243,25 @@ async def get_current_state_ids(self) -> Optional[StateMap[str]]: await self._ensure_fetched() return self._current_state_ids - async def get_prev_state_ids(self): + async def get_prev_state_ids(self) -> StateMap[str]: """ Gets the room state map, excluding this event. For a non-state event, this will be the same as get_current_state_ids(). 
        Returns:
-            dict[(str, str), str]|None: Returns None if state_group
-            is None, which happens when the associated event is an outlier.
-            Maps a (type, state_key) to the event ID of the state event matching
-            this tuple.
+            Returns {} if state_group is None, which happens when the associated
+            event is an outlier.
+
+            Maps a (type, state_key) to the event ID of the state event matching
+            this tuple.
        """
        await self._ensure_fetched()
+        # There *should* be previous state IDs now.
+        assert self._prev_state_ids is not None
        return self._prev_state_ids

-    def get_cached_current_state_ids(self):
+    def get_cached_current_state_ids(self) -> Optional[StateMap[str]]:
        """Gets the current state IDs if we have them already cached.

        It is an error to access this for a rejected event, since rejected state should
@@ -258,16 +269,17 @@ def get_cached_current_state_ids(self):
        ``rejected`` is set.

        Returns:
-            dict[(str, str), str]|None: Returns None if we haven't cached the
-            state or if state_group is None, which happens when the associated
-            event is an outlier.
+            Returns None if we haven't cached the state or if state_group is None
+            (which happens when the associated event is an outlier).
+
+            Otherwise, returns the current state IDs.
        """
        if self.rejected:
            raise RuntimeError("Attempt to access state_ids of rejected event")

        return self._current_state_ids

-    async def _ensure_fetched(self):
+    async def _ensure_fetched(self) -> None:
        return None


@@ -279,46 +291,51 @@ class _AsyncEventContextImpl(EventContext):

    Attributes:

-        _storage (Storage)
+        _storage

-        _fetching_state_deferred (Deferred|None): Resolves when *_state_ids have
-            been calculated. None if we haven't started calculating yet
+        _fetching_state_deferred: Resolves when *_state_ids have been calculated.
+            None if we haven't started calculating yet

-        _event_type (str): The type of the event the context is associated with.
+        _event_type: The type of the event the context is associated with.

-        _event_state_key (str): The state_key of the event the context is
-            associated with.
+        _event_state_key: The state_key of the event the context is associated with.

-        _prev_state_id (str|None): If the event associated with the context is
-            a state event, then `_prev_state_id` is the event_id of the state
-            that was replaced.
+        _prev_state_id: If the event associated with the context is a state event,
+            then `_prev_state_id` is the event_id of the state that was replaced.
    """

    # This needs to have a default as we're inheriting
-    _storage = attr.ib(default=None)
-    _prev_state_id = attr.ib(default=None)
-    _event_type = attr.ib(default=None)
-    _event_state_key = attr.ib(default=None)
-    _fetching_state_deferred = attr.ib(default=None)
+    _storage: "Storage" = attr.ib(default=None)
+    _prev_state_id: Optional[str] = attr.ib(default=None)
+    _event_type: str = attr.ib(default=None)
+    _event_state_key: Optional[str] = attr.ib(default=None)
+    _fetching_state_deferred: Optional["Deferred[None]"] = attr.ib(default=None)

-    async def _ensure_fetched(self):
+    async def _ensure_fetched(self) -> None:
        if not self._fetching_state_deferred:
            self._fetching_state_deferred = run_in_background(self._fill_out_state)

-        return await make_deferred_yieldable(self._fetching_state_deferred)
+        await make_deferred_yieldable(self._fetching_state_deferred)

-    async def _fill_out_state(self):
+    async def _fill_out_state(self) -> None:
        """Called to populate the _current_state_ids and _prev_state_ids
        attributes by loading from the database.
""" if self.state_group is None: + # No state group means the event is an outlier. Usually the state_ids dicts are also + # pre-set to empty dicts, but they get reset when the context is serialized, so set + # them to empty dicts again here. + self._current_state_ids = {} + self._prev_state_ids = {} return - self._current_state_ids = await self._storage.state.get_state_ids_for_group( + current_state_ids = await self._storage.state.get_state_ids_for_group( self.state_group ) + # Set this separately so mypy knows current_state_ids is not None. + self._current_state_ids = current_state_ids if self._event_state_key is not None: - self._prev_state_ids = dict(self._current_state_ids) + self._prev_state_ids = dict(current_state_ids) key = (self._event_type, self._event_state_key) if self._prev_state_id: @@ -326,10 +343,12 @@ async def _fill_out_state(self): else: self._prev_state_ids.pop(key, None) else: - self._prev_state_ids = self._current_state_ids + self._prev_state_ids = current_state_ids -def _encode_state_dict(state_dict): +def _encode_state_dict( + state_dict: Optional[StateMap[str]], +) -> Optional[List[Tuple[str, str, str]]]: """Since dicts of (type, state_key) -> event_id cannot be serialized in JSON we need to convert them to a form that can. """ @@ -339,7 +358,9 @@ def _encode_state_dict(state_dict): return [(etype, state_key, v) for (etype, state_key), v in state_dict.items()] -def _decode_state_dict(input): +def _decode_state_dict( + input: Optional[List[Tuple[str, str, str]]] +) -> Optional[StateMap[str]]: """Decodes a state dict encoded using `_encode_state_dict` above""" if input is None: return None diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 57f1d53fa863..3134beb8d3c6 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -44,8 +44,13 @@ ["synapse.events.EventBase"], Awaitable[Union[bool, str]], ] +USER_MAY_JOIN_ROOM_CALLBACK = Callable[[str, str, bool], Awaitable[bool]] USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] +USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[[str, str, str, str], Awaitable[bool]] USER_MAY_CREATE_ROOM_CALLBACK = Callable[[str], Awaitable[bool]] +USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK = Callable[ + [str, List[str], List[Dict[str, str]]], Awaitable[bool] +] USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[[str, RoomAlias], Awaitable[bool]] USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]] CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[Dict[str, str]], Awaitable[bool]] @@ -72,13 +77,13 @@ ] -def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): +def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None: """Wrapper that loads spam checkers configured using the old configuration, and registers the spam checker hooks they implement. """ spam_checkers: List[Any] = [] api = hs.get_module_api() - for module, config in hs.config.spam_checkers: + for module, config in hs.config.spamchecker.spam_checkers: # Older spam checkers don't accept the `api` argument, so we # try and detect support. spam_args = inspect.getfullargspec(module) @@ -124,9 +129,9 @@ def wrapper( request_info: Collection[Tuple[str, str]], auth_provider_id: Optional[str], ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]: - # We've already made sure f is not None above, but mypy doesn't - # do well across function boundaries so we need to tell it f is - # definitely not None. 
+            # Assertion required because mypy can't prove we won't
+            # change `f` back to `None`. See
+            # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
             assert f is not None

             return f(
@@ -141,9 +146,10 @@ def wrapper(
                 "Bad signature for callback check_registration_for_spam",
             )

-    def run(*args, **kwargs):
-        # mypy doesn't do well across function boundaries so we need to tell it
-        # wrapped_func is definitely not None.
+    def run(*args: Any, **kwargs: Any) -> Awaitable:
+        # Assertion required because mypy can't prove we won't change
+        # `wrapped_func` back to `None`. See
+        # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
         assert wrapped_func is not None

         return maybe_awaitable(wrapped_func(*args, **kwargs))
@@ -160,10 +166,17 @@ def run(*args, **kwargs):


 class SpamChecker:
-    def __init__(self):
+    def __init__(self) -> None:
         self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = []
+        self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = []
         self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = []
+        self._user_may_send_3pid_invite_callbacks: List[
+            USER_MAY_SEND_3PID_INVITE_CALLBACK
+        ] = []
         self._user_may_create_room_callbacks: List[USER_MAY_CREATE_ROOM_CALLBACK] = []
+        self._user_may_create_room_with_invites_callbacks: List[
+            USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK
+        ] = []
         self._user_may_create_room_alias_callbacks: List[
             USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
         ] = []
@@ -181,8 +194,13 @@ def __init__(self):
     def register_callbacks(
         self,
         check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None,
+        user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None,
         user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None,
+        user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None,
         user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None,
+        user_may_create_room_with_invites: Optional[
+            USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK
+        ] = None,
         user_may_create_room_alias: Optional[
             USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
         ] = None,
@@ -192,17 +210,30 @@ def register_callbacks(
             CHECK_REGISTRATION_FOR_SPAM_CALLBACK
         ] = None,
         check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None,
-    ):
+    ) -> None:
         """Register callbacks from module for each hook."""
         if check_event_for_spam is not None:
             self._check_event_for_spam_callbacks.append(check_event_for_spam)

+        if user_may_join_room is not None:
+            self._user_may_join_room_callbacks.append(user_may_join_room)
+
         if user_may_invite is not None:
             self._user_may_invite_callbacks.append(user_may_invite)

+        if user_may_send_3pid_invite is not None:
+            self._user_may_send_3pid_invite_callbacks.append(
+                user_may_send_3pid_invite,
+            )
+
         if user_may_create_room is not None:
             self._user_may_create_room_callbacks.append(user_may_create_room)

+        if user_may_create_room_with_invites is not None:
+            self._user_may_create_room_with_invites_callbacks.append(
+                user_may_create_room_with_invites,
+            )
+
         if user_may_create_room_alias is not None:
             self._user_may_create_room_alias_callbacks.append(
                 user_may_create_room_alias,
@@ -245,6 +276,26 @@ async def check_event_for_spam(

         return False

+    async def user_may_join_room(
+        self, user_id: str, room_id: str, is_invited: bool
+    ) -> bool:
+        """Checks if a given user is allowed to join a room.
+        Not called when a user creates a room.
+
+        Args:
+            user_id: The ID of the user wanting to join the room
+            room_id: The ID of the room the user wants to join
+            is_invited: Whether the user is invited into the room
+
+        Returns:
+            Whether the user may join the room
+        """
+        for callback in self._user_may_join_room_callbacks:
+            if await callback(user_id, room_id, is_invited) is False:
+                return False
+
+        return True
+
     async def user_may_invite(
         self, inviter_userid: str, invitee_userid: str, room_id: str
     ) -> bool:
@@ -266,6 +317,31 @@ async def user_may_invite(

         return True

+    async def user_may_send_3pid_invite(
+        self, inviter_userid: str, medium: str, address: str, room_id: str
+    ) -> bool:
+        """Checks if a given user may invite a given threepid into the room
+
+        If this method returns false, the threepid invite will be rejected.
+
+        Note that if the threepid is already associated with a Matrix user ID, Synapse
+        will call user_may_invite with said user ID instead.
+
+        Args:
+            inviter_userid: The user ID of the sender of the invitation
+            medium: The 3PID's medium (e.g. "email")
+            address: The 3PID's address (e.g. "alice@example.com")
+            room_id: The room ID
+
+        Returns:
+            True if the user may send the invite, otherwise False
+        """
+        for callback in self._user_may_send_3pid_invite_callbacks:
+            if await callback(inviter_userid, medium, address, room_id) is False:
+                return False
+
+        return True
+
     async def user_may_create_room(self, userid: str) -> bool:
         """Checks if a given user may create a room

@@ -283,6 +359,34 @@ async def user_may_create_room(self, userid: str) -> bool:

         return True

+    async def user_may_create_room_with_invites(
+        self,
+        userid: str,
+        invites: List[str],
+        threepid_invites: List[Dict[str, str]],
+    ) -> bool:
+        """Checks if a given user may create a room with invites
+
+        If this method returns false, the creation request will be rejected.
+
+        Args:
+            userid: The ID of the user attempting to create a room
+            invites: The IDs of the Matrix users to be invited if the room creation is
+                allowed.
+            threepid_invites: The threepids to be invited if the room creation is allowed,
+                each as a dict including a "medium" key indicating the threepid's medium
+                (e.g. "email") and an "address" key indicating the threepid's address
+                (e.g. "alice@example.com")
+
+        Returns:
+            True if the user may create the room, otherwise False
+        """
+        for callback in self._user_may_create_room_with_invites_callbacks:
+            if await callback(userid, invites, threepid_invites) is False:
+                return False
+
+        return True
+
     async def user_may_create_room_alias(
         self, userid: str, room_alias: RoomAlias
     ) -> bool:
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
index 7a6eb3e51667..1bb8ca7145fd 100644
--- a/synapse/events/third_party_rules.py
+++ b/synapse/events/third_party_rules.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
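
# Sketch of the veto pattern used by the spam-checker hooks above (e.g.
# user_may_join_room): every registered callback is consulted in order, and
# any explicit False blocks the action. A trimmed-down SpamChecker with a
# single hook type is used here, not Synapse's full class.
import asyncio
from typing import Awaitable, Callable, List

UserMayJoinRoom = Callable[[str, str, bool], Awaitable[bool]]


class SpamChecker:
    def __init__(self) -> None:
        self._user_may_join_room_callbacks: List[UserMayJoinRoom] = []

    def register(self, cb: UserMayJoinRoom) -> None:
        self._user_may_join_room_callbacks.append(cb)

    async def user_may_join_room(self, user_id: str, room_id: str, is_invited: bool) -> bool:
        for callback in self._user_may_join_room_callbacks:
            if await callback(user_id, room_id, is_invited) is False:
                return False  # any single veto wins
        return True


async def invite_only(user_id: str, room_id: str, is_invited: bool) -> bool:
    # Example module policy: only invited users may join.
    return is_invited


checker = SpamChecker()
checker.register(invite_only)
print(asyncio.run(checker.user_may_join_room("@a:x", "!r:x", False)))  # False
print(asyncio.run(checker.user_may_join_room("@a:x", "!r:x", True)))   # True
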
import logging -from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, List, Optional, Tuple -from synapse.api.errors import SynapseError +from synapse.api.errors import ModuleFailedException, SynapseError from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.types import Requester, StateMap @@ -36,16 +36,17 @@ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[ [str, StateMap[EventBase], str], Awaitable[bool] ] +ON_NEW_EVENT_CALLBACK = Callable[[EventBase, StateMap[EventBase]], Awaitable] -def load_legacy_third_party_event_rules(hs: "HomeServer"): +def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: """Wrapper that loads a third party event rules module configured using the old configuration, and registers the hooks they implement. """ - if hs.config.third_party_event_rules is None: + if hs.config.thirdpartyrules.third_party_event_rules is None: return - module, config = hs.config.third_party_event_rules + module, config = hs.config.thirdpartyrules.third_party_event_rules api = hs.get_module_api() third_party_rules = module(config=config, module_api=api) @@ -77,9 +78,9 @@ async def wrap_check_event_allowed( event: EventBase, state_events: StateMap[EventBase], ) -> Tuple[bool, Optional[dict]]: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(event, state_events) @@ -98,9 +99,9 @@ async def wrap_check_event_allowed( async def wrap_on_create_room( requester: Requester, config: dict, is_requester_admin: bool ) -> None: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(requester, config, is_requester_admin) @@ -112,9 +113,10 @@ async def wrap_on_create_room( return wrap_on_create_room - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # f is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. 
+        # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
         assert f is not None
 
         return maybe_awaitable(f(*args, **kwargs))
@@ -151,6 +153,7 @@ def __init__(self, hs: "HomeServer"):
         self._check_visibility_can_be_modified_callbacks: List[
             CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
         ] = []
+        self._on_new_event_callbacks: List[ON_NEW_EVENT_CALLBACK] = []
 
     def register_third_party_rules_callbacks(
         self,
@@ -162,7 +165,8 @@ def register_third_party_rules_callbacks(
         check_visibility_can_be_modified: Optional[
             CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
         ] = None,
-    ):
+        on_new_event: Optional[ON_NEW_EVENT_CALLBACK] = None,
+    ) -> None:
         """Register callbacks from modules for each hook."""
         if check_event_allowed is not None:
             self._check_event_allowed_callbacks.append(check_event_allowed)
@@ -180,6 +184,9 @@ def register_third_party_rules_callbacks(
                 check_visibility_can_be_modified,
             )
 
+        if on_new_event is not None:
+            self._on_new_event_callbacks.append(on_new_event)
+
     async def check_event_allowed(
         self, event: EventBase, context: EventContext
     ) -> Tuple[bool, Optional[dict]]:
@@ -217,9 +224,19 @@ async def check_event_allowed(
         for callback in self._check_event_allowed_callbacks:
             try:
                 res, replacement_data = await callback(event, state_events)
-            except Exception as e:
-                logger.warning("Failed to run module API callback %s: %s", callback, e)
-                continue
+            except SynapseError as e:
+                # FIXME: Being able to throw SynapseErrors is relied upon by
+                # some modules. PR #10386 accidentally broke this ability.
+                # That said, we aren't keen on exposing this implementation detail
+                # to modules and we should one day have a proper way to do what
+                # is wanted.
+                # This module callback needs a rework so that hacks such as
+                # this one are not necessary.
+                raise e
+            except Exception:
+                raise ModuleFailedException(
+                    "Failed to run `check_event_allowed` module API callback"
+                )
 
         # Return if the event shouldn't be allowed or if the module came up with a
         # replacement dict for the event.
@@ -311,6 +328,31 @@ async def check_visibility_can_be_modified(
 
         return True
 
+    async def on_new_event(self, event_id: str) -> None:
+        """Let modules act on events after they've been sent (e.g. auto-accepting
+        invites, etc.)
+
+        Args:
+            event_id: The ID of the event.
+
+        Note that exceptions raised by callbacks are caught and logged rather
+        than propagated.
+        """
+        # Bail out early without hitting the store if we don't have any callbacks
+        if len(self._on_new_event_callbacks) == 0:
+            return
+
+        event = await self.store.get_event(event_id)
+        state_events = await self._get_state_map_for_room(event.room_id)
+
+        for callback in self._on_new_event_callbacks:
+            try:
+                await callback(event, state_events)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
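A hedged sketch of a module consuming the new `on_new_event` hook via `register_third_party_rules_callbacks`, the registration path added above. The membership check is illustrative; as the implementation above shows, exceptions raised here are logged rather than failing the send.

```python
from synapse.events import EventBase
from synapse.types import StateMap

class ExampleRulesModule:
    def __init__(self, config: dict, api):
        self._api = api
        api.register_third_party_rules_callbacks(on_new_event=self.on_new_event)

    async def on_new_event(
        self, event: EventBase, state_events: StateMap[EventBase]
    ) -> None:
        # Runs after the event has been sent; errors here are logged by Synapse.
        if event.type == "m.room.member" and event.content.get("membership") == "join":
            print("user %s joined %s" % (event.state_key, event.room_id))
```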
 
     async def _get_state_map_for_room(self, room_id: str) -> StateMap[EventBase]:
         """Given a room ID, return the state events of that room.
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index a0c07f62f44b..84ef69df679b 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -1,4 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,18 +14,32 @@
 # limitations under the License.
 
 import collections.abc
 import re
-from typing import Any, Mapping, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Union,
+)
 
 from frozendict import frozendict
 
-from synapse.api.constants import EventTypes, RelationTypes
+from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersion
+from synapse.types import JsonDict
 from synapse.util.async_helpers import yieldable_gather_results
 from synapse.util.frozenutils import unfreeze
 
 from . import EventBase
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
 # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
 # (?<!stuff) matches if the current position in the string is not preceded
 # by a match for 'stuff'.
 SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
 
+CANONICALJSON_MAX_INT = (2 ** 53) - 1
+CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
+
 
 def prune_event(event: EventBase) -> EventBase:
     """Returns a pruned version of the given event, which removes all keys we
@@ -62,7 +80,7 @@ def prune_event(event: EventBase) -> EventBase:
     return pruned_event
 
 
-def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict:
+def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict:
     """Redacts the event_dict in the same way as `prune_event`, except it
     operates on dicts rather than event objects
 
@@ -94,13 +112,15 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict:
 
     new_content = {}
 
-    def add_fields(*fields):
+    def add_fields(*fields: str) -> None:
         for field in fields:
             if field in event_dict["content"]:
                 new_content[field] = event_dict["content"][field]
 
     if event_type == EventTypes.Member:
         add_fields("membership")
+        if room_version.msc3375_redaction_rules:
+            add_fields(EventContentFields.AUTHORISING_USER)
     elif event_type == EventTypes.Create:
         # MSC2176 rules state that create events cannot be redacted.
         if room_version.msc2176_redaction_rules:
@@ -135,12 +155,18 @@ def add_fields(*fields):
         add_fields("history_visibility")
     elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules:
         add_fields("redacts")
+    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION:
+        add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID)
+    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH:
+        add_fields(EventContentFields.MSC2716_BATCH_ID)
+    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
+        add_fields(EventContentFields.MSC2716_MARKER_INSERTION)
 
     allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
 
     allowed_fields["content"] = new_content
 
-    unsigned = {}
+    unsigned: JsonDict = {}
     allowed_fields["unsigned"] = unsigned
 
     event_unsigned = event_dict.get("unsigned", {})
@@ -153,16 +179,16 @@ def add_fields(*fields):
     return allowed_fields
 
 
-def _copy_field(src, dst, field):
+def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
     """Copy the field in 'src' to 'dst'.
 
     For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"]
     then dst={"foo":{"bar":5}}.
 
     Args:
-        src(dict): The dict to read from.
-        dst(dict): The dict to modify.
-        field(list): List of keys to drill down to in 'src'.
+        src: The dict to read from.
+        dst: The dict to modify.
+        field: List of keys to drill down to in 'src'.
""" if len(field) == 0: # this should be impossible return @@ -194,7 +220,7 @@ def _copy_field(src, dst, field): sub_out_dict[key_to_move] = sub_dict[key_to_move] -def only_fields(dictionary, fields): +def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: """Return a new dict with only the fields in 'dictionary' which are present in 'fields'. @@ -204,11 +230,11 @@ def only_fields(dictionary, fields): A literal '.' character in a field name may be escaped using a '\'. Args: - dictionary(dict): The dictionary to read from. - fields(list): A list of fields to copy over. Only shallow refs are + dictionary: The dictionary to read from. + fields: A list of fields to copy over. Only shallow refs are taken. Returns: - dict: A new dictionary with only the given fields. If fields was empty, + A new dictionary with only the given fields. If fields was empty, the same dictionary is returned. """ if len(fields) == 0: @@ -224,17 +250,17 @@ def only_fields(dictionary, fields): [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields ] - output = {} + output: JsonDict = {} for field_array in split_fields: _copy_field(dictionary, output, field_array) return output -def format_event_raw(d): +def format_event_raw(d: JsonDict) -> JsonDict: return d -def format_event_for_client_v1(d): +def format_event_for_client_v1(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) sender = d.get("sender") @@ -256,7 +282,7 @@ def format_event_for_client_v1(d): return d -def format_event_for_client_v2(d): +def format_event_for_client_v2(d: JsonDict) -> JsonDict: drop_keys = ( "auth_events", "prev_events", @@ -271,37 +297,38 @@ def format_event_for_client_v2(d): return d -def format_event_for_client_v2_without_room_id(d): +def format_event_for_client_v2_without_room_id(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) d.pop("room_id", None) return d def serialize_event( - e, - time_now_ms, - as_client_event=True, - event_format=format_event_for_client_v1, - token_id=None, - only_event_fields=None, - include_stripped_room_state=False, -): + e: Union[JsonDict, EventBase], + time_now_ms: int, + *, + as_client_event: bool = True, + event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1, + token_id: Optional[str] = None, + only_event_fields: Optional[List[str]] = None, + include_stripped_room_state: bool = False, +) -> JsonDict: """Serialize event for clients Args: - e (EventBase) - time_now_ms (int) - as_client_event (bool) + e + time_now_ms + as_client_event event_format token_id only_event_fields - include_stripped_room_state (bool): Some events can have stripped room state + include_stripped_room_state: Some events can have stripped room state stored in the `unsigned` field. This is required for invite and knock functionality. If this option is False, that state will be removed from the event before it is returned. Otherwise, it will be kept. Returns: - dict + The serialized event dictionary. """ # FIXME(erikj): To handle the case of presence events and the like @@ -358,104 +385,161 @@ class EventClientSerializer: clients. 
""" - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - self.experimental_msc1849_support_enabled = ( - hs.config.experimental_msc1849_support_enabled - ) + self._msc1849_enabled = hs.config.experimental.msc1849_enabled + self._msc3440_enabled = hs.config.experimental.msc3440_enabled async def serialize_event( - self, event, time_now, bundle_aggregations=True, **kwargs - ): + self, + event: Union[JsonDict, EventBase], + time_now: int, + *, + bundle_aggregations: bool = True, + **kwargs: Any, + ) -> JsonDict: """Serializes a single event. Args: - event (EventBase) - time_now (int): The current time in milliseconds - bundle_aggregations (bool): Whether to bundle in related events + event: The event being serialized. + time_now: The current time in milliseconds + bundle_aggregations: Whether to include the bundled aggregations for this + event. Only applies to non-state events. (State events never include + bundled aggregations.) **kwargs: Arguments to pass to `serialize_event` Returns: - dict: The serialized event + The serialized event """ # To handle the case of presence events and the like if not isinstance(event, EventBase): return event - event_id = event.event_id serialized_event = serialize_event(event, time_now, **kwargs) - # If MSC1849 is enabled then we need to look if there are any relations - # we need to bundle in with the event. - # Do not bundle relations if the event has been redacted - if not event.internal_metadata.is_redacted() and ( - self.experimental_msc1849_support_enabled and bundle_aggregations + # Check if there are any bundled aggregations to include with the event. + # + # Do not bundle aggregations if any of the following at true: + # + # * Support is disabled via the configuration or the caller. + # * The event is a state event. + # * The event has been redacted. + if ( + self._msc1849_enabled + and bundle_aggregations + and not event.is_state() + and not event.internal_metadata.is_redacted() ): - annotations = await self.store.get_aggregation_groups_for_event(event_id) - references = await self.store.get_relations_for_event( - event_id, RelationTypes.REFERENCE, direction="f" - ) + await self._injected_bundled_aggregations(event, time_now, serialized_event) + + return serialized_event + + async def _injected_bundled_aggregations( + self, event: EventBase, time_now: int, serialized_event: JsonDict + ) -> None: + """Potentially injects bundled aggregations into the unsigned portion of the serialized event. + + Args: + event: The event being serialized. + time_now: The current time in milliseconds + serialized_event: The serialized event which may be modified. - if annotations.chunk: - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.ANNOTATION] = annotations.to_dict() - - if references.chunk: - r = serialized_event["unsigned"].setdefault("m.relations", {}) - r[RelationTypes.REFERENCE] = references.to_dict() - - edit = None - if event.type == EventTypes.Message: - edit = await self.store.get_applicable_edit(event_id) - - if edit: - # If there is an edit replace the content, preserving existing - # relations. - - # Ensure we take copies of the edit content, otherwise we risk modifying - # the original event. 
-                edit_content = edit.content.copy()
-
-                # Unfreeze the event content if necessary, so that we may modify it below
-                edit_content = unfreeze(edit_content)
-                serialized_event["content"] = edit_content.get("m.new_content", {})
-
-                # Check for existing relations
-                relations = event.content.get("m.relates_to")
-                if relations:
-                    # Keep the relations, ensuring we use a dict copy of the original
-                    serialized_event["content"]["m.relates_to"] = relations.copy()
-                else:
-                    serialized_event["content"].pop("m.relates_to", None)
-
-                r = serialized_event["unsigned"].setdefault("m.relations", {})
-                r[RelationTypes.REPLACE] = {
-                    "event_id": edit.event_id,
-                    "origin_server_ts": edit.origin_server_ts,
-                    "sender": edit.sender,
+        """
+        # Do not bundle aggregations for an event which represents an edit or an
+        # annotation. It does not make sense for them to have related events.
+        relates_to = event.content.get("m.relates_to")
+        if isinstance(relates_to, (dict, frozendict)):
+            relation_type = relates_to.get("rel_type")
+            if relation_type in (RelationTypes.ANNOTATION, RelationTypes.REPLACE):
+                return
+
+        event_id = event.event_id
+
+        # The bundled aggregations to include.
+        aggregations = {}
+
+        annotations = await self.store.get_aggregation_groups_for_event(event_id)
+        if annotations.chunk:
+            aggregations[RelationTypes.ANNOTATION] = annotations.to_dict()
+
+        references = await self.store.get_relations_for_event(
+            event_id, RelationTypes.REFERENCE, direction="f"
+        )
+        if references.chunk:
+            aggregations[RelationTypes.REFERENCE] = references.to_dict()
+
+        edit = None
+        if event.type == EventTypes.Message:
+            edit = await self.store.get_applicable_edit(event_id)
+
+        if edit:
+            # If there is an edit replace the content, preserving existing
+            # relations.
+
+            # Ensure we take copies of the edit content, otherwise we risk modifying
+            # the original event.
+            edit_content = edit.content.copy()
+
+            # Unfreeze the event content if necessary, so that we may modify it below
+            edit_content = unfreeze(edit_content)
+            serialized_event["content"] = edit_content.get("m.new_content", {})
+
+            # Check for existing relations
+            relates_to = event.content.get("m.relates_to")
+            if relates_to:
+                # Keep the relations, ensuring we use a dict copy of the original
+                serialized_event["content"]["m.relates_to"] = relates_to.copy()
+            else:
+                serialized_event["content"].pop("m.relates_to", None)
+
+            aggregations[RelationTypes.REPLACE] = {
+                "event_id": edit.event_id,
+                "origin_server_ts": edit.origin_server_ts,
+                "sender": edit.sender,
+            }
+
+        # If this event is the start of a thread, include a summary of the replies.
+        if self._msc3440_enabled:
+            (
+                thread_count,
+                latest_thread_event,
+            ) = await self.store.get_thread_summary(event_id)
+            if latest_thread_event:
+                aggregations[RelationTypes.THREAD] = {
+                    # Don't bundle aggregations as this could recurse forever.
+                    "latest_event": await self.serialize_event(
+                        latest_thread_event, time_now, bundle_aggregations=False
+                    ),
+                    "count": thread_count,
                 }
 
-            return serialized_event
+        # If any bundled aggregations were found, include them.
+        if aggregations:
+            serialized_event["unsigned"].setdefault("m.relations", {}).update(
+                aggregations
+            )
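To make the shape of the injected data concrete, here is a hand-written illustration (not produced by this code) of an event's `unsigned` section after aggregations for reactions and an edit have been bundled:

```python
# Illustrative only; event IDs and values are invented.
serialized_event = {
    "event_id": "$abc123:example.com",
    "type": "m.room.message",
    "unsigned": {
        "m.relations": {
            "m.annotation": {
                "chunk": [{"type": "m.reaction", "key": "👍", "count": 3}]
            },
            "m.replace": {
                "event_id": "$edit456:example.com",
                "origin_server_ts": 1635000000000,
                "sender": "@alice:example.com",
            },
        }
    },
}
# Only relation types that actually have aggregations are present.
assert "m.relations" in serialized_event["unsigned"]
```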
 
-    def serialize_events(self, events, time_now, **kwargs):
+    async def serialize_events(
+        self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any
+    ) -> List[JsonDict]:
         """Serializes multiple events.
 
         Args:
-            event
+            events
             time_now: The current time in milliseconds
             **kwargs: Arguments to pass to `serialize_event`
 
         Returns:
-            Deferred[list[dict]]: The list of serialized events
+            The list of serialized events
         """
-        return yieldable_gather_results(
+        return await yieldable_gather_results(
             self.serialize_event, events, time_now=time_now, **kwargs
         )
 
 
 def copy_power_levels_contents(
     old_power_levels: Mapping[str, Union[int, Mapping[str, int]]]
-):
+) -> Dict[str, Union[int, Dict[str, int]]]:
     """Copy the content of a power_levels event, unfreezing frozendicts along the way
 
     Raises:
@@ -464,7 +548,7 @@ def copy_power_levels_contents(
     if not isinstance(old_power_levels, collections.abc.Mapping):
         raise TypeError("Not a valid power-levels content: %r" % (old_power_levels,))
 
-    power_levels = {}
+    power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
     for k, v in old_power_levels.items():
 
         if isinstance(v, int):
@@ -472,7 +556,8 @@ def copy_power_levels_contents(
             continue
 
         if isinstance(v, collections.abc.Mapping):
-            power_levels[k] = h = {}
+            h: Dict[str, int] = {}
+            power_levels[k] = h
             for k1, v1 in v.items():
                 # we should only have one level of nesting
                 if not isinstance(v1, int):
@@ -487,7 +572,7 @@ def copy_power_levels_contents(
     return power_levels
 
 
-def validate_canonicaljson(value: Any):
+def validate_canonicaljson(value: Any) -> None:
     """
     Ensure that the JSON object is valid according to the rules of canonical JSON.
 
@@ -499,7 +584,7 @@ def validate_canonicaljson(value: Any):
     * NaN, Infinity, -Infinity
     """
     if isinstance(value, int):
-        if value <= -(2 ** 53) or 2 ** 53 <= value:
+        if value < CANONICALJSON_MIN_INT or CANONICALJSON_MAX_INT < value:
            raise SynapseError(400, "JSON integer out of range", Codes.BAD_JSON)
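As a quick sanity check, the named constants accept exactly the same range as the old inline test: integers whose magnitude fits in the exactly-representable range of an IEEE-754 double. A small standalone sketch:

```python
# Matches the constants added in synapse/events/utils.py above.
CANONICALJSON_MAX_INT = (2 ** 53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT

def is_canonical_int(value: int) -> bool:
    # Mirrors validate_canonicaljson's integer check, minus the SynapseError.
    return CANONICALJSON_MIN_INT <= value <= CANONICALJSON_MAX_INT

assert is_canonical_int(2 ** 53 - 1)     # largest allowed value
assert not is_canonical_int(2 ** 53)     # rejected, same as the old check
assert not is_canonical_int(-(2 ** 53))  # rejected, same as the old check
```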
 
     elif isinstance(value, float):
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index fa6987d7cbac..cf8693496846 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -11,8 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import collections.abc
+from typing import Iterable, Union
 
-from typing import Union
+import jsonschema
 
 from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
@@ -20,13 +22,17 @@
 from synapse.config.homeserver import HomeServerConfig
 from synapse.events import EventBase
 from synapse.events.builder import EventBuilder
-from synapse.events.utils import validate_canonicaljson
+from synapse.events.utils import (
+    CANONICALJSON_MAX_INT,
+    CANONICALJSON_MIN_INT,
+    validate_canonicaljson,
+)
 from synapse.federation.federation_server import server_matches_acl_event
-from synapse.types import EventID, RoomID, UserID
+from synapse.types import EventID, JsonDict, RoomID, UserID
 
 
 class EventValidator:
-    def validate_new(self, event: EventBase, config: HomeServerConfig):
+    def validate_new(self, event: EventBase, config: HomeServerConfig) -> None:
         """Validates the event has roughly the right format
 
         Args:
@@ -49,7 +55,7 @@ def validate_new(self, event: EventBase, config: HomeServerConfig):
         ]
 
         for k in required:
-            if not hasattr(event, k):
+            if k not in event:
                 raise SynapseError(400, "Event does not have key %s" % (k,))
 
         # Check that the following keys have string values
@@ -82,12 +88,35 @@ def validate_new(self, event: EventBase, config: HomeServerConfig):
             self._validate_retention(event)
 
         if event.type == EventTypes.ServerACL:
-            if not server_matches_acl_event(config.server_name, event):
+            if not server_matches_acl_event(config.server.server_name, event):
                 raise SynapseError(
                     400, "Can't create an ACL event that denies the local server"
                 )
 
-    def _validate_retention(self, event: EventBase):
+        if event.type == EventTypes.PowerLevels:
+            try:
+                jsonschema.validate(
+                    instance=event.content,
+                    schema=POWER_LEVELS_SCHEMA,
+                    cls=plValidator,
+                )
+            except jsonschema.ValidationError as e:
+                if e.path:
+                    # example: "users_default": '0' is not of type 'integer'
+                    message = '"' + e.path[-1] + '": ' + e.message  # noqa: B306
+                    # jsonschema.ValidationError.message is a valid attribute
+                else:
+                    # example: '0' is not of type 'integer'
+                    message = e.message  # noqa: B306
+                    # jsonschema.ValidationError.message is a valid attribute
+
+                raise SynapseError(
+                    code=400,
+                    msg=message,
+                    errcode=Codes.BAD_JSON,
+                )
+
+    def _validate_retention(self, event: EventBase) -> None:
         """Checks that an event that defines the retention policy for a room respects the
         format enforced by the spec.
 
@@ -127,7 +156,7 @@ def _validate_retention(self, event: EventBase):
                 errcode=Codes.BAD_JSON,
             )
 
-    def validate_builder(self, event: Union[EventBase, EventBuilder]):
+    def validate_builder(self, event: Union[EventBase, EventBuilder]) -> None:
         """Validates that the builder/event has roughly the right format.
         Only checks values that we expect a proto event to have, rather than all
         the fields an event would have
 
@@ -175,13 +204,59 @@ def validate_builder(self, event: Union[EventBase, EventBuilder]):
 
             self._ensure_state_event(event)
 
-    def _ensure_strings(self, d, keys):
+    def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None:
         for s in keys:
             if s not in d:
                 raise SynapseError(400, "'%s' not in content" % (s,))
             if not isinstance(d[s], str):
                 raise SynapseError(400, "'%s' not a string type" % (s,))
 
-    def _ensure_state_event(self, event):
+    def _ensure_state_event(self, event: Union[EventBase, EventBuilder]) -> None:
         if not event.is_state():
             raise SynapseError(400, "'%s' must be state events" % (event.type,))
+
+
+POWER_LEVELS_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "ban": {"$ref": "#/definitions/int"},
+        "events": {"$ref": "#/definitions/objectOfInts"},
+        "events_default": {"$ref": "#/definitions/int"},
+        "invite": {"$ref": "#/definitions/int"},
+        "kick": {"$ref": "#/definitions/int"},
+        "notifications": {"$ref": "#/definitions/objectOfInts"},
+        "redact": {"$ref": "#/definitions/int"},
+        "state_default": {"$ref": "#/definitions/int"},
+        "users": {"$ref": "#/definitions/objectOfInts"},
+        "users_default": {"$ref": "#/definitions/int"},
+    },
+    "definitions": {
+        "int": {
+            "type": "integer",
+            "minimum": CANONICALJSON_MIN_INT,
+            "maximum": CANONICALJSON_MAX_INT,
+        },
+        "objectOfInts": {
+            "type": "object",
+            "additionalProperties": {"$ref": "#/definitions/int"},
+        },
+    },
+}
+
+
+# This could return something newer than Draft 7, but that's the current "latest"
+# validator.
+def _create_power_level_validator() -> jsonschema.Draft7Validator:
+    validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
+
+    # by default jsonschema does not consider a frozendict to be an object so
+    # we need to use a custom type checker
+    # https://python-jsonschema.readthedocs.io/en/stable/validate/?highlight=object#validating-with-additional-types
+    type_checker = validator.TYPE_CHECKER.redefine(
+        "object", lambda checker, thing: isinstance(thing, collections.abc.Mapping)
+    )
+
+    return jsonschema.validators.extend(validator, type_checker=type_checker)
+
+
+plValidator = _create_power_level_validator()
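A short usage sketch of the schema and validator defined above (requires the `jsonschema` package; the sample contents are invented):

```python
import jsonschema

# Reusing POWER_LEVELS_SCHEMA and plValidator from the diff above.
good = {"ban": 50, "users": {"@alice:example.com": 100}}
bad = {"users_default": "0"}  # strings are rejected by the "int" definition

jsonschema.validate(instance=good, schema=POWER_LEVELS_SCHEMA, cls=plValidator)

try:
    jsonschema.validate(instance=bad, schema=POWER_LEVELS_SCHEMA, cls=plValidator)
except jsonschema.ValidationError as e:
    # e.path[-1] == "users_default", as used to build the SynapseError message
    print(e.path[-1], e.message)
```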
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 024e440ff401..f56344a3b94f 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -14,8 +14,9 @@
 # limitations under the License.
 import logging
 from collections import namedtuple
+from typing import TYPE_CHECKING
 
-from synapse.api.constants import MAX_DEPTH, EventTypes, Membership
+from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import EventFormatVersions, RoomVersion
 from synapse.crypto.event_signing import check_event_content_hash
@@ -25,11 +26,15 @@
 from synapse.http.servlet import assert_params_in_dict
 from synapse.types import JsonDict, get_domain_from_id
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
 logger = logging.getLogger(__name__)
 
 
 class FederationBase:
-    def __init__(self, hs):
+    def __init__(self, hs: "HomeServer"):
         self.hs = hs
 
         self.server_name = hs.hostname
@@ -184,10 +189,10 @@ async def _check_sigs_on_pdu(
             room_version.msc3083_join_rules
             and pdu.type == EventTypes.Member
             and pdu.membership == Membership.JOIN
-            and "join_authorised_via_users_server" in pdu.content
+            and EventContentFields.AUTHORISING_USER in pdu.content
         ):
             authorising_server = get_domain_from_id(
-                pdu.content["join_authorised_via_users_server"]
+                pdu.content[EventContentFields.AUTHORISING_USER]
             )
             try:
                 await keyring.verify_event_for_server(
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index b7a10da15a89..fee1477ab684 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -37,12 +37,13 @@
 import attr
 from prometheus_client import Counter
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     CodeMessageException,
     Codes,
     FederationDeniedError,
     HttpResponseException,
+    RequestSendFailed,
     SynapseError,
     UnsupportedRoomVersionError,
 )
@@ -110,7 +111,24 @@ def __init__(self, hs: "HomeServer"):
             reset_expiry_on_get=False,
         )
 
-    def _clear_tried_cache(self):
+        # A cache for fetching the room hierarchy over federation.
+        #
+        # Some stale data over federation is OK, but must be refreshed
+        # periodically since the local server is in the room.
+        #
+        # It is a map of (room ID, suggested-only) -> the response of
+        # get_room_hierarchy.
+        self._get_room_hierarchy_cache: ExpiringCache[
+            Tuple[str, bool], Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]
+        ] = ExpiringCache(
+            cache_name="get_room_hierarchy_cache",
+            clock=self._clock,
+            max_len=1000,
+            expiry_ms=5 * 60 * 1000,
+            reset_expiry_on_get=False,
+        )
+
+    def _clear_tried_cache(self) -> None:
         """Clear pdu_destination_tried cache"""
         now = self._clock.time_msec()
 
@@ -209,7 +227,7 @@ async def claim_client_keys(
         )
 
     async def backfill(
-        self, dest: str, room_id: str, limit: int, extremities: Iterable[str]
+        self, dest: str, room_id: str, limit: int, extremities: Collection[str]
     ) -> Optional[List[EventBase]]:
         """Requests some more historic PDUs for the given room from the given
         destination server.
@@ -219,6 +237,8 @@ async def backfill(
             room_id: The room_id to backfill.
             limit: The maximum number of events to return.
             extremities: our current backwards extremities, to backfill from
+                Must be a Collection that is falsy when empty.
+                (Iterable is not enough here!)
         """
         logger.debug("backfill extrem=%s", extremities)
 
@@ -232,11 +252,22 @@ async def backfill(
 
         logger.debug("backfill transaction_data=%r", transaction_data)
 
+        if not isinstance(transaction_data, dict):
+            # TODO we probably want an exception type specific to federation
+            # client validation.
+            raise TypeError("Backfill transaction_data is not a dict.")
+
+        transaction_data_pdus = transaction_data.get("pdus")
+        if not isinstance(transaction_data_pdus, list):
+            # TODO we probably want an exception type specific to federation
+            # client validation.
+            raise TypeError("transaction_data.pdus is not a list.")
+
         room_version = await self.store.get_room_version(room_id)
 
         pdus = [
             event_from_pdu_json(p, room_version, outlier=False)
-            for p in transaction_data["pdus"]
+            for p in transaction_data_pdus
         ]
 
         # Check signatures and hash of pdus, removing any from the list that fail checks
@@ -246,6 +277,58 @@ async def backfill(
 
         return pdus
 
+    async def get_pdu_from_destination_raw(
+        self,
+        destination: str,
+        event_id: str,
+        room_version: RoomVersion,
+        outlier: bool = False,
+        timeout: Optional[int] = None,
+    ) -> Optional[EventBase]:
+        """Requests the PDU with given origin and ID from the remote home
+        server. Does not have any caching or rate limiting!
+
+        Args:
+            destination: Which homeserver to query
+            event_id: event to fetch
+            room_version: version of the room
+            outlier: Indicates whether the PDU is an `outlier`, i.e. if
+                it's from an arbitrary point in the context as opposed to part
+                of the current block of PDUs. Defaults to `False`
+            timeout: How long to try (in ms) each destination for before
+                moving to the next destination. None indicates no timeout.
+
+        Returns:
+            The requested PDU, or None if we were unable to find it.
+
+        Raises:
+            SynapseError, NotRetryingDestination, FederationDeniedError
+        """
+        transaction_data = await self.transport_layer.get_event(
+            destination, event_id, timeout=timeout
+        )
+
+        logger.debug(
+            "retrieved event id %s from %s: %r",
+            event_id,
+            destination,
+            transaction_data,
+        )
+
+        pdu_list: List[EventBase] = [
+            event_from_pdu_json(p, room_version, outlier=outlier)
+            for p in transaction_data["pdus"]
+        ]
+
+        if pdu_list and pdu_list[0]:
+            pdu = pdu_list[0]
+
+            # Check signatures are correct.
+            signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
+            return signed_pdu
+
+        return None
+
     async def get_pdu(
         self,
         destinations: Iterable[str],
@@ -290,30 +373,14 @@ async def get_pdu(
                 continue
 
             try:
-                transaction_data = await self.transport_layer.get_event(
-                    destination, event_id, timeout=timeout
-                )
-
-                logger.debug(
-                    "retrieved event id %s from %s: %r",
-                    event_id,
-                    destination,
-                    transaction_data,
+                signed_pdu = await self.get_pdu_from_destination_raw(
+                    destination=destination,
+                    event_id=event_id,
+                    room_version=room_version,
+                    outlier=outlier,
+                    timeout=timeout,
                 )
 
-                pdu_list: List[EventBase] = [
-                    event_from_pdu_json(p, room_version, outlier=outlier)
-                    for p in transaction_data["pdus"]
-                ]
-
-                if pdu_list and pdu_list[0]:
-                    pdu = pdu_list[0]
-
-                    # Check signatures are correct.
-                    signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
-
-                    break
-
                 pdu_attempts[destination] = now
 
             except SynapseError as e:
@@ -483,8 +550,6 @@ async def get_event_auth(
             destination, auth_chain, outlier=True, room_version=room_version
         )
 
-        signed_auth.sort(key=lambda e: e.depth)
-
         return signed_auth
 
     def _is_unknown_endpoint(
@@ -558,7 +623,11 @@ async def _try_destination_list(
 
             try:
                 return await callback(destination)
-            except InvalidResponseError as e:
+            except (
+                RequestSendFailed,
+                InvalidResponseError,
+                NotRetryingDestination,
+            ) as e:
                 logger.warning("Failed to %s via %s: %s", description, destination, e)
             except UnsupportedRoomVersionError:
                 raise
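For readers unfamiliar with `_try_destination_list`, the loop it implements boils down to the following simplified pattern (a plain-Python sketch with stand-in exception types, not the actual helper):

```python
from typing import Awaitable, Callable, Iterable, TypeVar

T = TypeVar("T")

class FailoverError(Exception):
    """Hypothetical stand-in for the SynapseError(502) raised on total failure."""

async def try_destinations(
    destinations: Iterable[str],
    callback: Callable[[str], Awaitable[T]],
) -> T:
    for destination in destinations:
        try:
            return await callback(destination)
        except (ConnectionError, ValueError) as e:
            # Stand-ins for RequestSendFailed / InvalidResponseError etc.:
            # log and move on to the next destination.
            print(f"failed via {destination}: {e}")
    raise FailoverError("no destination succeeded")
```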
""" - async def send_request(destination) -> SendJoinResult: + async def send_request(destination: str) -> SendJoinResult: response = await self._do_send_join(room_version, destination, pdu) # If an event was returned (and expected to be returned): @@ -855,9 +924,9 @@ async def _execute(pdu: EventBase) -> None: # If the join is being authorised via allow rules, we need to send # the /send_join back to the same server that was originally used # with /make_join. - if "join_authorised_via_users_server" in pdu.content: + if EventContentFields.AUTHORISING_USER in pdu.content: destinations = [ - get_domain_from_id(pdu.content["join_authorised_via_users_server"]) + get_domain_from_id(pdu.content[EventContentFields.AUTHORISING_USER]) ] return await self._try_destination_list( @@ -1108,7 +1177,8 @@ async def get_public_rooms( The response from the remote server. Raises: - HttpResponseException: There was an exception returned from the remote server + HttpResponseException / RequestSendFailed: There was an exception + returned from the remote server SynapseException: M_FORBIDDEN when the remote server has disallowed publicRoom requests over federation @@ -1289,8 +1359,243 @@ async def send_request(destination: str) -> FederationSpaceSummaryResult: failover_on_unknown_endpoint=True, ) + async def get_room_hierarchy( + self, + destinations: Iterable[str], + room_id: str, + suggested_only: bool, + ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]: + """ + Call other servers to get a hierarchy of the given room. + + Performs simple data validates and parsing of the response. + + Args: + destinations: The remote servers. We will try them in turn, omitting any + that have been blacklisted. + room_id: ID of the space to be queried + suggested_only: If true, ask the remote server to only return children + with the "suggested" flag set + + Returns: + A tuple of: + The room as a JSON dictionary. + A list of children rooms, as JSON dictionaries. + A list of inaccessible children room IDs. + + Raises: + SynapseError if we were unable to get a valid summary from any of the + remote servers + """ + + cached_result = self._get_room_hierarchy_cache.get((room_id, suggested_only)) + if cached_result: + return cached_result + + async def send_request( + destination: str, + ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]: + try: + res = await self.transport_layer.get_room_hierarchy( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) + except HttpResponseException as e: + # If an error is received that is due to an unrecognised endpoint, + # fallback to the unstable endpoint. Otherwise consider it a + # legitmate error and raise. + if not self._is_unknown_endpoint(e): + raise + + logger.debug( + "Couldn't fetch room hierarchy with the v1 API, falling back to the unstable API" + ) + + res = await self.transport_layer.get_room_hierarchy_unstable( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) + + room = res.get("room") + if not isinstance(room, dict): + raise InvalidResponseError("'room' must be a dict") + + # Validate children_state of the room. 
+        async def send_request(
+            destination: str,
+        ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]:
+            try:
+                res = await self.transport_layer.get_room_hierarchy(
+                    destination=destination,
+                    room_id=room_id,
+                    suggested_only=suggested_only,
+                )
+            except HttpResponseException as e:
+                # If an error is received that is due to an unrecognised endpoint,
+                # fallback to the unstable endpoint. Otherwise consider it a
+                # legitimate error and raise.
+                if not self._is_unknown_endpoint(e):
+                    raise
+
+                logger.debug(
+                    "Couldn't fetch room hierarchy with the v1 API, falling back to the unstable API"
+                )
+
+                res = await self.transport_layer.get_room_hierarchy_unstable(
+                    destination=destination,
+                    room_id=room_id,
+                    suggested_only=suggested_only,
+                )
+
+            room = res.get("room")
+            if not isinstance(room, dict):
+                raise InvalidResponseError("'room' must be a dict")
+
+            # Validate children_state of the room.
+            children_state = room.get("children_state", [])
+            if not isinstance(children_state, Sequence):
+                raise InvalidResponseError("'room.children_state' must be a list")
+            if any(not isinstance(e, dict) for e in children_state):
+                raise InvalidResponseError("Invalid event in 'children_state' list")
+            try:
+                [
+                    FederationSpaceSummaryEventResult.from_json_dict(e)
+                    for e in children_state
+                ]
+            except ValueError as e:
+                raise InvalidResponseError(str(e))
+
+            # Validate the children rooms.
+            children = res.get("children", [])
+            if not isinstance(children, Sequence):
+                raise InvalidResponseError("'children' must be a list")
+            if any(not isinstance(r, dict) for r in children):
+                raise InvalidResponseError("Invalid room in 'children' list")
+
+            # Validate the inaccessible children.
+            inaccessible_children = res.get("inaccessible_children", [])
+            if not isinstance(inaccessible_children, Sequence):
+                raise InvalidResponseError("'inaccessible_children' must be a list")
+            if any(not isinstance(r, str) for r in inaccessible_children):
+                raise InvalidResponseError(
+                    "Invalid room ID in 'inaccessible_children' list"
+                )
+
+            return room, children, inaccessible_children
+
+        try:
+            result = await self._try_destination_list(
+                "fetch room hierarchy",
+                destinations,
+                send_request,
+                failover_on_unknown_endpoint=True,
+            )
+        except SynapseError as e:
+            # If an unexpected error occurred, re-raise it.
+            if e.code != 502:
+                raise
+
+            logger.debug(
+                "Couldn't fetch room hierarchy, falling back to the spaces API"
+            )
+
+            # Fallback to the old federation API and translate the results if
+            # no servers implement the new API.
+            #
+            # The algorithm below is a bit inefficient as it only attempts to
+            # parse information for the requested room, but the legacy API may
+            # return additional layers.
+            legacy_result = await self.get_space_summary(
+                destinations,
+                room_id,
+                suggested_only,
+                max_rooms_per_space=None,
+                exclude_rooms=[],
+            )
+
+            # Find the requested room in the response (and remove it).
+            for _i, room in enumerate(legacy_result.rooms):
+                if room.get("room_id") == room_id:
+                    break
+            else:
+                # The requested room was not returned, nothing we can do.
+                raise
+            requested_room = legacy_result.rooms.pop(_i)
+
+            # Find any children events of the requested room.
+            children_events = []
+            children_room_ids = set()
+            for event in legacy_result.events:
+                if event.room_id == room_id:
+                    children_events.append(event.data)
+                    children_room_ids.add(event.state_key)
+            # And add them under the requested room.
+            requested_room["children_state"] = children_events
+
+            # Find the children rooms.
+            children = []
+            for room in legacy_result.rooms:
+                if room.get("room_id") in children_room_ids:
+                    children.append(room)
+
+            # It isn't clear from the response whether some of the rooms are
+            # not accessible.
+            result = (requested_room, children, ())
+
+        # Cache the result to avoid fetching data over federation every time.
+        self._get_room_hierarchy_cache[(room_id, suggested_only)] = result
+        return result
+
+    async def timestamp_to_event(
+        self, destination: str, room_id: str, timestamp: int, direction: str
+    ) -> "TimestampToEventResponse":
+        """
+        Calls a remote federating server at `destination` asking for their
+        closest event to the given timestamp in the given direction. Also
+        validates the response to always return the expected keys or raises an
+        error.
+
+        Args:
+            destination: Domain name of the remote homeserver
+            room_id: Room to fetch the event from
+            timestamp: The point in time (inclusive) we should navigate from in
+                the given direction to find the closest event.
+            direction: ["f"|"b"] to indicate whether we should navigate forward
+                or backward from the given timestamp to find the closest event.
+
+        Returns:
+            A parsed TimestampToEventResponse including the closest event_id
+            and origin_server_ts
+
+        Raises:
+            Various exceptions when the request fails
+            InvalidResponseError when the response does not have the correct
+            keys or the values are of the wrong types
+        """
+        remote_response = await self.transport_layer.timestamp_to_event(
+            destination, room_id, timestamp, direction
+        )
+
+        if not isinstance(remote_response, dict):
+            raise InvalidResponseError(
+                "Response must be a JSON dictionary but received %r" % remote_response
+            )
+
+        try:
+            return TimestampToEventResponse.from_json_dict(remote_response)
+        except ValueError as e:
+            raise InvalidResponseError(str(e))
+
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class TimestampToEventResponse:
+    """Typed response dictionary for the federation /timestamp_to_event endpoint"""
+
+    event_id: str
+    origin_server_ts: int
+
+    # the raw data, including the above keys
+    data: JsonDict
+
+    @classmethod
+    def from_json_dict(cls, d: JsonDict) -> "TimestampToEventResponse":
+        """Parsed response from the federation /timestamp_to_event endpoint
+
+        Args:
+            d: JSON object response to be parsed
+
+        Raises:
+            ValueError if d does not have the correct keys or they are the wrong types
+        """
+
+        event_id = d.get("event_id")
+        if not isinstance(event_id, str):
+            raise ValueError(
+                "Invalid response: 'event_id' must be a str but received %r" % event_id
+            )
+
+        origin_server_ts = d.get("origin_server_ts")
+        if not isinstance(origin_server_ts, int):
+            raise ValueError(
+                "Invalid response: 'origin_server_ts' must be an int but received %r"
+                % origin_server_ts
+            )
+
+        return cls(event_id, origin_server_ts, d)
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class FederationSpaceSummaryEventResult:
     """Represents a single event in the result of a successful get_space_summary call.
 
@@ -1299,12 +1604,13 @@ class FederationSpaceSummaryEventResult:
     object attributes.
""" - event_type = attr.ib(type=str) - state_key = attr.ib(type=str) - via = attr.ib(type=Sequence[str]) + event_type: str + room_id: str + state_key: str + via: Sequence[str] # the raw data, including the above keys - data = attr.ib(type=JsonDict) + data: JsonDict @classmethod def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryEventResult": @@ -1321,6 +1627,10 @@ def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryEventResult": if not isinstance(event_type, str): raise ValueError("Invalid event: 'event_type' must be a str") + room_id = d.get("room_id") + if not isinstance(room_id, str): + raise ValueError("Invalid event: 'room_id' must be a str") + state_key = d.get("state_key") if not isinstance(state_key, str): raise ValueError("Invalid event: 'state_key' must be a str") @@ -1335,15 +1645,15 @@ def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryEventResult": if any(not isinstance(v, str) for v in via): raise ValueError("Invalid event: 'via' must be a list of strings") - return cls(event_type, state_key, via, d) + return cls(event_type, room_id, state_key, via, d) -@attr.s(frozen=True, slots=True) +@attr.s(frozen=True, slots=True, auto_attribs=True) class FederationSpaceSummaryResult: """Represents the data returned by a successful get_space_summary call.""" - rooms = attr.ib(type=Sequence[JsonDict]) - events = attr.ib(type=Sequence[FederationSpaceSummaryEventResult]) + rooms: List[JsonDict] + events: Sequence[FederationSpaceSummaryEventResult] @classmethod def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryResult": @@ -1356,7 +1666,7 @@ def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryResult": ValueError if d is not a valid /spaces/ response """ rooms = d.get("rooms") - if not isinstance(rooms, Sequence): + if not isinstance(rooms, List): raise ValueError("'rooms' must be a list") if any(not isinstance(r, dict) for r in rooms): raise ValueError("Invalid room in 'rooms' list") diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 145b9161d985..8e37e76206ac 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -1,6 +1,6 @@ # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2018 New Vector Ltd -# Copyright 2019 Matrix.org Federation C.I.C +# Copyright 2019-2021 Matrix.org Federation C.I.C # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@
 from twisted.internet.abstract import isIPAddress
 from twisted.python import failure
 
-from synapse.api.constants import EduTypes, EventTypes, Membership
+from synapse.api.constants import EduTypes, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -110,6 +110,8 @@ def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
         self.handler = hs.get_federation_handler()
+        self.storage = hs.get_storage()
+        self._federation_event_handler = hs.get_federation_event_handler()
         self.state = hs.get_state_handler()
         self._event_auth_handler = hs.get_event_auth_handler()
 
@@ -195,25 +197,82 @@ async def on_backfill_request(
             origin, room_id, versions, limit
         )
 
-        res = self._transaction_from_pdus(pdus).get_dict()
+        res = self._transaction_dict_from_pdus(pdus)
 
         return 200, res
 
-    async def on_incoming_transaction(
-        self, origin: str, transaction_data: JsonDict
+    async def on_timestamp_to_event_request(
+        self, origin: str, room_id: str, timestamp: int, direction: str
     ) -> Tuple[int, Dict[str, Any]]:
+        """When we receive a federated `/timestamp_to_event` request,
+        handle all of the logic for validating and fetching the event.
+
+        Args:
+            origin: The server we received the event from
+            room_id: Room to fetch the event from
+            timestamp: The point in time (inclusive) we should navigate from in
+                the given direction to find the closest event.
+            direction: ["f"|"b"] to indicate whether we should navigate forward
+                or backward from the given timestamp to find the closest event.
+
+        Returns:
+            Tuple indicating the response status code and dictionary response
+            body including `event_id`.
+        """
+        with (await self._server_linearizer.queue((origin, room_id))):
+            origin_host, _ = parse_server_name(origin)
+            await self.check_server_matches_acl(origin_host, room_id)
+
+            # We only try to fetch data from the local database
+            event_id = await self.store.get_event_id_for_timestamp(
+                room_id, timestamp, direction
+            )
+            if event_id:
+                event = await self.store.get_event(
+                    event_id, allow_none=False, allow_rejected=False
+                )
+
+                return 200, {
+                    "event_id": event_id,
+                    "origin_server_ts": event.origin_server_ts,
+                }
+
+        raise SynapseError(
+            404,
+            "Unable to find event from %s in direction %s" % (timestamp, direction),
+            errcode=Codes.NOT_FOUND,
+        )
+
+    async def on_incoming_transaction(
+        self,
+        origin: str,
+        transaction_id: str,
+        destination: str,
+        transaction_data: JsonDict,
+    ) -> Tuple[int, JsonDict]:
         # If we receive a transaction we should make sure that we kick off
         # handling of any old events in the staging area.
         if not self._started_handling_of_staged_events:
             self._started_handling_of_staged_events = True
             self._handle_old_staged_events()
 
+            # Start a periodic check for old staged events. This is to handle
+            # the case where locks time out, e.g. if another process gets killed
+            # without dropping its locks.
+            self._clock.looping_call(self._handle_old_staged_events, 60 * 1000)
+
         # keep this as early as possible to make the calculated origin ts as
         # accurate as possible.
         request_time = self._clock.time_msec()
 
-        transaction = Transaction(**transaction_data)
-        transaction_id = transaction.transaction_id  # type: ignore
+        transaction = Transaction(
+            transaction_id=transaction_id,
+            destination=destination,
+            origin=origin,
+            origin_server_ts=transaction_data.get("origin_server_ts"),  # type: ignore
+            pdus=transaction_data.get("pdus"),  # type: ignore
+            edus=transaction_data.get("edus"),
+        )
 
         if not transaction_id:
             raise Exception("Transaction missing transaction_id")
@@ -221,9 +280,7 @@ async def on_incoming_transaction(
         logger.debug("[%s] Got transaction", transaction_id)
 
         # Reject malformed transactions early: reject if too many PDUs/EDUs
-        if len(transaction.pdus) > 50 or (  # type: ignore
-            hasattr(transaction, "edus") and len(transaction.edus) > 100  # type: ignore
-        ):
+        if len(transaction.pdus) > 50 or len(transaction.edus) > 100:
             logger.info("Transaction PDU or EDU count too large. Returning 400")
             return 400, {}
 
@@ -263,7 +320,7 @@ async def _on_incoming_transaction_inner(
         # CRITICAL SECTION: the first thing we must do (before awaiting) is
         # add an entry to _active_transactions.
         assert origin not in self._active_transactions
-        self._active_transactions[origin] = transaction.transaction_id  # type: ignore
+        self._active_transactions[origin] = transaction.transaction_id
 
         try:
             result = await self._handle_incoming_transaction(
@@ -286,16 +343,18 @@ async def _handle_incoming_transaction(
         Returns:
             HTTP response code and body
         """
-        response = await self.transaction_actions.have_responded(origin, transaction)
+        existing_response = await self.transaction_actions.have_responded(
+            origin, transaction
+        )
 
-        if response:
+        if existing_response:
             logger.debug(
                 "[%s] We've already responded to this request",
-                transaction.transaction_id,  # type: ignore
+                transaction.transaction_id,
             )
-            return response
+            return existing_response
 
-        logger.debug("[%s] Transaction is new", transaction.transaction_id)  # type: ignore
+        logger.debug("[%s] Transaction is new", transaction.transaction_id)
 
         # We process PDUs and EDUs in parallel. This is important as we don't
         # want to block things like to device messages from reaching clients
@@ -334,7 +393,7 @@ async def _handle_pdus_in_txn(
             report back to the sending server.
         """
 
-        received_pdus_counter.inc(len(transaction.pdus))  # type: ignore
+        received_pdus_counter.inc(len(transaction.pdus))
 
         origin_host, _ = parse_server_name(origin)
 
@@ -342,7 +401,7 @@ async def _handle_pdus_in_txn(
 
         newest_pdu_ts = 0
 
-        for p in transaction.pdus:  # type: ignore
+        for p in transaction.pdus:
             # FIXME (richardv): I don't think this works:
             #  https://github.com/matrix-org/synapse/issues/8429
             if "unsigned" in p:
@@ -391,7 +450,7 @@ async def _handle_pdus_in_txn(
         # require callouts to other servers to fetch missing events), but
         # impose a limit to avoid going too crazy with ram/cpu.
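The limit this comment refers to is applied a few lines below via `concurrently_execute(..., TRANSACTION_CONCURRENCY_LIMIT)`. A rough asyncio equivalent of that helper, for illustration only (Synapse's real implementation is Twisted-based):

```python
import asyncio
from typing import Awaitable, Callable, Iterable, TypeVar

T = TypeVar("T")

async def concurrently_execute(
    func: Callable[[T], Awaitable[None]], args: Iterable[T], limit: int
) -> None:
    # Run func over all args in parallel, but never more than `limit` at once.
    semaphore = asyncio.Semaphore(limit)

    async def _run_one(arg: T) -> None:
        async with semaphore:
            await func(arg)

    await asyncio.gather(*(_run_one(a) for a in args))
```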
-        async def process_pdus_for_room(room_id: str):
+        async def process_pdus_for_room(room_id: str) -> None:
             with nested_logging_context(room_id):
                 logger.debug("Processing PDUs for %s", room_id)
 
@@ -436,10 +495,10 @@ async def process_pdu(pdu: EventBase) -> JsonDict:
 
         return pdu_results
 
-    async def _handle_edus_in_txn(self, origin: str, transaction: Transaction):
+    async def _handle_edus_in_txn(self, origin: str, transaction: Transaction) -> None:
         """Process the EDUs in a received transaction."""
 
-        async def _process_edu(edu_dict):
+        async def _process_edu(edu_dict: JsonDict) -> None:
             received_edus_counter.inc()
 
             edu = Edu(
@@ -452,13 +511,13 @@ async def _process_edu(edu_dict):
 
         await concurrently_execute(
             _process_edu,
-            getattr(transaction, "edus", []),
+            transaction.edus,
             TRANSACTION_CONCURRENCY_LIMIT,
         )
 
     async def on_room_state_request(
         self, origin: str, room_id: str, event_id: Optional[str]
-    ) -> Tuple[int, Dict[str, Any]]:
+    ) -> Tuple[int, JsonDict]:
         origin_host, _ = parse_server_name(origin)
         await self.check_server_matches_acl(origin_host, room_id)
 
@@ -472,7 +531,7 @@ async def on_room_state_request(
         #   - but that's non-trivial to get right, and anyway somewhat defeats
         #     the point of the linearizer.
         with (await self._server_linearizer.queue((origin, room_id))):
-            resp = dict(
+            resp: JsonDict = dict(
                 await self._state_resp_cache.wrap(
                     (room_id, event_id),
                     self._on_context_state_request_compute,
@@ -488,7 +547,7 @@ async def on_room_state_request(
 
     async def on_state_ids_request(
         self, origin: str, room_id: str, event_id: str
-    ) -> Tuple[int, Dict[str, Any]]:
+    ) -> Tuple[int, JsonDict]:
         if not event_id:
             raise NotImplementedError("Specify an event")
 
@@ -508,7 +567,9 @@ async def on_state_ids_request(
 
         return 200, resp
 
-    async def _on_state_ids_request_compute(self, room_id, event_id):
+    async def _on_state_ids_request_compute(
+        self, room_id: str, event_id: str
+    ) -> JsonDict:
         state_ids = await self.handler.get_state_ids_for_pdu(room_id, event_id)
         auth_chain_ids = await self.store.get_auth_chain_ids(room_id, state_ids)
         return {"pdu_ids": state_ids, "auth_chain_ids": auth_chain_ids}
@@ -538,7 +599,7 @@ async def on_pdu_request(
         pdu = await self.handler.get_persisted_pdu(origin, event_id)
 
         if pdu:
-            return 200, self._transaction_from_pdus([pdu]).get_dict()
+            return 200, self._transaction_dict_from_pdus([pdu])
         else:
             return 404, ""
 
@@ -597,8 +658,11 @@ async def on_send_join_request(
         state = await self.store.get_events(state_ids)
 
         time_now = self._clock.time_msec()
+        event_json = event.get_pdu_json()
         return {
-            "org.matrix.msc3083.v2.event": event.get_pdu_json(),
+            # TODO Remove the unstable prefix when servers have updated.
+            "org.matrix.msc3083.v2.event": event_json,
+            "event": event_json,
             "state": [p.get_pdu_json(time_now) for p in state.values()],
             "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain],
         }
@@ -623,7 +687,7 @@ async def on_send_leave_request(
 
     async def on_make_knock_request(
         self, origin: str, room_id: str, user_id: str, supported_versions: List[str]
-    ) -> Dict[str, Union[EventBase, str]]:
+    ) -> JsonDict:
         """We've received a /make_knock/ request, so we create a partial
         knock event for the room and hand that back, along with the room version, to the
         knocking homeserver.
         We do *not* persist or process this event until the other server has
@@ -756,11 +820,11 @@ async def _on_send_membership_event(
         if (
             room_version.msc3083_join_rules
             and event.membership == Membership.JOIN
-            and "join_authorised_via_users_server" in event.content
+            and EventContentFields.AUTHORISING_USER in event.content
         ):
             # We can only authorise our own users.
             authorising_server = get_domain_from_id(
-                event.content["join_authorised_via_users_server"]
+                event.content[EventContentFields.AUTHORISING_USER]
             )
             if authorising_server != self.server_name:
                 raise SynapseError(
@@ -779,7 +843,9 @@ async def _on_send_membership_event(
 
         event = await self._check_sigs_and_hash(room_version, event)
 
-        return await self.handler.on_send_membership_event(origin, event)
+        return await self._federation_event_handler.on_send_membership_event(
+            origin, event
+        )
 
     async def on_event_auth(
         self, origin: str, room_id: str, event_id: str
@@ -879,18 +945,20 @@ async def on_openid_userinfo(self, token: str) -> Optional[str]:
         ts_now_ms = self._clock.time_msec()
         return await self.store.get_user_id_for_open_id_token(token, ts_now_ms)
 
-    def _transaction_from_pdus(self, pdu_list: List[EventBase]) -> Transaction:
+    def _transaction_dict_from_pdus(self, pdu_list: List[EventBase]) -> JsonDict:
         """Returns a new Transaction containing the given PDUs suitable for
         transmission.
         """
         time_now = self._clock.time_msec()
         pdus = [p.get_pdu_json(time_now) for p in pdu_list]
         return Transaction(
+            # Just need a dummy transaction ID and destination since it won't be used.
+            transaction_id="",
             origin=self.server_name,
             pdus=pdus,
             origin_server_ts=int(time_now),
-            destination=None,
-        )
+            destination="",
+        ).get_dict()
 
     async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None:
         """Process a PDU received in a federation /send/ transaction.
@@ -962,13 +1030,18 @@ async def _process_incoming_pdus_in_room_inner(
         # the room, so instead of pulling the event out of the DB and parsing
         # the event we just pull out the next event ID and check if that matches.
         if latest_event is not None and latest_origin is not None:
-            (
-                next_origin,
-                next_event_id,
-            ) = await self.store.get_next_staged_event_id_for_room(room_id)
-            if next_origin != latest_origin or next_event_id != latest_event.event_id:
+            result = await self.store.get_next_staged_event_id_for_room(room_id)
+            if result is None:
                 latest_origin = None
                 latest_event = None
+            else:
+                next_origin, next_event_id = result
+                if (
+                    next_origin != latest_origin
+                    or next_event_id != latest_event.event_id
+                ):
+                    latest_origin = None
+                    latest_event = None
 
         if latest_origin is None or latest_event is None:
             next = await self.store.get_next_staged_event_for_room(
@@ -988,10 +1061,12 @@ async def _process_incoming_pdus_in_room_inner(
         # has started processing).
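The loop that follows re-acquires the lock between batches so that a crashed worker's stale lock cannot stall the room forever. A simplified sketch of that acquire-process-reacquire pattern, with hypothetical helper signatures:

```python
# Hypothetical helpers; only the lock-handling shape mirrors the code below.
async def drain_staged_events(store, room_id: str) -> None:
    lock = await store.try_acquire_lock("federation_inbound", room_id)
    if not lock:
        return  # another worker is already processing this room

    while True:
        async with lock:
            item = await store.get_next_staged_event_for_room(room_id)  # hypothetical
            if item is None:
                return
            await process(item)  # hypothetical

        # Re-acquire before the next iteration; bail out if we lost the race.
        new_lock = await store.try_acquire_lock("federation_inbound", room_id)
        if not new_lock:
            return
        lock = new_lock
```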
         while True:
             async with lock:
+                logger.info("handling received PDU: %s", event)
                 try:
-                    await self.handler.on_receive_pdu(
-                        origin, event, sent_to_us_directly=True
-                    )
+                    with nested_logging_context(event.event_id):
+                        await self._federation_event_handler.on_receive_pdu(
+                            origin, event
+                        )
                 except FederationError as e:
                     # XXX: Ideally we'd inform the remote we failed to process
                     # the event, but we can't return an error in the transaction
@@ -1041,11 +1116,12 @@ async def _process_incoming_pdus_in_room_inner(
 
                 origin, event = next
 
-                lock = await self.store.try_acquire_lock(
+                new_lock = await self.store.try_acquire_lock(
                     _INBOUND_EVENT_HANDLING_LOCK_NAME, room_id
                 )
-                if not lock:
+                if not new_lock:
                     return
+                lock = new_lock
 
     def __str__(self) -> str:
         return "<ReplicationLayer(%s)>" % self.server_name
@@ -1209,10 +1285,6 @@ def register_query_handler(
 
         self.query_handlers[query_type] = handler
 
-    def register_instance_for_edu(self, edu_type: str, instance_name: str) -> None:
-        """Register that the EDU handler is on a different instance than master."""
-        self._edu_type_to_instance[edu_type] = [instance_name]
-
     def register_instances_for_edu(
         self, edu_type: str, instance_names: List[str]
     ) -> None:
@@ -1220,7 +1292,7 @@ def register_instances_for_edu(
         self._edu_type_to_instance[edu_type] = instance_names
 
     async def on_edu(self, edu_type: str, origin: str, content: dict) -> None:
-        if not self.config.use_presence and edu_type == EduTypes.Presence:
+        if not self.config.server.use_presence and edu_type == EduTypes.Presence:
             return
 
         # Check if we have a handler on this instance
diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py
index 2f9c9bc2cdc8..523ab1c51ed1 100644
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
@@ -1,4 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,6 +24,7 @@
 
 from synapse.federation.units import Transaction
 from synapse.logging.utils import log_function
+from synapse.storage.databases.main import DataStore
 from synapse.types import JsonDict
 
 logger = logging.getLogger(__name__)
@@ -31,7 +33,7 @@
 class TransactionActions:
     """Defines persistence actions that relate to handling Transactions."""
 
-    def __init__(self, datastore):
+    def __init__(self, datastore: DataStore):
         self.store = datastore
 
     @log_function
@@ -45,7 +47,7 @@ async def have_responded(
             `None` if we have not previously responded to this transaction or a
             2-tuple of `(int, dict)` representing the response code and response body.
""" - transaction_id = transaction.transaction_id # type: ignore + transaction_id = transaction.transaction_id if not transaction_id: raise RuntimeError("Cannot persist a transaction with no transaction_id") @@ -56,7 +58,7 @@ async def set_response( self, origin: str, transaction: Transaction, code: int, response: JsonDict ) -> None: """Persist how we responded to a transaction.""" - transaction_id = transaction.transaction_id # type: ignore + transaction_id = transaction.transaction_id if not transaction_id: raise RuntimeError("Cannot persist a transaction with no transaction_id") diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 1fbf325fdc9a..63289a5a334f 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -350,7 +351,7 @@ class BaseFederationRow: TypeId = "" # Unique string that ids the type. Must be overridden in sub classes. @staticmethod - def from_data(data): + def from_data(data: JsonDict) -> "BaseFederationRow": """Parse the data from the federation stream into a row. Args: @@ -359,7 +360,7 @@ def from_data(data): """ raise NotImplementedError() - def to_data(self): + def to_data(self) -> JsonDict: """Serialize this row to be sent over the federation stream. Returns: @@ -368,7 +369,7 @@ def to_data(self): """ raise NotImplementedError() - def add_to_buffer(self, buff): + def add_to_buffer(self, buff: "ParsedFederationStreamData") -> None: """Add this row to the appropriate field in the buffer ready for this to be sent over federation. 
@@ -391,15 +392,15 @@ class PresenceDestinationsRow( TypeId = "pd" @staticmethod - def from_data(data): + def from_data(data: JsonDict) -> "PresenceDestinationsRow": return PresenceDestinationsRow( state=UserPresenceState.from_dict(data["state"]), destinations=data["dests"] ) - def to_data(self): + def to_data(self) -> JsonDict: return {"state": self.state.as_dict(), "dests": self.destinations} - def add_to_buffer(self, buff): + def add_to_buffer(self, buff: "ParsedFederationStreamData") -> None: buff.presence_destinations.append((self.state, self.destinations)) @@ -417,13 +418,13 @@ class KeyedEduRow( TypeId = "k" @staticmethod - def from_data(data): + def from_data(data: JsonDict) -> "KeyedEduRow": return KeyedEduRow(key=tuple(data["key"]), edu=Edu(**data["edu"])) - def to_data(self): + def to_data(self) -> JsonDict: return {"key": self.key, "edu": self.edu.get_internal_dict()} - def add_to_buffer(self, buff): + def add_to_buffer(self, buff: "ParsedFederationStreamData") -> None: buff.keyed_edus.setdefault(self.edu.destination, {})[self.key] = self.edu @@ -433,13 +434,13 @@ class EduRow(BaseFederationRow, namedtuple("EduRow", ("edu",))): # Edu TypeId = "e" @staticmethod - def from_data(data): + def from_data(data: JsonDict) -> "EduRow": return EduRow(Edu(**data)) - def to_data(self): + def to_data(self) -> JsonDict: return self.edu.get_internal_dict() - def add_to_buffer(self, buff): + def add_to_buffer(self, buff: "ParsedFederationStreamData") -> None: buff.edus.setdefault(self.edu.destination, []).append(self.edu) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index d980e0d9866a..720d7bd74d07 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -22,6 +22,7 @@ from typing_extensions import Literal from twisted.internet import defer +from twisted.internet.interfaces import IDelayedCall import synapse.metrics from synapse.api.presence import UserPresenceState @@ -280,11 +281,14 @@ def __init__(self, hs: "HomeServer"): self._queues_awaiting_rr_flush_by_room: Dict[str, Set[PerDestinationQueue]] = {} self._rr_txn_interval_per_room_ms = ( - 1000.0 / hs.config.federation_rr_transactions_per_room_per_second + 1000.0 + / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second ) # wake up destinations that have outstanding PDUs to be caught up - self._catchup_after_startup_timer = self.clock.call_later( + self._catchup_after_startup_timer: Optional[ + IDelayedCall + ] = self.clock.call_later( CATCH_UP_STARTUP_DELAY_SEC, run_as_background_process, "wake_destinations_needing_catchup", @@ -406,7 +410,7 @@ async def handle_event(event: EventBase) -> None: now = self.clock.time_msec() ts = await self.store.get_received_ts(event.event_id) - + assert ts is not None synapse.metrics.event_processing_lag_by_event.labels( "federation_sender" ).observe((now - ts) / 1000) @@ -435,6 +439,7 @@ async def handle_room_events(events: Iterable[EventBase]) -> None: if events: now = self.clock.time_msec() ts = await self.store.get_received_ts(events[-1].event_id) + assert ts is not None synapse.metrics.event_processing_lag.labels( "federation_sender" @@ -589,7 +594,7 @@ def send_presence_to_destinations( destinations (list[str]) """ - if not states or not self.hs.config.use_presence: + if not states or not self.hs.config.server.use_presence: # No-op if presence is disabled. 
return diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index c11d1f6d3152..391b30fbb559 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -1,5 +1,6 @@ # Copyright 2014-2016 OpenMarket Ltd # Copyright 2019 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +15,8 @@ # limitations under the License. import datetime import logging -from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple +from types import TracebackType +from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple, Type import attr from prometheus_client import Counter @@ -213,7 +215,7 @@ def send_keyed_edu(self, edu: Edu, key: Hashable) -> None: self._pending_edus_keyed[(edu.edu_type, key)] = edu self.attempt_new_transaction() - def send_edu(self, edu) -> None: + def send_edu(self, edu: Edu) -> None: self._pending_edus.append(edu) self.attempt_new_transaction() @@ -560,7 +562,7 @@ async def _get_device_update_edus(self, limit: int) -> Tuple[List[Edu], int]: assert len(edus) <= limit, "get_device_updates_by_remote returned too many EDUs" - return (edus, now_stream_id) + return edus, now_stream_id async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int]: last_device_stream_id = self._last_device_stream_id @@ -593,7 +595,7 @@ async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int] stream_id, ) - return (edus, stream_id) + return edus, stream_id def _start_catching_up(self) -> None: """ @@ -701,7 +703,12 @@ async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]: return self._pdus, pending_edus - async def __aexit__(self, exc_type, exc, tb): + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[TracebackType], + ) -> None: if exc_type is not None: # Failed to send transaction, so we bail out. return diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 72a635830b9a..ab935e5a7eda 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -27,6 +27,7 @@ tags, whitelisted_homeserver, ) +from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.metrics import measure_func @@ -104,13 +105,13 @@ async def send_new_transaction( len(edus), ) - transaction = Transaction.create_new( + transaction = Transaction( origin_server_ts=int(self.clock.time_msec()), transaction_id=txn_id, origin=self._server_name, destination=destination, - pdus=pdus, - edus=edus, + pdus=[p.get_pdu_json() for p in pdus], + edus=[edu.get_dict() for edu in edus], ) self._next_txn_id += 1 @@ -131,7 +132,7 @@ async def send_new_transaction( # FIXME (richardv): I also believe it no longer works. We (now?) store # "age_ts" in "unsigned" rather than at the top level. See # https://github.com/matrix-org/synapse/issues/8429. 
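# A rough, standalone sketch of the Transaction change above: PDUs and EDUs
# are now serialised by the caller (event.get_pdu_json(), edu.get_dict()), so
# the transaction layer only ever sees plain JSON dicts. The builder below is
# hypothetical; the field names follow the diff.
import time
from typing import Any, Dict, List

JsonDict = Dict[str, Any]

def build_transaction_dict(
    origin: str,
    destination: str,
    txn_id: str,
    pdu_jsons: List[JsonDict],
    edu_jsons: List[JsonDict],
) -> JsonDict:
    return {
        "origin": origin,
        "destination": destination,
        "transaction_id": txn_id,
        "origin_server_ts": int(time.time() * 1000),
        "pdus": pdu_jsons,
        "edus": edu_jsons,
    }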
- def json_data_cb(): + def json_data_cb() -> JsonDict: data = transaction.get_dict() now = int(self.clock.time_msec()) if "pdus" in data: @@ -148,7 +149,6 @@ def json_data_cb(): ) except HttpResponseException as e: code = e.code - response = e.response set_tag(tags.ERROR, True) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 6a8d3ad4fe6d..9fc4c31c93f6 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -15,7 +15,20 @@ import logging import urllib -from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import ( + Any, + Awaitable, + Callable, + Collection, + Dict, + Generator, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, +) import attr import ijson @@ -100,7 +113,7 @@ async def get_event( @log_function async def backfill( - self, destination: str, room_id: str, event_tuples: Iterable[str], limit: int + self, destination: str, room_id: str, event_tuples: Collection[str], limit: int ) -> Optional[JsonDict]: """Requests `limit` previous PDUs in a given context before list of PDUs. @@ -108,7 +121,9 @@ async def backfill( Args: destination room_id - event_tuples + event_tuples: + Must be a Collection that is falsy when empty. + (Iterable is not enough here!) limit Returns: @@ -134,6 +149,42 @@ async def backfill( destination, path=path, args=args, try_trailing_slash_on_400=True ) + @log_function + async def timestamp_to_event( + self, destination: str, room_id: str, timestamp: int, direction: str + ) -> Union[JsonDict, List]: + """ + Calls a remote federating server at `destination` asking for their + closest event to the given timestamp in the given direction. + + Args: + destination: Domain name of the remote homeserver + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + Response dict received from the remote homeserver. + + Raises: + Various exceptions when the request fails + """ + path = _create_path( + FEDERATION_UNSTABLE_PREFIX, + "/org.matrix.msc3030/timestamp_to_event/%s", + room_id, + ) + + args = {"ts": [str(timestamp)], "dir": [direction]} + + remote_response = await self.client.get_json( + destination, path=path, args=args, try_trailing_slash_on_400=True + ) + + return remote_response + @log_function async def send_transaction( self, @@ -143,7 +194,7 @@ async def send_transaction( """Sends the given Transaction to its destination Args: - transaction (Transaction) + transaction Returns: Succeeds when we get a 2xx HTTP response. 
The result @@ -185,11 +236,16 @@ async def send_transaction( @log_function async def make_query( - self, destination, query_type, args, retry_on_dns_fail, ignore_backoff=False - ): + self, + destination: str, + query_type: str, + args: dict, + retry_on_dns_fail: bool, + ignore_backoff: bool = False, + ) -> JsonDict: path = _create_v1_path("/query/%s", query_type) - content = await self.client.get_json( + return await self.client.get_json( destination=destination, path=path, args=args, @@ -198,8 +254,6 @@ async def make_query( ignore_backoff=ignore_backoff, ) - return content - @log_function async def make_membership_event( self, @@ -786,7 +840,7 @@ async def accept_group_invite( @log_function def join_group( self, destination: str, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: + ) -> Awaitable[JsonDict]: """Attempts to join a group""" path = _create_v1_path("/groups/%s/users/%s/join", group_id, user_id) @@ -1177,6 +1231,42 @@ async def get_space_summary( destination=destination, path=path, data=params ) + async def get_room_hierarchy( + self, destination: str, room_id: str, suggested_only: bool + ) -> JsonDict: + """ + Args: + destination: The remote server + room_id: The room ID to ask about. + suggested_only: if True, only suggested rooms will be returned + """ + path = _create_v1_path("/hierarchy/%s", room_id) + + return await self.client.get_json( + destination=destination, + path=path, + args={"suggested_only": "true" if suggested_only else "false"}, + ) + + async def get_room_hierarchy_unstable( + self, destination: str, room_id: str, suggested_only: bool + ) -> JsonDict: + """ + Args: + destination: The remote server + room_id: The room ID to ask about. + suggested_only: if True, only suggested rooms will be returned + """ + path = _create_path( + FEDERATION_UNSTABLE_PREFIX, "/org.matrix.msc2946/hierarchy/%s", room_id + ) + + return await self.client.get_json( + destination=destination, + path=path, + args={"suggested_only": "true" if suggested_only else "false"}, + ) + def _create_path(federation_prefix: str, path: str, *args: str) -> str: """ @@ -1231,7 +1321,7 @@ class SendJoinResponse: @ijson.coroutine -def _event_parser(event_dict: JsonDict): +def _event_parser(event_dict: JsonDict) -> Generator[None, Tuple[str, Any], None]: """Helper function for use with `ijson.kvitems_coro` to parse key-value pairs to add them to a given dictionary. """ @@ -1242,7 +1332,9 @@ def _event_parser(event_dict: JsonDict): @ijson.coroutine -def _event_list_parser(room_version: RoomVersion, events: List[EventBase]): +def _event_list_parser( + room_version: RoomVersion, events: List[EventBase] +) -> Generator[None, JsonDict, None]: """Helper function for use with `ijson.items_coro` to parse an array of events and add them to the given list. """ @@ -1274,19 +1366,33 @@ def __init__(self, room_version: RoomVersion, v1_api: bool): self._coro_state = ijson.items_coro( _event_list_parser(room_version, self._response.state), prefix + "state.item", + use_float=True, ) self._coro_auth = ijson.items_coro( _event_list_parser(room_version, self._response.auth_events), prefix + "auth_chain.item", + use_float=True, ) - self._coro_event = ijson.kvitems_coro( + # TODO Remove the unstable prefix when servers have updated. + # + # By re-using the same event dictionary this will cause the parsing of + # org.matrix.msc3083.v2.event and event to stomp over each other. + # Generally this should be fine. 
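# A self-contained sketch of the push-mode ijson pattern used here: raw bytes
# are fed into parsing coroutines chunk by chunk, and use_float=True keeps
# non-integer JSON numbers as floats rather than Decimal. Toy document only.
import ijson

@ijson.coroutine
def collect(target):
    # Receives one fully-parsed value at a time from ijson.
    while True:
        target.append((yield))

state = []
coro = ijson.items_coro(collect(state), "state.item", use_float=True)
for chunk in (b'{"state": [{"a": 1}, ', b'{"b": 2.5}]}'):
    coro.send(chunk)
coro.close()
assert state == [{"a": 1}, {"b": 2.5}]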
+ self._coro_unstable_event = ijson.kvitems_coro( _event_parser(self._response.event_dict), prefix + "org.matrix.msc3083.v2.event", + use_float=True, + ) + self._coro_event = ijson.kvitems_coro( + _event_parser(self._response.event_dict), + prefix + "event", + use_float=True, ) def write(self, data: bytes) -> int: self._coro_state.send(data) self._coro_auth.send(data) + self._coro_unstable_event.send(data) self._coro_event.send(data) return len(data) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py deleted file mode 100644 index 5e059d6e09d4..000000000000 --- a/synapse/federation/transport/server.py +++ /dev/null @@ -1,2139 +0,0 @@ -# Copyright 2014-2021 The Matrix.org Foundation C.I.C. -# Copyright 2020 Sorunome -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -import logging -import re -from typing import ( - Container, - Dict, - List, - Mapping, - Optional, - Sequence, - Tuple, - Type, - Union, -) - -from typing_extensions import Literal - -import synapse -from synapse.api.constants import MAX_GROUP_CATEGORYID_LENGTH, MAX_GROUP_ROLEID_LENGTH -from synapse.api.errors import Codes, FederationDeniedError, SynapseError -from synapse.api.room_versions import RoomVersions -from synapse.api.urls import ( - FEDERATION_UNSTABLE_PREFIX, - FEDERATION_V1_PREFIX, - FEDERATION_V2_PREFIX, -) -from synapse.handlers.groups_local import GroupsLocalHandler -from synapse.http.server import HttpServer, JsonResource -from synapse.http.servlet import ( - parse_boolean_from_args, - parse_integer_from_args, - parse_json_object_from_request, - parse_string_from_args, - parse_strings_from_args, -) -from synapse.logging import opentracing -from synapse.logging.context import run_in_background -from synapse.logging.opentracing import ( - SynapseTags, - start_active_span, - start_active_span_from_request, - tags, - whitelisted_homeserver, -) -from synapse.server import HomeServer -from synapse.types import JsonDict, ThirdPartyInstanceID, get_domain_from_id -from synapse.util.ratelimitutils import FederationRateLimiter -from synapse.util.stringutils import parse_and_validate_server_name -from synapse.util.versionstring import get_version_string - -logger = logging.getLogger(__name__) - - -class TransportLayerServer(JsonResource): - """Handles incoming federation HTTP requests""" - - def __init__(self, hs: HomeServer, servlet_groups: Optional[List[str]] = None): - """Initialize the TransportLayerServer - - Will by default register all servlets. For custom behaviour, pass in - a list of servlet_groups to register. - - Args: - hs: homeserver - servlet_groups: List of servlet groups to register. - Defaults to ``DEFAULT_SERVLET_GROUPS``. 
- """ - self.hs = hs - self.clock = hs.get_clock() - self.servlet_groups = servlet_groups - - super().__init__(hs, canonical_json=False) - - self.authenticator = Authenticator(hs) - self.ratelimiter = hs.get_federation_ratelimiter() - - self.register_servlets() - - def register_servlets(self) -> None: - register_servlets( - self.hs, - resource=self, - ratelimiter=self.ratelimiter, - authenticator=self.authenticator, - servlet_groups=self.servlet_groups, - ) - - -class AuthenticationError(SynapseError): - """There was a problem authenticating the request""" - - -class NoAuthenticationError(AuthenticationError): - """The request had no authentication information""" - - -class Authenticator: - def __init__(self, hs: HomeServer): - self._clock = hs.get_clock() - self.keyring = hs.get_keyring() - self.server_name = hs.hostname - self.store = hs.get_datastore() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist - self.notifier = hs.get_notifier() - - self.replication_client = None - if hs.config.worker.worker_app: - self.replication_client = hs.get_tcp_replication() - - # A method just so we can pass 'self' as the authenticator to the Servlets - async def authenticate_request(self, request, content): - now = self._clock.time_msec() - json_request = { - "method": request.method.decode("ascii"), - "uri": request.uri.decode("ascii"), - "destination": self.server_name, - "signatures": {}, - } - - if content is not None: - json_request["content"] = content - - origin = None - - auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") - - if not auth_headers: - raise NoAuthenticationError( - 401, "Missing Authorization headers", Codes.UNAUTHORIZED - ) - - for auth in auth_headers: - if auth.startswith(b"X-Matrix"): - (origin, key, sig) = _parse_auth_header(auth) - json_request["origin"] = origin - json_request["signatures"].setdefault(origin, {})[key] = sig - - if ( - self.federation_domain_whitelist is not None - and origin not in self.federation_domain_whitelist - ): - raise FederationDeniedError(origin) - - if origin is None or not json_request["signatures"]: - raise NoAuthenticationError( - 401, "Missing Authorization headers", Codes.UNAUTHORIZED - ) - - await self.keyring.verify_json_for_server( - origin, - json_request, - now, - ) - - logger.debug("Request from %s", origin) - request.requester = origin - - # If we get a valid signed request from the other side, its probably - # alive - retry_timings = await self.store.get_destination_retry_timings(origin) - if retry_timings and retry_timings.retry_last_ts: - run_in_background(self._reset_retry_timings, origin) - - return origin - - async def _reset_retry_timings(self, origin): - try: - logger.info("Marking origin %r as up", origin) - await self.store.set_destination_retry_timings(origin, None, 0, 0) - - # Inform the relevant places that the remote server is back up. - self.notifier.notify_remote_server_up(origin) - if self.replication_client: - # If we're on a worker we try and inform master about this. The - # replication client doesn't hook into the notifier to avoid - # infinite loops where we send a `REMOTE_SERVER_UP` command to - # master, which then echoes it back to us which in turn pokes - # the notifier. 
-            self.replication_client.send_remote_server_up(origin)
-
-        except Exception:
-            logger.exception("Error resetting retry timings on %s", origin)
-
-
-def _parse_auth_header(header_bytes):
-    """Parse an X-Matrix auth header
-
-    Args:
-        header_bytes (bytes): header value
-
-    Returns:
-        Tuple[str, str, str]: origin, key id, signature.
-
-    Raises:
-        AuthenticationError if the header could not be parsed
-    """
-    try:
-        header_str = header_bytes.decode("utf-8")
-        params = header_str.split(" ")[1].split(",")
-        param_dict = dict(kv.split("=") for kv in params)
-
-        def strip_quotes(value):
-            if value.startswith('"'):
-                return value[1:-1]
-            else:
-                return value
-
-        origin = strip_quotes(param_dict["origin"])
-
-        # ensure that the origin is a valid server name
-        parse_and_validate_server_name(origin)
-
-        key = strip_quotes(param_dict["key"])
-        sig = strip_quotes(param_dict["sig"])
-        return origin, key, sig
-    except Exception as e:
-        logger.warning(
-            "Error parsing auth header '%s': %s",
-            header_bytes.decode("ascii", "replace"),
-            e,
-        )
-        raise AuthenticationError(
-            400, "Malformed Authorization header", Codes.UNAUTHORIZED
-        )
-
-
-class BaseFederationServlet:
-    """Abstract base class for federation servlet classes.
-
-    The servlet object should have a PATH attribute which takes the form of a regexp to
-    match against the request path (excluding the /federation/v1 prefix).
-
-    The servlet should also implement one or more of on_GET, on_POST, on_PUT, to match
-    the appropriate HTTP method. These methods must be *asynchronous* and have the
-    signature:
-
-        on_<METHOD>(self, origin, content, query, **kwargs)
-
-    With arguments:
-
-        origin (unicode|None): The authenticated server_name of the calling server,
-            unless REQUIRE_AUTH is set to False and authentication failed.
-
-        content (unicode|None): decoded json body of the request. None if the
-            request was a GET.
-
-        query (dict[bytes, list[bytes]]): Query params from the request. url-decoded
-            (ie, '+' and '%xx' are decoded) but note that it is *not* utf8-decoded
-            yet.
-
-        **kwargs (dict[unicode, unicode]): the dict mapping keys to path
-            components as specified in the path match regexp.
-
-    Returns:
-        Optional[Tuple[int, object]]: either (response code, response object) to
-            return a JSON response, or None if the request has already been handled.
-
-    Raises:
-        SynapseError: to return an error code
-
-        Exception: other exceptions will be caught, logged, and a 500 will be
-            returned.
-    """
-
-    PATH = ""  # Overridden in subclasses, the regex to match against the path.
-
-    REQUIRE_AUTH = True
-
-    PREFIX = FEDERATION_V1_PREFIX  # Allows specifying the API version
-
-    RATELIMIT = True  # Whether to rate limit requests or not
-
-    def __init__(
-        self,
-        hs: HomeServer,
-        authenticator: Authenticator,
-        ratelimiter: FederationRateLimiter,
-        server_name: str,
-    ):
-        self.hs = hs
-        self.authenticator = authenticator
-        self.ratelimiter = ratelimiter
-        self.server_name = server_name
-
-    def _wrap(self, func):
-        authenticator = self.authenticator
-        ratelimiter = self.ratelimiter
-
-        @functools.wraps(func)
-        async def new_func(request, *args, **kwargs):
-            """A callback which can be passed to HttpServer.RegisterPaths
-
-            Args:
-                request (twisted.web.http.Request):
-                *args: unused?
-                **kwargs (dict[unicode, unicode]): the dict mapping keys to path
-                    components as specified in the path match regexp.
-
-            Returns:
-                Tuple[int, object]|None: (response code, response object) as returned by
-                    the callback method. None if the request has already been handled.
- """ - content = None - if request.method in [b"PUT", b"POST"]: - # TODO: Handle other method types? other content types? - content = parse_json_object_from_request(request) - - try: - origin = await authenticator.authenticate_request(request, content) - except NoAuthenticationError: - origin = None - if self.REQUIRE_AUTH: - logger.warning( - "authenticate_request failed: missing authentication" - ) - raise - except Exception as e: - logger.warning("authenticate_request failed: %s", e) - raise - - request_tags = { - SynapseTags.REQUEST_ID: request.get_request_id(), - tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, - tags.HTTP_METHOD: request.get_method(), - tags.HTTP_URL: request.get_redacted_uri(), - tags.PEER_HOST_IPV6: request.getClientIP(), - "authenticated_entity": origin, - "servlet_name": request.request_metrics.name, - } - - # Only accept the span context if the origin is authenticated - # and whitelisted - if origin and whitelisted_homeserver(origin): - scope = start_active_span_from_request( - request, "incoming-federation-request", tags=request_tags - ) - else: - scope = start_active_span( - "incoming-federation-request", tags=request_tags - ) - - with scope: - opentracing.inject_response_headers(request.responseHeaders) - - if origin and self.RATELIMIT: - with ratelimiter.ratelimit(origin) as d: - await d - if request._disconnected: - logger.warning( - "client disconnected before we started processing " - "request" - ) - return -1, None - response = await func( - origin, content, request.args, *args, **kwargs - ) - else: - response = await func( - origin, content, request.args, *args, **kwargs - ) - - return response - - return new_func - - def register(self, server): - pattern = re.compile("^" + self.PREFIX + self.PATH + "$") - - for method in ("GET", "PUT", "POST"): - code = getattr(self, "on_%s" % (method), None) - if code is None: - continue - - server.register_paths( - method, - (pattern,), - self._wrap(code), - self.__class__.__name__, - ) - - -class BaseFederationServerServlet(BaseFederationServlet): - """Abstract base class for federation servlet classes which provides a federation server handler. - - See BaseFederationServlet for more information. - """ - - def __init__( - self, - hs: HomeServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_federation_server() - - -class FederationSendServlet(BaseFederationServerServlet): - PATH = "/send/(?P[^/]*)/?" - - # We ratelimit manually in the handler as we queue up the requests and we - # don't want to fill up the ratelimiter with blocked requests. - RATELIMIT = False - - # This is when someone is trying to send us a bunch of data. - async def on_PUT( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - transaction_id: str, - ) -> Tuple[int, JsonDict]: - """Called on PUT /send// - - Args: - transaction_id: The transaction_id associated with this request. This - is *not* None. - - Returns: - Tuple of `(code, response)`, where - `response` is a python dict to be converted into JSON that is - used as the response body. - """ - # Parse the request - try: - transaction_data = content - - logger.debug("Decoded %s: %s", transaction_id, str(transaction_data)) - - logger.info( - "Received txn %s from %s. 
-                transaction_id,
-                origin,
-                len(transaction_data.get("pdus", [])),
-                len(transaction_data.get("edus", [])),
-            )
-
-            # We should ideally be getting this from the security layer.
-            # origin = body["origin"]
-
-            # Add some extra data to the transaction dict that isn't included
-            # in the request body.
-            transaction_data.update(
-                transaction_id=transaction_id, destination=self.server_name
-            )
-
-        except Exception as e:
-            logger.exception(e)
-            return 400, {"error": "Invalid transaction"}
-
-        code, response = await self.handler.on_incoming_transaction(
-            origin, transaction_data
-        )
-
-        return code, response
-
-
-class FederationEventServlet(BaseFederationServerServlet):
-    PATH = "/event/(?P<event_id>[^/]*)/?"
-
-    # This is when someone asks for a data item for a given server data_id pair.
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        event_id: str,
-    ) -> Tuple[int, Union[JsonDict, str]]:
-        return await self.handler.on_pdu_request(origin, event_id)
-
-
-class FederationStateV1Servlet(BaseFederationServerServlet):
-    PATH = "/state/(?P<room_id>[^/]*)/?"
-
-    # This is when someone asks for all data for a given room.
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        return await self.handler.on_room_state_request(
-            origin,
-            room_id,
-            parse_string_from_args(query, "event_id", None, required=False),
-        )
-
-
-class FederationStateIdsServlet(BaseFederationServerServlet):
-    PATH = "/state_ids/(?P<room_id>[^/]*)/?"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        return await self.handler.on_state_ids_request(
-            origin,
-            room_id,
-            parse_string_from_args(query, "event_id", None, required=True),
-        )
-
-
-class FederationBackfillServlet(BaseFederationServerServlet):
-    PATH = "/backfill/(?P<room_id>[^/]*)/?"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        versions = [x.decode("ascii") for x in query[b"v"]]
-        limit = parse_integer_from_args(query, "limit", None)
-
-        if not limit:
-            return 400, {"error": "Did not include limit param"}
-
-        return await self.handler.on_backfill_request(origin, room_id, versions, limit)
-
-
-class FederationQueryServlet(BaseFederationServerServlet):
-    PATH = "/query/(?P<query_type>[^/]*)"
-
-    # This is when we receive a server-server Query
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        query_type: str,
-    ) -> Tuple[int, JsonDict]:
-        args = {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()}
-        args["origin"] = origin
-        return await self.handler.on_query_request(query_type, args)
-
-
-class FederationMakeJoinServlet(BaseFederationServerServlet):
-    PATH = "/make_join/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        """
-        Args:
-            origin: The authenticated server_name of the calling server
-
-            content: (GETs don't have bodies)
-
-            query: Query params from the request.
-
-            **kwargs: the dict mapping keys to path components as specified in
-                the path match regexp.
-
-        Returns:
-            Tuple of (response code, response object)
-        """
-        supported_versions = parse_strings_from_args(query, "ver", encoding="utf-8")
-        if supported_versions is None:
-            supported_versions = ["1"]
-
-        result = await self.handler.on_make_join_request(
-            origin, room_id, user_id, supported_versions=supported_versions
-        )
-        return 200, result
-
-
-class FederationMakeLeaveServlet(BaseFederationServerServlet):
-    PATH = "/make_leave/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        result = await self.handler.on_make_leave_request(origin, room_id, user_id)
-        return 200, result
-
-
-class FederationV1SendLeaveServlet(BaseFederationServerServlet):
-    PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, Tuple[int, JsonDict]]:
-        result = await self.handler.on_send_leave_request(origin, content, room_id)
-        return 200, (200, result)
-
-
-class FederationV2SendLeaveServlet(BaseFederationServerServlet):
-    PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    PREFIX = FEDERATION_V2_PREFIX
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, JsonDict]:
-        result = await self.handler.on_send_leave_request(origin, content, room_id)
-        return 200, result
-
-
-class FederationMakeKnockServlet(BaseFederationServerServlet):
-    PATH = "/make_knock/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        # Retrieve the room versions the remote homeserver claims to support
-        supported_versions = parse_strings_from_args(
-            query, "ver", required=True, encoding="utf-8"
-        )
-
-        result = await self.handler.on_make_knock_request(
-            origin, room_id, user_id, supported_versions=supported_versions
-        )
-        return 200, result
-
-
-class FederationV1SendKnockServlet(BaseFederationServerServlet):
-    PATH = "/send_knock/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, JsonDict]:
-        result = await self.handler.on_send_knock_request(origin, content, room_id)
-        return 200, result
-
-
-class FederationEventAuthServlet(BaseFederationServerServlet):
-    PATH = "/event_auth/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, JsonDict]:
-        return await self.handler.on_event_auth(origin, room_id, event_id)
-
-
-class FederationV1SendJoinServlet(BaseFederationServerServlet):
-    PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, Tuple[int, JsonDict]]:
-        # TODO(paul): assert that event_id parsed from path actually
-        #   match those given in content
-        result = await self.handler.on_send_join_request(origin, content, room_id)
-        return 200, (200, result)
-
-
-class FederationV2SendJoinServlet(BaseFederationServerServlet):
-    PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    PREFIX = FEDERATION_V2_PREFIX
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, JsonDict]:
-        # TODO(paul): assert that event_id parsed from path actually
-        #   match those given in content
-        result = await self.handler.on_send_join_request(origin, content, room_id)
-        return 200, result
-
-
-class FederationV1InviteServlet(BaseFederationServerServlet):
-    PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, Tuple[int, JsonDict]]:
-        # We don't get a room version, so we have to assume its EITHER v1 or
-        # v2. This is "fine" as the only difference between V1 and V2 is the
-        # state resolution algorithm, and we don't use that for processing
-        # invites
-        result = await self.handler.on_invite_request(
-            origin, content, room_version_id=RoomVersions.V1.identifier
-        )
-
-        # V1 federation API is defined to return a content of `[200, {...}]`
-        # due to a historical bug.
-        return 200, (200, result)
-
-
-class FederationV2InviteServlet(BaseFederationServerServlet):
-    PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
-
-    PREFIX = FEDERATION_V2_PREFIX
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-        event_id: str,
-    ) -> Tuple[int, JsonDict]:
-        # TODO(paul): assert that room_id/event_id parsed from path actually
-        #   match those given in content
-
-        room_version = content["room_version"]
-        event = content["event"]
-        invite_room_state = content["invite_room_state"]
-
-        # Synapse expects invite_room_state to be in unsigned, as it is in v1
-        # API
-
-        event.setdefault("unsigned", {})["invite_room_state"] = invite_room_state
-
-        result = await self.handler.on_invite_request(
-            origin, event, room_version_id=room_version
-        )
-        return 200, result
-
-
-class FederationThirdPartyInviteExchangeServlet(BaseFederationServerServlet):
-    PATH = "/exchange_third_party_invite/(?P<room_id>[^/]*)"
-
-    async def on_PUT(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        await self.handler.on_exchange_third_party_invite_request(content)
-        return 200, {}
-
-
-class FederationClientKeysQueryServlet(BaseFederationServerServlet):
-    PATH = "/user/keys/query"
-
-    async def on_POST(
-        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
-    ) -> Tuple[int, JsonDict]:
-        return await self.handler.on_query_client_keys(origin, content)
-
-
-class FederationUserDevicesQueryServlet(BaseFederationServerServlet):
-    PATH = "/user/devices/(?P<user_id>[^/]*)"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        return await self.handler.on_query_user_devices(origin, user_id)
-
-
-class FederationClientKeysClaimServlet(BaseFederationServerServlet):
-    PATH = "/user/keys/claim"
-
-    async def on_POST(
-        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
-    ) -> Tuple[int, JsonDict]:
-        response = await self.handler.on_claim_client_keys(origin, content)
-        return 200, response
-
-
-class FederationGetMissingEventsServlet(BaseFederationServerServlet):
-    # TODO(paul): Why does this path alone end with "/?" optional?
-    PATH = "/get_missing_events/(?P<room_id>[^/]*)/?"
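For readers unfamiliar with the servlet plumbing: the named groups in each PATH regex become keyword arguments of the on_* handlers once register() compiles PREFIX + PATH. A minimal illustration, with the literal prefix standing in for FEDERATION_V1_PREFIX:

import re

# register() compiles "^" + PREFIX + PATH + "$"; named groups such as
# room_id are passed through to the handler as keyword arguments.
pattern = re.compile(
    "^/_matrix/federation/v1" + "/get_missing_events/(?P<room_id>[^/]*)/?$"
)
match = pattern.match("/_matrix/federation/v1/get_missing_events/!abc:example.org")
assert match is not None
print(match.groupdict())  # {'room_id': '!abc:example.org'}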
- - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - room_id: str, - ) -> Tuple[int, JsonDict]: - limit = int(content.get("limit", 10)) - earliest_events = content.get("earliest_events", []) - latest_events = content.get("latest_events", []) - - result = await self.handler.on_get_missing_events( - origin, - room_id=room_id, - earliest_events=earliest_events, - latest_events=latest_events, - limit=limit, - ) - - return 200, result - - -class On3pidBindServlet(BaseFederationServerServlet): - PATH = "/3pid/onbind" - - REQUIRE_AUTH = False - - async def on_POST( - self, origin: Optional[str], content: JsonDict, query: Dict[bytes, List[bytes]] - ) -> Tuple[int, JsonDict]: - if "invites" in content: - last_exception = None - for invite in content["invites"]: - try: - if "signed" not in invite or "token" not in invite["signed"]: - message = ( - "Rejecting received notification of third-" - "party invite without signed: %s" % (invite,) - ) - logger.info(message) - raise SynapseError(400, message) - await self.handler.exchange_third_party_invite( - invite["sender"], - invite["mxid"], - invite["room_id"], - invite["signed"], - ) - except Exception as e: - last_exception = e - if last_exception: - raise last_exception - return 200, {} - - -class OpenIdUserInfo(BaseFederationServerServlet): - """ - Exchange a bearer token for information about a user. - - The response format should be compatible with: - http://openid.net/specs/openid-connect-core-1_0.html#UserInfoResponse - - GET /openid/userinfo?access_token=ABDEFGH HTTP/1.1 - - HTTP/1.1 200 OK - Content-Type: application/json - - { - "sub": "@userpart:example.org", - } - """ - - PATH = "/openid/userinfo" - - REQUIRE_AUTH = False - - async def on_GET( - self, - origin: Optional[str], - content: Literal[None], - query: Dict[bytes, List[bytes]], - ) -> Tuple[int, JsonDict]: - token = parse_string_from_args(query, "access_token") - if token is None: - return ( - 401, - {"errcode": "M_MISSING_TOKEN", "error": "Access Token required"}, - ) - - user_id = await self.handler.on_openid_userinfo(token) - - if user_id is None: - return ( - 401, - { - "errcode": "M_UNKNOWN_TOKEN", - "error": "Access Token unknown or expired", - }, - ) - - return 200, {"sub": user_id} - - -class PublicRoomList(BaseFederationServlet): - """ - Fetch the public room list for this server. - - This API returns information in the same format as /publicRooms on the - client API, but will only ever include local public rooms and hence is - intended for consumption by other homeservers. 
- - GET /publicRooms HTTP/1.1 - - HTTP/1.1 200 OK - Content-Type: application/json - - { - "chunk": [ - { - "aliases": [ - "#test:localhost" - ], - "guest_can_join": false, - "name": "test room", - "num_joined_members": 3, - "room_id": "!whkydVegtvatLfXmPN:localhost", - "world_readable": false - } - ], - "end": "END", - "start": "START" - } - """ - - PATH = "/publicRooms" - - def __init__( - self, - hs: HomeServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - allow_access: bool, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_room_list_handler() - self.allow_access = allow_access - - async def on_GET( - self, origin: str, content: Literal[None], query: Dict[bytes, List[bytes]] - ) -> Tuple[int, JsonDict]: - if not self.allow_access: - raise FederationDeniedError(origin) - - limit = parse_integer_from_args(query, "limit", 0) - since_token = parse_string_from_args(query, "since", None) - include_all_networks = parse_boolean_from_args( - query, "include_all_networks", default=False - ) - third_party_instance_id = parse_string_from_args( - query, "third_party_instance_id", None - ) - - if include_all_networks: - network_tuple = None - elif third_party_instance_id: - network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) - else: - network_tuple = ThirdPartyInstanceID(None, None) - - if limit == 0: - # zero is a special value which corresponds to no limit. - limit = None - - data = await self.handler.get_local_public_room_list( - limit, since_token, network_tuple=network_tuple, from_federation=True - ) - return 200, data - - async def on_POST( - self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]] - ) -> Tuple[int, JsonDict]: - # This implements MSC2197 (Search Filtering over Federation) - if not self.allow_access: - raise FederationDeniedError(origin) - - limit: Optional[int] = int(content.get("limit", 100)) - since_token = content.get("since", None) - search_filter = content.get("filter", None) - - include_all_networks = content.get("include_all_networks", False) - third_party_instance_id = content.get("third_party_instance_id", None) - - if include_all_networks: - network_tuple = None - if third_party_instance_id is not None: - raise SynapseError( - 400, "Can't use include_all_networks with an explicit network" - ) - elif third_party_instance_id is None: - network_tuple = ThirdPartyInstanceID(None, None) - else: - network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) - - if search_filter is None: - logger.warning("Nonefilter") - - if limit == 0: - # zero is a special value which corresponds to no limit. - limit = None - - data = await self.handler.get_local_public_room_list( - limit=limit, - since_token=since_token, - search_filter=search_filter, - network_tuple=network_tuple, - from_federation=True, - ) - - return 200, data - - -class FederationVersionServlet(BaseFederationServlet): - PATH = "/version" - - REQUIRE_AUTH = False - - async def on_GET( - self, - origin: Optional[str], - content: Literal[None], - query: Dict[bytes, List[bytes]], - ) -> Tuple[int, JsonDict]: - return ( - 200, - {"server": {"name": "Synapse", "version": get_version_string(synapse)}}, - ) - - -class BaseGroupsServerServlet(BaseFederationServlet): - """Abstract base class for federation servlet classes which provides a groups server handler. - - See BaseFederationServlet for more information. 
- """ - - def __init__( - self, - hs: HomeServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_groups_server_handler() - - -class FederationGroupsProfileServlet(BaseGroupsServerServlet): - """Get/set the basic profile of a group on behalf of a user""" - - PATH = "/groups/(?P[^/]*)/profile" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_group_profile(group_id, requester_user_id) - - return 200, new_content - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.update_group_profile( - group_id, requester_user_id, content - ) - - return 200, new_content - - -class FederationGroupsSummaryServlet(BaseGroupsServerServlet): - PATH = "/groups/(?P[^/]*)/summary" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_group_summary(group_id, requester_user_id) - - return 200, new_content - - -class FederationGroupsRoomsServlet(BaseGroupsServerServlet): - """Get the rooms in a group on behalf of a user""" - - PATH = "/groups/(?P[^/]*)/rooms" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_rooms_in_group(group_id, requester_user_id) - - return 200, new_content - - -class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet): - """Add/remove room from group""" - - PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.add_room_to_group( - group_id, requester_user_id, room_id, content - ) - - return 200, new_content - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - 
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        new_content = await self.handler.remove_room_from_group(
-            group_id, requester_user_id, room_id
-        )
-
-        return 200, new_content
-
-
-class FederationGroupsAddRoomsConfigServlet(BaseGroupsServerServlet):
-    """Update room config in group"""
-
-    PATH = (
-        "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)"
-        "/config/(?P<config_key>[^/]*)"
-    )
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        room_id: str,
-        config_key: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        result = await self.handler.update_room_in_group(
-            group_id, requester_user_id, room_id, config_key, content
-        )
-
-        return 200, result
-
-
-class FederationGroupsUsersServlet(BaseGroupsServerServlet):
-    """Get the users in a group on behalf of a user"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/users"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        new_content = await self.handler.get_users_in_group(group_id, requester_user_id)
-
-        return 200, new_content
-
-
-class FederationGroupsInvitedUsersServlet(BaseGroupsServerServlet):
-    """Get the users that have been invited to a group"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/invited_users"
-
-    async def on_GET(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        new_content = await self.handler.get_invited_users_in_group(
-            group_id, requester_user_id
-        )
-
-        return 200, new_content
-
-
-class FederationGroupsInviteServlet(BaseGroupsServerServlet):
-    """Ask a group server to invite someone to the group"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        new_content = await self.handler.invite_to_group(
-            group_id, user_id, requester_user_id, content
-        )
-
-        return 200, new_content
-
-
-class FederationGroupsAcceptInviteServlet(BaseGroupsServerServlet):
-    """Accept an invitation from the group server"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        if get_domain_from_id(user_id) != origin:
-            raise SynapseError(403, "user_id doesn't match origin")
-
-        new_content = await self.handler.accept_invite(group_id, user_id, content)
-
-        return 200, new_content
-
-
-class FederationGroupsJoinServlet(BaseGroupsServerServlet):
-    """Attempt to join a group"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        if get_domain_from_id(user_id) != origin:
-            raise SynapseError(403, "user_id doesn't match origin")
-
-        new_content = await self.handler.join_group(group_id, user_id, content)
-
-        return 200, new_content
-
-
-class FederationGroupsRemoveUserServlet(BaseGroupsServerServlet):
-    """Leave or kick a user from the group"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        new_content = await self.handler.remove_user_from_group(
-            group_id, user_id, requester_user_id, content
-        )
-
-        return 200, new_content
-
-
-class BaseGroupsLocalServlet(BaseFederationServlet):
-    """Abstract base class for federation servlet classes which provides a groups local handler.
-
-    See BaseFederationServlet for more information.
-    """
-
-    def __init__(
-        self,
-        hs: HomeServer,
-        authenticator: Authenticator,
-        ratelimiter: FederationRateLimiter,
-        server_name: str,
-    ):
-        super().__init__(hs, authenticator, ratelimiter, server_name)
-        self.handler = hs.get_groups_local_handler()
-
-
-class FederationGroupsLocalInviteServlet(BaseGroupsLocalServlet):
-    """A group server has invited a local user"""
-
-    PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        if get_domain_from_id(group_id) != origin:
-            raise SynapseError(403, "group_id doesn't match origin")
-
-        assert isinstance(
-            self.handler, GroupsLocalHandler
-        ), "Workers cannot handle group invites."
-
-        new_content = await self.handler.on_invite(group_id, user_id, content)
-
-        return 200, new_content
-
-
-class FederationGroupsRemoveLocalUserServlet(BaseGroupsLocalServlet):
-    """A group server has removed a local user"""
-
-    PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove"
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, None]:
-        if get_domain_from_id(group_id) != origin:
-            raise SynapseError(403, "user_id doesn't match origin")
-
-        assert isinstance(
-            self.handler, GroupsLocalHandler
-        ), "Workers cannot handle group removals."
-
-        await self.handler.user_removed_from_group(group_id, user_id, content)
-
-        return 200, None
-
-
-class FederationGroupsRenewAttestaionServlet(BaseFederationServlet):
-    """A group or user's server renews their attestation"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)"
-
-    def __init__(
-        self,
-        hs: HomeServer,
-        authenticator: Authenticator,
-        ratelimiter: FederationRateLimiter,
-        server_name: str,
-    ):
-        super().__init__(hs, authenticator, ratelimiter, server_name)
-        self.handler = hs.get_groups_attestation_renewer()
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        user_id: str,
-    ) -> Tuple[int, JsonDict]:
-        # We don't need to check auth here as we check the attestation signatures
-
-        new_content = await self.handler.on_renew_attestation(
-            group_id, user_id, content
-        )
-
-        return 200, new_content
-
-
-class FederationGroupsSummaryRoomsServlet(BaseGroupsServerServlet):
-    """Add/remove a room from the group summary, with optional category.
-
-    Matches both:
-        - /groups/:group/summary/rooms/:room_id
-        - /groups/:group/summary/categories/:category/rooms/:room_id
-    """
-
-    PATH = (
-        "/groups/(?P<group_id>[^/]*)/summary"
-        "(/categories/(?P<category_id>[^/]+))?"
-        "/rooms/(?P<room_id>[^/]*)"
-    )
-
-    async def on_POST(
-        self,
-        origin: str,
-        content: JsonDict,
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        category_id: str,
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        if category_id == "":
-            raise SynapseError(
-                400, "category_id cannot be empty string", Codes.INVALID_PARAM
-            )
-
-        if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH:
-            raise SynapseError(
-                400,
-                "category_id may not be longer than %s characters"
-                % (MAX_GROUP_CATEGORYID_LENGTH,),
-                Codes.INVALID_PARAM,
-            )
-
-        resp = await self.handler.update_group_summary_room(
-            group_id,
-            requester_user_id,
-            room_id=room_id,
-            category_id=category_id,
-            content=content,
-        )
-
-        return 200, resp
-
-    async def on_DELETE(
-        self,
-        origin: str,
-        content: Literal[None],
-        query: Dict[bytes, List[bytes]],
-        group_id: str,
-        category_id: str,
-        room_id: str,
-    ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(
-            query, "requester_user_id", required=True
-        )
-        if get_domain_from_id(requester_user_id) != origin:
-            raise SynapseError(403, "requester_user_id doesn't match origin")
-
-        if category_id == "":
-            raise SynapseError(400, "category_id cannot be empty string")
-
-        resp = await self.handler.delete_group_summary_room(
-            group_id, requester_user_id, room_id=room_id, category_id=category_id
-        )
-
-        return 200, resp
-
-
-class FederationGroupsCategoriesServlet(BaseGroupsServerServlet):
-    """Get all categories for a group"""
-
-    PATH = "/groups/(?P<group_id>[^/]*)/categories/?"
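Nearly every groups servlet repeats the same guard: the requester_user_id passed in the query string must belong to the calling server. A standalone sketch of that check, with get_domain_from_id re-implemented here for illustration (the real helper lives in synapse.types) and PermissionError standing in for SynapseError(403, ...):

def get_domain_from_id(user_or_group_id: str) -> str:
    # Matrix IDs look like @user:domain or +group:domain; the domain is
    # everything after the first colon.
    idx = user_or_group_id.find(":")
    if idx < 0:
        raise ValueError("No domain separator in %r" % (user_or_group_id,))
    return user_or_group_id[idx + 1 :]

def check_requester(origin: str, requester_user_id: str) -> None:
    # Mirrors the servlets above: reject if the user isn't on the origin server.
    if get_domain_from_id(requester_user_id) != origin:
        raise PermissionError("requester_user_id doesn't match origin")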
- - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_categories(group_id, requester_user_id) - - return 200, resp - - -class FederationGroupsCategoryServlet(BaseGroupsServerServlet): - """Add/remove/get a category in a group""" - - PATH = "/groups/(?P[^/]*)/categories/(?P[^/]+)" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_category( - group_id, requester_user_id, category_id - ) - - return 200, resp - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty string") - - if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH: - raise SynapseError( - 400, - "category_id may not be longer than %s characters" - % (MAX_GROUP_CATEGORYID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.upsert_group_category( - group_id, requester_user_id, category_id, content - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty string") - - resp = await self.handler.delete_group_category( - group_id, requester_user_id, category_id - ) - - return 200, resp - - -class FederationGroupsRolesServlet(BaseGroupsServerServlet): - """Get roles in a group""" - - PATH = "/groups/(?P[^/]*)/roles/?" 
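The empty-string and maximum-length validation repeated across the category and role handlers follows a single pattern; a shared helper along these lines could state it once. This is purely an illustrative sketch, not a helper present in this patch (SynapseError, Codes and the MAX_* constants are the module's existing imports):

    def _check_group_path_param(name: str, value: str, max_length: int) -> None:
        # Hypothetical helper mirroring the inline checks above.
        if value == "":
            raise SynapseError(
                400, "%s cannot be empty string" % (name,), Codes.INVALID_PARAM
            )
        if len(value) > max_length:
            raise SynapseError(
                400,
                "%s may not be longer than %s characters" % (name, max_length),
                Codes.INVALID_PARAM,
            )

    # Usage: _check_group_path_param("category_id", category_id, MAX_GROUP_CATEGORYID_LENGTH)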
- - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_roles(group_id, requester_user_id) - - return 200, resp - - -class FederationGroupsRoleServlet(BaseGroupsServerServlet): - """Add/remove/get a role in a group""" - - PATH = "/groups/(?P[^/]*)/roles/(?P[^/]+)" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_role(group_id, requester_user_id, role_id) - - return 200, resp - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError( - 400, "role_id cannot be empty string", Codes.INVALID_PARAM - ) - - if len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.update_group_role( - group_id, requester_user_id, role_id, content - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - resp = await self.handler.delete_group_role( - group_id, requester_user_id, role_id - ) - - return 200, resp - - -class FederationGroupsSummaryUsersServlet(BaseGroupsServerServlet): - """Add/remove a user from the group summary, with optional role. - - Matches both: - - /groups/:group/summary/users/:user_id - - /groups/:group/summary/roles/:role/users/:user_id - """ - - PATH = ( - "/groups/(?P[^/]*)/summary" - "(/roles/(?P[^/]+))?" 
- "/users/(?P[^/]*)" - ) - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - if len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.update_group_summary_user( - group_id, - requester_user_id, - user_id=user_id, - role_id=role_id, - content=content, - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - resp = await self.handler.delete_group_summary_user( - group_id, requester_user_id, user_id=user_id, role_id=role_id - ) - - return 200, resp - - -class FederationGroupsBulkPublicisedServlet(BaseGroupsLocalServlet): - """Get roles in a group""" - - PATH = "/get_groups_publicised" - - async def on_POST( - self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]] - ) -> Tuple[int, JsonDict]: - resp = await self.handler.bulk_get_publicised_groups( - content["user_ids"], proxy=False - ) - - return 200, resp - - -class FederationGroupsSettingJoinPolicyServlet(BaseGroupsServerServlet): - """Sets whether a group is joinable without an invite or knock""" - - PATH = "/groups/(?P[^/]*)/settings/m.join_policy" - - async def on_PUT( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.set_group_join_policy( - group_id, requester_user_id, content - ) - - return 200, new_content - - -class FederationSpaceSummaryServlet(BaseFederationServlet): - PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" - PATH = "/spaces/(?P[^/]*)" - - def __init__( - self, - hs: HomeServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_space_summary_handler() - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Mapping[bytes, Sequence[bytes]], - room_id: str, - ) -> Tuple[int, JsonDict]: - suggested_only = parse_boolean_from_args(query, "suggested_only", default=False) - max_rooms_per_space = parse_integer_from_args(query, "max_rooms_per_space") - - exclude_rooms = parse_strings_from_args(query, "exclude_rooms", default=[]) - - return 200, await self.handler.federation_space_summary( - origin, room_id, suggested_only, max_rooms_per_space, exclude_rooms - ) - - # TODO When switching to the stable endpoint, 
remove the POST handler. - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Mapping[bytes, Sequence[bytes]], - room_id: str, - ) -> Tuple[int, JsonDict]: - suggested_only = content.get("suggested_only", False) - if not isinstance(suggested_only, bool): - raise SynapseError( - 400, "'suggested_only' must be a boolean", Codes.BAD_JSON - ) - - exclude_rooms = content.get("exclude_rooms", []) - if not isinstance(exclude_rooms, list) or any( - not isinstance(x, str) for x in exclude_rooms - ): - raise SynapseError(400, "bad value for 'exclude_rooms'", Codes.BAD_JSON) - - max_rooms_per_space = content.get("max_rooms_per_space") - if max_rooms_per_space is not None and not isinstance(max_rooms_per_space, int): - raise SynapseError( - 400, "bad value for 'max_rooms_per_space'", Codes.BAD_JSON - ) - - return 200, await self.handler.federation_space_summary( - origin, room_id, suggested_only, max_rooms_per_space, exclude_rooms - ) - - -class RoomComplexityServlet(BaseFederationServlet): - """ - Indicates to other servers how complex (and therefore likely - resource-intensive) a public room this server knows about is. - """ - - PATH = "/rooms/(?P[^/]*)/complexity" - PREFIX = FEDERATION_UNSTABLE_PREFIX - - def __init__( - self, - hs: HomeServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self._store = self.hs.get_datastore() - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - room_id: str, - ) -> Tuple[int, JsonDict]: - is_public = await self._store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - if not is_public: - raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM) - - complexity = await self._store.get_room_complexity(room_id) - return 200, complexity - - -FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( - FederationSendServlet, - FederationEventServlet, - FederationStateV1Servlet, - FederationStateIdsServlet, - FederationBackfillServlet, - FederationQueryServlet, - FederationMakeJoinServlet, - FederationMakeLeaveServlet, - FederationEventServlet, - FederationV1SendJoinServlet, - FederationV2SendJoinServlet, - FederationV1SendLeaveServlet, - FederationV2SendLeaveServlet, - FederationV1InviteServlet, - FederationV2InviteServlet, - FederationGetMissingEventsServlet, - FederationEventAuthServlet, - FederationClientKeysQueryServlet, - FederationUserDevicesQueryServlet, - FederationClientKeysClaimServlet, - FederationThirdPartyInviteExchangeServlet, - On3pidBindServlet, - FederationVersionServlet, - RoomComplexityServlet, - FederationSpaceSummaryServlet, - FederationV1SendKnockServlet, - FederationMakeKnockServlet, -) - -OPENID_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (OpenIdUserInfo,) - -ROOM_LIST_CLASSES: Tuple[Type[PublicRoomList], ...] = (PublicRoomList,) - -GROUP_SERVER_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] 
= ( - FederationGroupsProfileServlet, - FederationGroupsSummaryServlet, - FederationGroupsRoomsServlet, - FederationGroupsUsersServlet, - FederationGroupsInvitedUsersServlet, - FederationGroupsInviteServlet, - FederationGroupsAcceptInviteServlet, - FederationGroupsJoinServlet, - FederationGroupsRemoveUserServlet, - FederationGroupsSummaryRoomsServlet, - FederationGroupsCategoriesServlet, - FederationGroupsCategoryServlet, - FederationGroupsRolesServlet, - FederationGroupsRoleServlet, - FederationGroupsSummaryUsersServlet, - FederationGroupsAddRoomsServlet, - FederationGroupsAddRoomsConfigServlet, - FederationGroupsSettingJoinPolicyServlet, -) - - -GROUP_LOCAL_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( - FederationGroupsLocalInviteServlet, - FederationGroupsRemoveLocalUserServlet, - FederationGroupsBulkPublicisedServlet, -) - - -GROUP_ATTESTATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( - FederationGroupsRenewAttestaionServlet, -) - - -DEFAULT_SERVLET_GROUPS = ( - "federation", - "room_list", - "group_server", - "group_local", - "group_attestation", - "openid", -) - - -def register_servlets( - hs: HomeServer, - resource: HttpServer, - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - servlet_groups: Optional[Container[str]] = None, -): - """Initialize and register servlet classes. - - Will by default register all servlets. For custom behaviour, pass in - a list of servlet_groups to register. - - Args: - hs: homeserver - resource: resource class to register to - authenticator: authenticator to use - ratelimiter: ratelimiter to use - servlet_groups: List of servlet groups to register. - Defaults to ``DEFAULT_SERVLET_GROUPS``. - """ - if not servlet_groups: - servlet_groups = DEFAULT_SERVLET_GROUPS - - if "federation" in servlet_groups: - for servletclass in FEDERATION_SERVLET_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - if "openid" in servlet_groups: - for servletclass in OPENID_SERVLET_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - if "room_list" in servlet_groups: - for servletclass in ROOM_LIST_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - allow_access=hs.config.allow_public_rooms_over_federation, - ).register(resource) - - if "group_server" in servlet_groups: - for servletclass in GROUP_SERVER_SERVLET_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - if "group_local" in servlet_groups: - for servletclass in GROUP_LOCAL_SERVLET_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) - - if "group_attestation" in servlet_groups: - for servletclass in GROUP_ATTESTATION_SERVLET_CLASSES: - servletclass( - hs=hs, - authenticator=authenticator, - ratelimiter=ratelimiter, - server_name=hs.hostname, - ).register(resource) diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py new file mode 100644 index 000000000000..77b936361a4a --- /dev/null +++ b/synapse/federation/transport/server/__init__.py @@ -0,0 +1,342 @@ +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. 
+# Copyright 2020 Sorunome +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from typing import Dict, Iterable, List, Optional, Tuple, Type + +from typing_extensions import Literal + +from synapse.api.errors import FederationDeniedError, SynapseError +from synapse.federation.transport.server._base import ( + Authenticator, + BaseFederationServlet, +) +from synapse.federation.transport.server.federation import ( + FEDERATION_SERVLET_CLASSES, + FederationTimestampLookupServlet, +) +from synapse.federation.transport.server.groups_local import GROUP_LOCAL_SERVLET_CLASSES +from synapse.federation.transport.server.groups_server import ( + GROUP_SERVER_SERVLET_CLASSES, +) +from synapse.http.server import HttpServer, JsonResource +from synapse.http.servlet import ( + parse_boolean_from_args, + parse_integer_from_args, + parse_string_from_args, +) +from synapse.server import HomeServer +from synapse.types import JsonDict, ThirdPartyInstanceID +from synapse.util.ratelimitutils import FederationRateLimiter + +logger = logging.getLogger(__name__) + + +class TransportLayerServer(JsonResource): + """Handles incoming federation HTTP requests""" + + def __init__(self, hs: HomeServer, servlet_groups: Optional[List[str]] = None): + """Initialize the TransportLayerServer + + Will by default register all servlets. For custom behaviour, pass in + a list of servlet_groups to register. + + Args: + hs: homeserver + servlet_groups: List of servlet groups to register. + Defaults to ``DEFAULT_SERVLET_GROUPS``. + """ + self.hs = hs + self.clock = hs.get_clock() + self.servlet_groups = servlet_groups + + super().__init__(hs, canonical_json=False) + + self.authenticator = Authenticator(hs) + self.ratelimiter = hs.get_federation_ratelimiter() + + self.register_servlets() + + def register_servlets(self) -> None: + register_servlets( + self.hs, + resource=self, + ratelimiter=self.ratelimiter, + authenticator=self.authenticator, + servlet_groups=self.servlet_groups, + ) + + +class PublicRoomList(BaseFederationServlet): + """ + Fetch the public room list for this server. + + This API returns information in the same format as /publicRooms on the + client API, but will only ever include local public rooms and hence is + intended for consumption by other homeservers. 
+ + GET /publicRooms HTTP/1.1 + + HTTP/1.1 200 OK + Content-Type: application/json + + { + "chunk": [ + { + "aliases": [ + "#test:localhost" + ], + "guest_can_join": false, + "name": "test room", + "num_joined_members": 3, + "room_id": "!whkydVegtvatLfXmPN:localhost", + "world_readable": false + } + ], + "end": "END", + "start": "START" + } + """ + + PATH = "/publicRooms" + + def __init__( + self, + hs: HomeServer, + authenticator: Authenticator, + ratelimiter: FederationRateLimiter, + server_name: str, + ): + super().__init__(hs, authenticator, ratelimiter, server_name) + self.handler = hs.get_room_list_handler() + self.allow_access = hs.config.server.allow_public_rooms_over_federation + + async def on_GET( + self, origin: str, content: Literal[None], query: Dict[bytes, List[bytes]] + ) -> Tuple[int, JsonDict]: + if not self.allow_access: + raise FederationDeniedError(origin) + + limit = parse_integer_from_args(query, "limit", 0) + since_token = parse_string_from_args(query, "since", None) + include_all_networks = parse_boolean_from_args( + query, "include_all_networks", default=False + ) + third_party_instance_id = parse_string_from_args( + query, "third_party_instance_id", None + ) + + if include_all_networks: + network_tuple = None + elif third_party_instance_id: + network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + else: + network_tuple = ThirdPartyInstanceID(None, None) + + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + + data = await self.handler.get_local_public_room_list( + limit, since_token, network_tuple=network_tuple, from_federation=True + ) + return 200, data + + async def on_POST( + self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]] + ) -> Tuple[int, JsonDict]: + # This implements MSC2197 (Search Filtering over Federation) + if not self.allow_access: + raise FederationDeniedError(origin) + + limit: Optional[int] = int(content.get("limit", 100)) + since_token = content.get("since", None) + search_filter = content.get("filter", None) + + include_all_networks = content.get("include_all_networks", False) + third_party_instance_id = content.get("third_party_instance_id", None) + + if include_all_networks: + network_tuple = None + if third_party_instance_id is not None: + raise SynapseError( + 400, "Can't use include_all_networks with an explicit network" + ) + elif third_party_instance_id is None: + network_tuple = ThirdPartyInstanceID(None, None) + else: + network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + + if search_filter is None: + logger.warning("Nonefilter") + + if limit == 0: + # zero is a special value which corresponds to no limit. 
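# For reference, an MSC2197 search request body exercising the fields read
# above might look like the following (the filter contents are an assumption,
# not taken from this patch):
#     {"limit": 10, "since": "END", "filter": {"generic_search_term": "matrix"}}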
+            limit = None
+
+        data = await self.handler.get_local_public_room_list(
+            limit=limit,
+            since_token=since_token,
+            search_filter=search_filter,
+            network_tuple=network_tuple,
+            from_federation=True,
+        )
+
+        return 200, data
+
+
+class FederationGroupsRenewAttestaionServlet(BaseFederationServlet):
+    """A group or user's server renews their attestation"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)"
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_groups_attestation_renewer()
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        # We don't need to check auth here as we check the attestation signatures
+
+        new_content = await self.handler.on_renew_attestation(
+            group_id, user_id, content
+        )
+
+        return 200, new_content
+
+
+class OpenIdUserInfo(BaseFederationServlet):
+    """
+    Exchange a bearer token for information about a user.
+
+    The response format should be compatible with:
+        http://openid.net/specs/openid-connect-core-1_0.html#UserInfoResponse
+
+    GET /openid/userinfo?access_token=ABDEFGH HTTP/1.1
+
+    HTTP/1.1 200 OK
+    Content-Type: application/json
+
+    {
+        "sub": "@userpart:example.org",
+    }
+    """
+
+    PATH = "/openid/userinfo"
+
+    REQUIRE_AUTH = False
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_federation_server()
+
+    async def on_GET(
+        self,
+        origin: Optional[str],
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+    ) -> Tuple[int, JsonDict]:
+        token = parse_string_from_args(query, "access_token")
+        if token is None:
+            return (
+                401,
+                {"errcode": "M_MISSING_TOKEN", "error": "Access Token required"},
+            )
+
+        user_id = await self.handler.on_openid_userinfo(token)
+
+        if user_id is None:
+            return (
+                401,
+                {
+                    "errcode": "M_UNKNOWN_TOKEN",
+                    "error": "Access Token unknown or expired",
+                },
+            )
+
+        return 200, {"sub": user_id}
+
+
+DEFAULT_SERVLET_GROUPS: Dict[str, Iterable[Type[BaseFederationServlet]]] = {
+    "federation": FEDERATION_SERVLET_CLASSES,
+    "room_list": (PublicRoomList,),
+    "group_server": GROUP_SERVER_SERVLET_CLASSES,
+    "group_local": GROUP_LOCAL_SERVLET_CLASSES,
+    "group_attestation": (FederationGroupsRenewAttestaionServlet,),
+    "openid": (OpenIdUserInfo,),
+}
+
+
+def register_servlets(
+    hs: HomeServer,
+    resource: HttpServer,
+    authenticator: Authenticator,
+    ratelimiter: FederationRateLimiter,
+    servlet_groups: Optional[Iterable[str]] = None,
+) -> None:
+    """Initialize and register servlet classes.
+
+    Will by default register all servlets. For custom behaviour, pass in
+    a list of servlet_groups to register.
+
+    Args:
+        hs: homeserver
+        resource: resource class to register to
+        authenticator: authenticator to use
+        ratelimiter: ratelimiter to use
+        servlet_groups: List of servlet groups to register.
+            Defaults to ``DEFAULT_SERVLET_GROUPS``.
+    """
+    if not servlet_groups:
+        servlet_groups = DEFAULT_SERVLET_GROUPS.keys()
+
+    for servlet_group in servlet_groups:
+        # Fail loudly on unknown servlet groups rather than silently skipping them.
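# Illustrative call, not code from this patch: a process serving only
# federation and OpenID traffic could be wired up with
#     register_servlets(hs, resource, authenticator, ratelimiter,
#                       servlet_groups=["federation", "openid"])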
+ if servlet_group not in DEFAULT_SERVLET_GROUPS: + raise RuntimeError( + f"Attempting to register unknown federation servlet: '{servlet_group}'" + ) + + for servletclass in DEFAULT_SERVLET_GROUPS[servlet_group]: + # Only allow the `/timestamp_to_event` servlet if msc3030 is enabled + if ( + servletclass == FederationTimestampLookupServlet + and not hs.config.experimental.msc3030_enabled + ): + continue + + servletclass( + hs=hs, + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py new file mode 100644 index 000000000000..dc39e3537bf6 --- /dev/null +++ b/synapse/federation/transport/server/_base.py @@ -0,0 +1,340 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import logging +import re +from typing import Any, Awaitable, Callable, Optional, Tuple, cast + +from synapse.api.errors import Codes, FederationDeniedError, SynapseError +from synapse.api.urls import FEDERATION_V1_PREFIX +from synapse.http.server import HttpServer, ServletCallback +from synapse.http.servlet import parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.logging import opentracing +from synapse.logging.context import run_in_background +from synapse.logging.opentracing import ( + SynapseTags, + start_active_span, + start_active_span_from_request, + tags, + whitelisted_homeserver, +) +from synapse.server import HomeServer +from synapse.types import JsonDict +from synapse.util.ratelimitutils import FederationRateLimiter +from synapse.util.stringutils import parse_and_validate_server_name + +logger = logging.getLogger(__name__) + + +class AuthenticationError(SynapseError): + """There was a problem authenticating the request""" + + +class NoAuthenticationError(AuthenticationError): + """The request had no authentication information""" + + +class Authenticator: + def __init__(self, hs: HomeServer): + self._clock = hs.get_clock() + self.keyring = hs.get_keyring() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) + self.notifier = hs.get_notifier() + + self.replication_client = None + if hs.config.worker.worker_app: + self.replication_client = hs.get_tcp_replication() + + # A method just so we can pass 'self' as the authenticator to the Servlets + async def authenticate_request( + self, request: SynapseRequest, content: Optional[JsonDict] + ) -> str: + now = self._clock.time_msec() + json_request: JsonDict = { + "method": request.method.decode("ascii"), + "uri": request.uri.decode("ascii"), + "destination": self.server_name, + "signatures": {}, + } + + if content is not None: + json_request["content"] = content + + origin = None + + auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") + + if not auth_headers: + raise NoAuthenticationError( 
+ 401, "Missing Authorization headers", Codes.UNAUTHORIZED + ) + + for auth in auth_headers: + if auth.startswith(b"X-Matrix"): + (origin, key, sig) = _parse_auth_header(auth) + json_request["origin"] = origin + json_request["signatures"].setdefault(origin, {})[key] = sig + + if ( + self.federation_domain_whitelist is not None + and origin not in self.federation_domain_whitelist + ): + raise FederationDeniedError(origin) + + if origin is None or not json_request["signatures"]: + raise NoAuthenticationError( + 401, "Missing Authorization headers", Codes.UNAUTHORIZED + ) + + await self.keyring.verify_json_for_server( + origin, + json_request, + now, + ) + + logger.debug("Request from %s", origin) + request.requester = origin + + # If we get a valid signed request from the other side, its probably + # alive + retry_timings = await self.store.get_destination_retry_timings(origin) + if retry_timings and retry_timings.retry_last_ts: + run_in_background(self._reset_retry_timings, origin) + + return origin + + async def _reset_retry_timings(self, origin: str) -> None: + try: + logger.info("Marking origin %r as up", origin) + await self.store.set_destination_retry_timings(origin, None, 0, 0) + + # Inform the relevant places that the remote server is back up. + self.notifier.notify_remote_server_up(origin) + if self.replication_client: + # If we're on a worker we try and inform master about this. The + # replication client doesn't hook into the notifier to avoid + # infinite loops where we send a `REMOTE_SERVER_UP` command to + # master, which then echoes it back to us which in turn pokes + # the notifier. + self.replication_client.send_remote_server_up(origin) + + except Exception: + logger.exception("Error resetting retry timings on %s", origin) + + +def _parse_auth_header(header_bytes: bytes) -> Tuple[str, str, str]: + """Parse an X-Matrix auth header + + Args: + header_bytes: header value + + Returns: + origin, key id, signature. + + Raises: + AuthenticationError if the header could not be parsed + """ + try: + header_str = header_bytes.decode("utf-8") + params = header_str.split(" ")[1].split(",") + param_dict = {k: v for k, v in (kv.split("=", maxsplit=1) for kv in params)} + + def strip_quotes(value: str) -> str: + if value.startswith('"'): + return value[1:-1] + else: + return value + + origin = strip_quotes(param_dict["origin"]) + + # ensure that the origin is a valid server name + parse_and_validate_server_name(origin) + + key = strip_quotes(param_dict["key"]) + sig = strip_quotes(param_dict["sig"]) + return origin, key, sig + except Exception as e: + logger.warning( + "Error parsing auth header '%s': %s", + header_bytes.decode("ascii", "replace"), + e, + ) + raise AuthenticationError( + 400, "Malformed Authorization header", Codes.UNAUTHORIZED + ) + + +class BaseFederationServlet: + """Abstract base class for federation servlet classes. + + The servlet object should have a PATH attribute which takes the form of a regexp to + match against the request path (excluding the /federation/v1 prefix). + + The servlet should also implement one or more of on_GET, on_POST, on_PUT, to match + the appropriate HTTP method. These methods must be *asynchronous* and have the + signature: + + on_(self, origin, content, query, **kwargs) + + With arguments: + + origin (unicode|None): The authenticated server_name of the calling server, + unless REQUIRE_AUTH is set to False and authentication failed. + + content (unicode|None): decoded json body of the request. None if the + request was a GET. 
+ + query (dict[bytes, list[bytes]]): Query params from the request. url-decoded + (ie, '+' and '%xx' are decoded) but note that it is *not* utf8-decoded + yet. + + **kwargs (dict[unicode, unicode]): the dict mapping keys to path + components as specified in the path match regexp. + + Returns: + Optional[Tuple[int, object]]: either (response code, response object) to + return a JSON response, or None if the request has already been handled. + + Raises: + SynapseError: to return an error code + + Exception: other exceptions will be caught, logged, and a 500 will be + returned. + """ + + PATH = "" # Overridden in subclasses, the regex to match against the path. + + REQUIRE_AUTH = True + + PREFIX = FEDERATION_V1_PREFIX # Allows specifying the API version + + RATELIMIT = True # Whether to rate limit requests or not + + def __init__( + self, + hs: HomeServer, + authenticator: Authenticator, + ratelimiter: FederationRateLimiter, + server_name: str, + ): + self.hs = hs + self.authenticator = authenticator + self.ratelimiter = ratelimiter + self.server_name = server_name + + def _wrap(self, func: Callable[..., Awaitable[Tuple[int, Any]]]) -> ServletCallback: + authenticator = self.authenticator + ratelimiter = self.ratelimiter + + @functools.wraps(func) + async def new_func( + request: SynapseRequest, *args: Any, **kwargs: str + ) -> Optional[Tuple[int, Any]]: + """A callback which can be passed to HttpServer.RegisterPaths + + Args: + request: + *args: unused? + **kwargs: the dict mapping keys to path components as specified + in the path match regexp. + + Returns: + (response code, response object) as returned by the callback method. + None if the request has already been handled. + """ + content = None + if request.method in [b"PUT", b"POST"]: + # TODO: Handle other method types? other content types? 
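# (Only PUT and POST bodies are parsed as JSON; for other methods `content`
# stays None, which is why the on_GET handlers throughout this package
# annotate it as Literal[None].)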
+ content = parse_json_object_from_request(request) + + try: + origin: Optional[str] = await authenticator.authenticate_request( + request, content + ) + except NoAuthenticationError: + origin = None + if self.REQUIRE_AUTH: + logger.warning( + "authenticate_request failed: missing authentication" + ) + raise + except Exception as e: + logger.warning("authenticate_request failed: %s", e) + raise + + request_tags = { + SynapseTags.REQUEST_ID: request.get_request_id(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), + "authenticated_entity": origin, + "servlet_name": request.request_metrics.name, + } + + # Only accept the span context if the origin is authenticated + # and whitelisted + if origin and whitelisted_homeserver(origin): + scope = start_active_span_from_request( + request, "incoming-federation-request", tags=request_tags + ) + else: + scope = start_active_span( + "incoming-federation-request", tags=request_tags + ) + + with scope: + opentracing.inject_response_headers(request.responseHeaders) + + if origin and self.RATELIMIT: + with ratelimiter.ratelimit(origin) as d: + await d + if request._disconnected: + logger.warning( + "client disconnected before we started processing " + "request" + ) + return None + response = await func( + origin, content, request.args, *args, **kwargs + ) + else: + response = await func( + origin, content, request.args, *args, **kwargs + ) + + return response + + return cast(ServletCallback, new_func) + + def register(self, server: HttpServer) -> None: + pattern = re.compile("^" + self.PREFIX + self.PATH + "$") + + for method in ("GET", "PUT", "POST"): + code = getattr(self, "on_%s" % (method), None) + if code is None: + continue + + server.register_paths( + method, + (pattern,), + self._wrap(code), + self.__class__.__name__, + ) diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py new file mode 100644 index 000000000000..77bfd88ad052 --- /dev/null +++ b/synapse/federation/transport/server/federation.py @@ -0,0 +1,751 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
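Before the servlet definitions, a quick sketch of the X-Matrix scheme that _parse_auth_header in _base.py above consumes; the header value and signature here are made up:

    header = b'X-Matrix origin=example.org,key="ed25519:1",sig="dGVzdA"'
    params = header.decode("utf-8").split(" ")[1].split(",")
    param_dict = {k: v for k, v in (kv.split("=", maxsplit=1) for kv in params)}
    print(param_dict["origin"])          # example.org
    print(param_dict["key"].strip('"'))  # ed25519:1
    print(param_dict["sig"].strip('"'))  # dGVzdA

The real parser also validates the origin as a server name and strips quotes more conservatively; this only shows the shape of the header.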
+import logging
+from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union
+
+from typing_extensions import Literal
+
+import synapse
+from synapse.api.errors import Codes, SynapseError
+from synapse.api.room_versions import RoomVersions
+from synapse.api.urls import FEDERATION_UNSTABLE_PREFIX, FEDERATION_V2_PREFIX
+from synapse.federation.transport.server._base import (
+    Authenticator,
+    BaseFederationServlet,
+)
+from synapse.http.servlet import (
+    parse_boolean_from_args,
+    parse_integer_from_args,
+    parse_string_from_args,
+    parse_strings_from_args,
+)
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util.ratelimitutils import FederationRateLimiter
+from synapse.util.versionstring import get_version_string
+
+logger = logging.getLogger(__name__)
+
+
+class BaseFederationServerServlet(BaseFederationServlet):
+    """Abstract base class for federation servlet classes which provides a federation server handler.
+
+    See BaseFederationServlet for more information.
+    """
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_federation_server()
+
+
+class FederationSendServlet(BaseFederationServerServlet):
+    PATH = "/send/(?P<transaction_id>[^/]*)/?"
+
+    # We ratelimit manually in the handler as we queue up the requests and we
+    # don't want to fill up the ratelimiter with blocked requests.
+    RATELIMIT = False
+
+    # This is when someone is trying to send us a bunch of data.
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        transaction_id: str,
+    ) -> Tuple[int, JsonDict]:
+        """Called on PUT /send/<transaction_id>/
+
+        Args:
+            transaction_id: The transaction_id associated with this request. This
+                is *not* None.
+
+        Returns:
+            Tuple of `(code, response)`, where
+            `response` is a python dict to be converted into JSON that is
+            used as the response body.
+        """
+        # Parse the request
+        try:
+            transaction_data = content
+
+            logger.debug("Decoded %s: %s", transaction_id, str(transaction_data))
+
+            logger.info(
+                "Received txn %s from %s. (PDUs: %d, EDUs: %d)",
+                transaction_id,
+                origin,
+                len(transaction_data.get("pdus", [])),
+                len(transaction_data.get("edus", [])),
+            )
+
+        except Exception as e:
+            logger.exception(e)
+            return 400, {"error": "Invalid transaction"}
+
+        code, response = await self.handler.on_incoming_transaction(
+            origin, transaction_id, self.server_name, transaction_data
+        )
+
+        return code, response
+
+
+class FederationEventServlet(BaseFederationServerServlet):
+    PATH = "/event/(?P<event_id>[^/]*)/?"
+
+    # This is when someone asks for a data item for a given server data_id pair.
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        event_id: str,
+    ) -> Tuple[int, Union[JsonDict, str]]:
+        return await self.handler.on_pdu_request(origin, event_id)
+
+
+class FederationStateV1Servlet(BaseFederationServerServlet):
+    PATH = "/state/(?P<room_id>[^/]*)/?"
+
+    # This is when someone asks for all data for a given room.
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        return await self.handler.on_room_state_request(
+            origin,
+            room_id,
+            parse_string_from_args(query, "event_id", None, required=False),
+        )
+
+
+class FederationStateIdsServlet(BaseFederationServerServlet):
+    PATH = "/state_ids/(?P<room_id>[^/]*)/?"
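# (Unlike the /state variant above, which returns full events, /state_ids
# returns only event IDs, so a request such as
#     GET /_matrix/federation/v1/state_ids/!room:example.org?event_id=$xyz
# is the cheap way for a remote server to compare room state; the URL is
# illustrative.)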
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        return await self.handler.on_state_ids_request(
+            origin,
+            room_id,
+            parse_string_from_args(query, "event_id", None, required=True),
+        )
+
+
+class FederationBackfillServlet(BaseFederationServerServlet):
+    PATH = "/backfill/(?P<room_id>[^/]*)/?"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        versions = [x.decode("ascii") for x in query[b"v"]]
+        limit = parse_integer_from_args(query, "limit", None)
+
+        if not limit:
+            return 400, {"error": "Did not include limit param"}
+
+        return await self.handler.on_backfill_request(origin, room_id, versions, limit)
+
+
+class FederationTimestampLookupServlet(BaseFederationServerServlet):
+    """
+    API endpoint to fetch the `event_id` of the closest event to the given
+    timestamp (`ts` query parameter) in the given direction (`dir` query
+    parameter).
+
+    Useful for other homeservers when they're unable to find an event locally.
+
+    `ts` is a timestamp in milliseconds where we will find the closest event in
+    the given direction.
+
+    `dir` can be `f` or `b` to indicate forwards and backwards in time from the
+    given timestamp.
+
+    GET /_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/<room_id>?ts=<ts>&dir=<dir>
+    {
+        "event_id": ...
+    }
+    """
+
+    PATH = "/timestamp_to_event/(?P<room_id>[^/]*)/?"
+    PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3030"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        timestamp = parse_integer_from_args(query, "ts", required=True)
+        direction = parse_string_from_args(
+            query, "dir", default="f", allowed_values=["f", "b"], required=True
+        )
+
+        return await self.handler.on_timestamp_to_event_request(
+            origin, room_id, timestamp, direction
+        )
+
+
+class FederationQueryServlet(BaseFederationServerServlet):
+    PATH = "/query/(?P<query_type>[^/]*)"
+
+    # This is when we receive a server-server Query
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        query_type: str,
+    ) -> Tuple[int, JsonDict]:
+        args = {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()}
+        args["origin"] = origin
+        return await self.handler.on_query_request(query_type, args)
+
+
+class FederationMakeJoinServlet(BaseFederationServerServlet):
+    PATH = "/make_join/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        """
+        Args:
+            origin: The authenticated server_name of the calling server
+
+            content: (GETs don't have bodies)
+
+            query: Query params from the request.
+
+            **kwargs: the dict mapping keys to path components as specified in
+                the path match regexp.
+
+        Returns:
+            Tuple of (response code, response object)
+        """
+        supported_versions = parse_strings_from_args(query, "ver", encoding="utf-8")
+        if supported_versions is None:
+            supported_versions = ["1"]
+
+        result = await self.handler.on_make_join_request(
+            origin, room_id, user_id, supported_versions=supported_versions
+        )
+        return 200, result
+
+
+class FederationMakeLeaveServlet(BaseFederationServerServlet):
+    PATH = "/make_leave/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        result = await self.handler.on_make_leave_request(origin, room_id, user_id)
+        return 200, result
+
+
+class FederationV1SendLeaveServlet(BaseFederationServerServlet):
+    PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, Tuple[int, JsonDict]]:
+        result = await self.handler.on_send_leave_request(origin, content, room_id)
+        return 200, (200, result)
+
+
+class FederationV2SendLeaveServlet(BaseFederationServerServlet):
+    PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    PREFIX = FEDERATION_V2_PREFIX
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        result = await self.handler.on_send_leave_request(origin, content, room_id)
+        return 200, result
+
+
+class FederationMakeKnockServlet(BaseFederationServerServlet):
+    PATH = "/make_knock/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        # Retrieve the room versions the remote homeserver claims to support
+        supported_versions = parse_strings_from_args(
+            query, "ver", required=True, encoding="utf-8"
+        )
+
+        result = await self.handler.on_make_knock_request(
+            origin, room_id, user_id, supported_versions=supported_versions
+        )
+        return 200, result
+
+
+class FederationV1SendKnockServlet(BaseFederationServerServlet):
+    PATH = "/send_knock/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        result = await self.handler.on_send_knock_request(origin, content, room_id)
+        return 200, result
+
+
+class FederationEventAuthServlet(BaseFederationServerServlet):
+    PATH = "/event_auth/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        return await self.handler.on_event_auth(origin, room_id, event_id)
+
+
+class FederationV1SendJoinServlet(BaseFederationServerServlet):
+    PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, Tuple[int, JsonDict]]:
+        # TODO(paul): assert that event_id parsed from path actually
+        # match those given in content
+        result = await self.handler.on_send_join_request(origin, content, room_id)
+        return 200, (200, result)
+
+
+class FederationV2SendJoinServlet(BaseFederationServerServlet):
+    PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    PREFIX = FEDERATION_V2_PREFIX
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        # TODO(paul): assert that event_id parsed from path actually
+        # match those given in content
+        result = await self.handler.on_send_join_request(origin, content, room_id)
+        return 200, result
+
+
+class FederationV1InviteServlet(BaseFederationServerServlet):
+    PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, Tuple[int, JsonDict]]:
+        # We don't get a room version, so we have to assume it's EITHER v1 or
+        # v2. This is "fine" as the only difference between V1 and V2 is the
+        # state resolution algorithm, and we don't use that for processing
+        # invites
+        result = await self.handler.on_invite_request(
+            origin, content, room_version_id=RoomVersions.V1.identifier
+        )
+
+        # V1 federation API is defined to return a content of `[200, {...}]`
+        # due to a historical bug.
+        return 200, (200, result)
+
+
+class FederationV2InviteServlet(BaseFederationServerServlet):
+    PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    PREFIX = FEDERATION_V2_PREFIX
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        # TODO(paul): assert that room_id/event_id parsed from path actually
+        # match those given in content
+
+        room_version = content["room_version"]
+        event = content["event"]
+        invite_room_state = content["invite_room_state"]
+
+        # Synapse expects invite_room_state to be in unsigned, as it is in v1
+        # API
+
+        event.setdefault("unsigned", {})["invite_room_state"] = invite_room_state
+
+        result = await self.handler.on_invite_request(
+            origin, event, room_version_id=room_version
+        )
+        return 200, result
+
+
+class FederationThirdPartyInviteExchangeServlet(BaseFederationServerServlet):
+    PATH = "/exchange_third_party_invite/(?P<room_id>[^/]*)"
+
+    async def on_PUT(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        await self.handler.on_exchange_third_party_invite_request(content)
+        return 200, {}
+
+
+class FederationClientKeysQueryServlet(BaseFederationServerServlet):
+    PATH = "/user/keys/query"
+
+    async def on_POST(
+        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
+    ) -> Tuple[int, JsonDict]:
+        return await self.handler.on_query_client_keys(origin, content)
+
+
+class FederationUserDevicesQueryServlet(BaseFederationServerServlet):
+    PATH = "/user/devices/(?P<user_id>[^/]*)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        return await self.handler.on_query_user_devices(origin, user_id)
+
+
+class FederationClientKeysClaimServlet(BaseFederationServerServlet):
+    PATH = "/user/keys/claim"
+
+    async def on_POST(
+        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
+    ) -> Tuple[int, JsonDict]:
+        response = await self.handler.on_claim_client_keys(origin, content)
+        return 200, response
+
+
+class FederationGetMissingEventsServlet(BaseFederationServerServlet):
+    # TODO(paul): Why does this path alone end with "/?" optional?
+    PATH = "/get_missing_events/(?P<room_id>[^/]*)/?"
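# A typical request body, using only the fields read below (the event IDs
# are illustrative):
#     {"limit": 10,
#      "earliest_events": ["$oldest_event_we_have:example.org"],
#      "latest_events": ["$newest_event_we_know_of:example.org"]}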
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        limit = int(content.get("limit", 10))
+        earliest_events = content.get("earliest_events", [])
+        latest_events = content.get("latest_events", [])
+
+        result = await self.handler.on_get_missing_events(
+            origin,
+            room_id=room_id,
+            earliest_events=earliest_events,
+            latest_events=latest_events,
+            limit=limit,
+        )
+
+        return 200, result
+
+
+class On3pidBindServlet(BaseFederationServerServlet):
+    PATH = "/3pid/onbind"
+
+    REQUIRE_AUTH = False
+
+    async def on_POST(
+        self, origin: Optional[str], content: JsonDict, query: Dict[bytes, List[bytes]]
+    ) -> Tuple[int, JsonDict]:
+        if "invites" in content:
+            last_exception = None
+            for invite in content["invites"]:
+                try:
+                    if "signed" not in invite or "token" not in invite["signed"]:
+                        message = (
+                            "Rejecting received notification of third-"
+                            "party invite without signed: %s" % (invite,)
+                        )
+                        logger.info(message)
+                        raise SynapseError(400, message)
+                    await self.handler.exchange_third_party_invite(
+                        invite["sender"],
+                        invite["mxid"],
+                        invite["room_id"],
+                        invite["signed"],
+                    )
+                except Exception as e:
+                    last_exception = e
+            if last_exception:
+                raise last_exception
+        return 200, {}
+
+
+class FederationVersionServlet(BaseFederationServlet):
+    PATH = "/version"
+
+    REQUIRE_AUTH = False
+
+    async def on_GET(
+        self,
+        origin: Optional[str],
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+    ) -> Tuple[int, JsonDict]:
+        return (
+            200,
+            {"server": {"name": "Synapse", "version": get_version_string(synapse)}},
+        )
+
+
+class FederationSpaceSummaryServlet(BaseFederationServlet):
+    PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946"
+    PATH = "/spaces/(?P<room_id>[^/]*)"
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_room_summary_handler()
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Mapping[bytes, Sequence[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        suggested_only = parse_boolean_from_args(query, "suggested_only", default=False)
+
+        max_rooms_per_space = parse_integer_from_args(query, "max_rooms_per_space")
+        if max_rooms_per_space is not None and max_rooms_per_space < 0:
+            raise SynapseError(
+                400,
+                "Value for 'max_rooms_per_space' must be a non-negative integer",
+                Codes.BAD_JSON,
+            )
+
+        exclude_rooms = parse_strings_from_args(query, "exclude_rooms", default=[])
+
+        return 200, await self.handler.federation_space_summary(
+            origin, room_id, suggested_only, max_rooms_per_space, exclude_rooms
+        )
+
+    # TODO When switching to the stable endpoint, remove the POST handler.
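# For reference, a POST body exercising every branch validated below (all
# fields optional; values illustrative):
#     {"suggested_only": true,
#      "exclude_rooms": ["!a:example.org"],
#      "max_rooms_per_space": 5}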
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Mapping[bytes, Sequence[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        suggested_only = content.get("suggested_only", False)
+        if not isinstance(suggested_only, bool):
+            raise SynapseError(
+                400, "'suggested_only' must be a boolean", Codes.BAD_JSON
+            )
+
+        exclude_rooms = content.get("exclude_rooms", [])
+        if not isinstance(exclude_rooms, list) or any(
+            not isinstance(x, str) for x in exclude_rooms
+        ):
+            raise SynapseError(400, "bad value for 'exclude_rooms'", Codes.BAD_JSON)
+
+        max_rooms_per_space = content.get("max_rooms_per_space")
+        if max_rooms_per_space is not None:
+            if not isinstance(max_rooms_per_space, int):
+                raise SynapseError(
+                    400, "bad value for 'max_rooms_per_space'", Codes.BAD_JSON
+                )
+            if max_rooms_per_space < 0:
+                raise SynapseError(
+                    400,
+                    "Value for 'max_rooms_per_space' must be a non-negative integer",
+                    Codes.BAD_JSON,
+                )
+
+        return 200, await self.handler.federation_space_summary(
+            origin, room_id, suggested_only, max_rooms_per_space, exclude_rooms
+        )
+
+
+class FederationRoomHierarchyServlet(BaseFederationServlet):
+    PATH = "/hierarchy/(?P<room_id>[^/]*)"
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_room_summary_handler()
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Mapping[bytes, Sequence[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        suggested_only = parse_boolean_from_args(query, "suggested_only", default=False)
+        return 200, await self.handler.get_federation_hierarchy(
+            origin, room_id, suggested_only
+        )
+
+
+class FederationRoomHierarchyUnstableServlet(FederationRoomHierarchyServlet):
+    PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946"
+
+
+class RoomComplexityServlet(BaseFederationServlet):
+    """
+    Indicates to other servers how complex (and therefore likely
+    resource-intensive) a public room this server knows about is.
+    """
+
+    PATH = "/rooms/(?P<room_id>[^/]*)/complexity"
+    PREFIX = FEDERATION_UNSTABLE_PREFIX
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self._store = self.hs.get_datastore()
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        is_public = await self._store.is_room_world_readable_or_publicly_joinable(
+            room_id
+        )
+
+        if not is_public:
+            raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM)
+
+        complexity = await self._store.get_room_complexity(room_id)
+        return 200, complexity
+
+
+FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
+    FederationSendServlet,
+    FederationEventServlet,
+    FederationStateV1Servlet,
+    FederationStateIdsServlet,
+    FederationBackfillServlet,
+    FederationTimestampLookupServlet,
+    FederationQueryServlet,
+    FederationMakeJoinServlet,
+    FederationMakeLeaveServlet,
+    FederationEventServlet,
+    FederationV1SendJoinServlet,
+    FederationV2SendJoinServlet,
+    FederationV1SendLeaveServlet,
+    FederationV2SendLeaveServlet,
+    FederationV1InviteServlet,
+    FederationV2InviteServlet,
+    FederationGetMissingEventsServlet,
+    FederationEventAuthServlet,
+    FederationClientKeysQueryServlet,
+    FederationUserDevicesQueryServlet,
+    FederationClientKeysClaimServlet,
+    FederationThirdPartyInviteExchangeServlet,
+    On3pidBindServlet,
+    FederationVersionServlet,
+    RoomComplexityServlet,
+    FederationSpaceSummaryServlet,
+    FederationRoomHierarchyServlet,
+    FederationRoomHierarchyUnstableServlet,
+    FederationV1SendKnockServlet,
+    FederationMakeKnockServlet,
+)
diff --git a/synapse/federation/transport/server/groups_local.py b/synapse/federation/transport/server/groups_local.py
new file mode 100644
index 000000000000..a12cd18d5806
--- /dev/null
+++ b/synapse/federation/transport/server/groups_local.py
@@ -0,0 +1,113 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, List, Tuple, Type
+
+from synapse.api.errors import SynapseError
+from synapse.federation.transport.server._base import (
+    Authenticator,
+    BaseFederationServlet,
+)
+from synapse.handlers.groups_local import GroupsLocalHandler
+from synapse.server import HomeServer
+from synapse.types import JsonDict, get_domain_from_id
+from synapse.util.ratelimitutils import FederationRateLimiter
+
+
+class BaseGroupsLocalServlet(BaseFederationServlet):
+    """Abstract base class for federation servlet classes which provides a groups local handler.
+
+    See BaseFederationServlet for more information.
+    """
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_groups_local_handler()
+
+
+class FederationGroupsLocalInviteServlet(BaseGroupsLocalServlet):
+    """A group server has invited a local user"""
+
+    PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        if get_domain_from_id(group_id) != origin:
+            raise SynapseError(403, "group_id doesn't match origin")
+
+        assert isinstance(
+            self.handler, GroupsLocalHandler
+        ), "Workers cannot handle group invites."
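# (The isinstance check above is the worker/master split: on worker
# processes, hs.get_groups_local_handler() is expected to return a
# read-only variant without the write methods. That reading is inferred
# from the assert message, not stated elsewhere in the patch.)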
+
+
+class FederationGroupsRemoveLocalUserServlet(BaseGroupsLocalServlet):
+    """A group server has removed a local user"""
+
+    PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, None]:
+        if get_domain_from_id(group_id) != origin:
+            raise SynapseError(403, "group_id doesn't match origin")
+
+        assert isinstance(
+            self.handler, GroupsLocalHandler
+        ), "Workers cannot handle group removals."
+
+        await self.handler.user_removed_from_group(group_id, user_id, content)
+
+        return 200, None
+
+
+class FederationGroupsBulkPublicisedServlet(BaseGroupsLocalServlet):
+    """Get the groups a list of users are publicising"""
+
+    PATH = "/get_groups_publicised"
+
+    async def on_POST(
+        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
+    ) -> Tuple[int, JsonDict]:
+        resp = await self.handler.bulk_get_publicised_groups(
+            content["user_ids"], proxy=False
+        )
+
+        return 200, resp
+
+
+GROUP_LOCAL_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
+    FederationGroupsLocalInviteServlet,
+    FederationGroupsRemoveLocalUserServlet,
+    FederationGroupsBulkPublicisedServlet,
+)
diff --git a/synapse/federation/transport/server/groups_server.py b/synapse/federation/transport/server/groups_server.py
new file mode 100644
index 000000000000..b30e92a5eb74
--- /dev/null
+++ b/synapse/federation/transport/server/groups_server.py
@@ -0,0 +1,753 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, List, Tuple, Type
+
+from typing_extensions import Literal
+
+from synapse.api.constants import MAX_GROUP_CATEGORYID_LENGTH, MAX_GROUP_ROLEID_LENGTH
+from synapse.api.errors import Codes, SynapseError
+from synapse.federation.transport.server._base import (
+    Authenticator,
+    BaseFederationServlet,
+)
+from synapse.http.servlet import parse_string_from_args
+from synapse.server import HomeServer
+from synapse.types import JsonDict, get_domain_from_id
+from synapse.util.ratelimitutils import FederationRateLimiter
+
+
+class BaseGroupsServerServlet(BaseFederationServlet):
+    """Abstract base class for federation servlet classes which provides a groups server handler.
+
+    See BaseFederationServlet for more information.
+    """
+
+    def __init__(
+        self,
+        hs: HomeServer,
+        authenticator: Authenticator,
+        ratelimiter: FederationRateLimiter,
+        server_name: str,
+    ):
+        super().__init__(hs, authenticator, ratelimiter, server_name)
+        self.handler = hs.get_groups_server_handler()
+ """ + + def __init__( + self, + hs: HomeServer, + authenticator: Authenticator, + ratelimiter: FederationRateLimiter, + server_name: str, + ): + super().__init__(hs, authenticator, ratelimiter, server_name) + self.handler = hs.get_groups_server_handler() + + +class FederationGroupsProfileServlet(BaseGroupsServerServlet): + """Get/set the basic profile of a group on behalf of a user""" + + PATH = "/groups/(?P[^/]*)/profile" + + async def on_GET( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + group_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.get_group_profile(group_id, requester_user_id) + + return 200, new_content + + async def on_POST( + self, + origin: str, + content: JsonDict, + query: Dict[bytes, List[bytes]], + group_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.update_group_profile( + group_id, requester_user_id, content + ) + + return 200, new_content + + +class FederationGroupsSummaryServlet(BaseGroupsServerServlet): + PATH = "/groups/(?P[^/]*)/summary" + + async def on_GET( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + group_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.get_group_summary(group_id, requester_user_id) + + return 200, new_content + + +class FederationGroupsRoomsServlet(BaseGroupsServerServlet): + """Get the rooms in a group on behalf of a user""" + + PATH = "/groups/(?P[^/]*)/rooms" + + async def on_GET( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + group_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.get_rooms_in_group(group_id, requester_user_id) + + return 200, new_content + + +class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet): + """Add/remove room from group""" + + PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)" + + async def on_POST( + self, + origin: str, + content: JsonDict, + query: Dict[bytes, List[bytes]], + group_id: str, + room_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.add_room_to_group( + group_id, requester_user_id, room_id, content + ) + + return 200, new_content + + async def on_DELETE( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + group_id: str, + room_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + 
+
+
+class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet):
+    """Add/remove room from group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.add_room_to_group(
+            group_id, requester_user_id, room_id, content
+        )
+
+        return 200, new_content
+
+    async def on_DELETE(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.remove_room_from_group(
+            group_id, requester_user_id, room_id
+        )
+
+        return 200, new_content
+
+
+class FederationGroupsAddRoomsConfigServlet(BaseGroupsServerServlet):
+    """Update room config in group"""
+
+    PATH = (
+        "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)"
+        "/config/(?P<config_key>[^/]*)"
+    )
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        room_id: str,
+        config_key: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        result = await self.handler.update_room_in_group(
+            group_id, requester_user_id, room_id, config_key, content
+        )
+
+        return 200, result
+
+
+class FederationGroupsUsersServlet(BaseGroupsServerServlet):
+    """Get the users in a group on behalf of a user"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/users"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.get_users_in_group(
+            group_id, requester_user_id
+        )
+
+        return 200, new_content
+
+
+class FederationGroupsInvitedUsersServlet(BaseGroupsServerServlet):
+    """Get the users that have been invited to a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/invited_users"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.get_invited_users_in_group(
+            group_id, requester_user_id
+        )
+
+        return 200, new_content
+
+
+class FederationGroupsInviteServlet(BaseGroupsServerServlet):
+    """Ask a group server to invite someone to the group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.invite_to_group(
+            group_id, user_id, requester_user_id, content
+        )
+
+        return 200, new_content
+
+
+class FederationGroupsAcceptInviteServlet(BaseGroupsServerServlet):
+    """Accept an invitation from the group server"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        if get_domain_from_id(user_id) != origin:
+            raise SynapseError(403, "user_id doesn't match origin")
+
+        new_content = await self.handler.accept_invite(group_id, user_id, content)
+
+        return 200, new_content
+
+
+class FederationGroupsJoinServlet(BaseGroupsServerServlet):
+    """Attempt to join a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        if get_domain_from_id(user_id) != origin:
+            raise SynapseError(403, "user_id doesn't match origin")
+
+        new_content = await self.handler.join_group(group_id, user_id, content)
+
+        return 200, new_content
+
+
+class FederationGroupsRemoveUserServlet(BaseGroupsServerServlet):
+    """Leave or kick a user from the group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove"
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        new_content = await self.handler.remove_user_from_group(
+            group_id, user_id, requester_user_id, content
+        )
+
+        return 200, new_content
+
+
+class FederationGroupsSummaryRoomsServlet(BaseGroupsServerServlet):
+    """Add/remove a room from the group summary, with optional category.
+
+    Matches both:
+        - /groups/:group/summary/rooms/:room_id
+        - /groups/:group/summary/categories/:category/rooms/:room_id
+    """
+
+    PATH = (
+        "/groups/(?P<group_id>[^/]*)/summary"
+        "(/categories/(?P<category_id>[^/]+))?"
+        "/rooms/(?P<room_id>[^/]*)"
+    )
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        category_id: str,
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if category_id == "":
+            raise SynapseError(
+                400, "category_id cannot be empty string", Codes.INVALID_PARAM
+            )
+
+        if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH:
+            raise SynapseError(
+                400,
+                "category_id may not be longer than %s characters"
+                % (MAX_GROUP_CATEGORYID_LENGTH,),
+                Codes.INVALID_PARAM,
+            )
+
+        resp = await self.handler.update_group_summary_room(
+            group_id,
+            requester_user_id,
+            room_id=room_id,
+            category_id=category_id,
+            content=content,
+        )
+
+        return 200, resp
+
+    async def on_DELETE(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        category_id: str,
+        room_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if category_id == "":
+            raise SynapseError(400, "category_id cannot be empty string")
+
+        resp = await self.handler.delete_group_summary_room(
+            group_id, requester_user_id, room_id=room_id, category_id=category_id
+        )
+
+        return 200, resp
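
The optional (/categories/...)? group in the PATH above is how a single servlet serves both path shapes listed in the docstring; when the segment is absent, the regex engine hands back None for the named group. A quick, standalone demonstration with the standard re module:

import re

PATH = (
    "/groups/(?P<group_id>[^/]*)/summary"
    "(/categories/(?P<category_id>[^/]+))?"
    "/rooms/(?P<room_id>[^/]*)"
)
pattern = re.compile("^" + PATH + "$")

for path in (
    "/groups/+g:example.org/summary/rooms/!r:example.org",
    "/groups/+g:example.org/summary/categories/music/rooms/!r:example.org",
):
    match = pattern.match(path)
    assert match is not None
    # category_id is None for the first form, "music" for the second.
    print(match.group("group_id"), match.group("category_id"), match.group("room_id"))
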
+
+
+class FederationGroupsCategoriesServlet(BaseGroupsServerServlet):
+    """Get all categories for a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/categories/?"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        resp = await self.handler.get_group_categories(group_id, requester_user_id)
+
+        return 200, resp
+
+
+class FederationGroupsCategoryServlet(BaseGroupsServerServlet):
+    """Add/remove/get a category in a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/categories/(?P<category_id>[^/]+)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        category_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        resp = await self.handler.get_group_category(
+            group_id, requester_user_id, category_id
+        )
+
+        return 200, resp
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        category_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if category_id == "":
+            raise SynapseError(400, "category_id cannot be empty string")
+
+        if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH:
+            raise SynapseError(
+                400,
+                "category_id may not be longer than %s characters"
+                % (MAX_GROUP_CATEGORYID_LENGTH,),
+                Codes.INVALID_PARAM,
+            )
+
+        resp = await self.handler.upsert_group_category(
+            group_id, requester_user_id, category_id, content
+        )
+
+        return 200, resp
+
+    async def on_DELETE(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        category_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if category_id == "":
+            raise SynapseError(400, "category_id cannot be empty string")
+
+        resp = await self.handler.delete_group_category(
+            group_id, requester_user_id, category_id
+        )
+
+        return 200, resp
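
The category_id validation above (reject the empty string, cap the length) reappears essentially verbatim for role IDs below. The rule, extracted as a sketch; the value 255 is assumed here as a stand-in for MAX_GROUP_CATEGORYID_LENGTH from synapse.api.constants:

MAX_GROUP_CATEGORYID_LENGTH = 255  # assumed value for illustration

def validate_category_id(category_id: str) -> None:
    """Mirror of the checks in on_POST/on_DELETE above: reject empty IDs
    and IDs longer than the configured maximum."""
    if category_id == "":
        raise ValueError("category_id cannot be empty string")
    if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH:
        raise ValueError(
            "category_id may not be longer than %s characters"
            % (MAX_GROUP_CATEGORYID_LENGTH,)
        )

validate_category_id("music")  # fine; "" or a 256-char ID would raise
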
+
+
+class FederationGroupsRolesServlet(BaseGroupsServerServlet):
+    """Get roles in a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/roles/?"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        resp = await self.handler.get_group_roles(group_id, requester_user_id)
+
+        return 200, resp
+
+
+class FederationGroupsRoleServlet(BaseGroupsServerServlet):
+    """Add/remove/get a role in a group"""
+
+    PATH = "/groups/(?P<group_id>[^/]*)/roles/(?P<role_id>[^/]+)"
+
+    async def on_GET(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        role_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        resp = await self.handler.get_group_role(group_id, requester_user_id, role_id)
+
+        return 200, resp
+
+    async def on_POST(
+        self,
+        origin: str,
+        content: JsonDict,
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        role_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if role_id == "":
+            raise SynapseError(
+                400, "role_id cannot be empty string", Codes.INVALID_PARAM
+            )
+
+        if len(role_id) > MAX_GROUP_ROLEID_LENGTH:
+            raise SynapseError(
+                400,
+                "role_id may not be longer than %s characters"
+                % (MAX_GROUP_ROLEID_LENGTH,),
+                Codes.INVALID_PARAM,
+            )
+
+        resp = await self.handler.update_group_role(
+            group_id, requester_user_id, role_id, content
+        )
+
+        return 200, resp
+
+    async def on_DELETE(
+        self,
+        origin: str,
+        content: Literal[None],
+        query: Dict[bytes, List[bytes]],
+        group_id: str,
+        role_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
+        if get_domain_from_id(requester_user_id) != origin:
+            raise SynapseError(403, "requester_user_id doesn't match origin")
+
+        if role_id == "":
+            raise SynapseError(400, "role_id cannot be empty string")
+
+        resp = await self.handler.delete_group_role(
+            group_id, requester_user_id, role_id
+        )
+
+        return 200, resp
+
+
+class FederationGroupsSummaryUsersServlet(BaseGroupsServerServlet):
+    """Add/remove a user from the group summary, with optional role.
+
+    Matches both:
+        - /groups/:group/summary/users/:user_id
+        - /groups/:group/summary/roles/:role/users/:user_id
+    """
+
+    PATH = (
+        "/groups/(?P<group_id>[^/]*)/summary"
+        "(/roles/(?P<role_id>[^/]+))?"
+        "/users/(?P<user_id>[^/]*)"
+    )
+ "/users/(?P[^/]*)" + ) + + async def on_POST( + self, + origin: str, + content: JsonDict, + query: Dict[bytes, List[bytes]], + group_id: str, + role_id: str, + user_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + if role_id == "": + raise SynapseError(400, "role_id cannot be empty string") + + if len(role_id) > MAX_GROUP_ROLEID_LENGTH: + raise SynapseError( + 400, + "role_id may not be longer than %s characters" + % (MAX_GROUP_ROLEID_LENGTH,), + Codes.INVALID_PARAM, + ) + + resp = await self.handler.update_group_summary_user( + group_id, + requester_user_id, + user_id=user_id, + role_id=role_id, + content=content, + ) + + return 200, resp + + async def on_DELETE( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + group_id: str, + role_id: str, + user_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + if role_id == "": + raise SynapseError(400, "role_id cannot be empty string") + + resp = await self.handler.delete_group_summary_user( + group_id, requester_user_id, user_id=user_id, role_id=role_id + ) + + return 200, resp + + +class FederationGroupsSettingJoinPolicyServlet(BaseGroupsServerServlet): + """Sets whether a group is joinable without an invite or knock""" + + PATH = "/groups/(?P[^/]*)/settings/m.join_policy" + + async def on_PUT( + self, + origin: str, + content: JsonDict, + query: Dict[bytes, List[bytes]], + group_id: str, + ) -> Tuple[int, JsonDict]: + requester_user_id = parse_string_from_args( + query, "requester_user_id", required=True + ) + if get_domain_from_id(requester_user_id) != origin: + raise SynapseError(403, "requester_user_id doesn't match origin") + + new_content = await self.handler.set_group_join_policy( + group_id, requester_user_id, content + ) + + return 200, new_content + + +GROUP_SERVER_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( + FederationGroupsProfileServlet, + FederationGroupsSummaryServlet, + FederationGroupsRoomsServlet, + FederationGroupsUsersServlet, + FederationGroupsInvitedUsersServlet, + FederationGroupsInviteServlet, + FederationGroupsAcceptInviteServlet, + FederationGroupsJoinServlet, + FederationGroupsRemoveUserServlet, + FederationGroupsSummaryRoomsServlet, + FederationGroupsCategoriesServlet, + FederationGroupsCategoryServlet, + FederationGroupsRolesServlet, + FederationGroupsRoleServlet, + FederationGroupsSummaryUsersServlet, + FederationGroupsAddRoomsServlet, + FederationGroupsAddRoomsConfigServlet, + FederationGroupsSettingJoinPolicyServlet, +) diff --git a/synapse/federation/units.py b/synapse/federation/units.py index c83a261918c0..b9b12fbea563 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -17,18 +17,17 @@ """ import logging -from typing import Optional +from typing import List, Optional import attr from synapse.types import JsonDict -from synapse.util.jsonobject import JsonEncodedObject logger = logging.getLogger(__name__) -@attr.s(slots=True) -class Edu(JsonEncodedObject): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class Edu: """An Edu represents a piece of data sent from one homeserver to another. 
In comparison to Pdus, Edus are not persisted for a long time on disk, are @@ -36,10 +35,10 @@ class Edu(JsonEncodedObject): internal ID or previous references graph. """ - edu_type = attr.ib(type=str) - content = attr.ib(type=dict) - origin = attr.ib(type=str) - destination = attr.ib(type=str) + edu_type: str + content: dict + origin: str + destination: str def get_dict(self) -> JsonDict: return { @@ -55,14 +54,21 @@ def get_internal_dict(self) -> JsonDict: "destination": self.destination, } - def get_context(self): + def get_context(self) -> str: return getattr(self, "content", {}).get("org.matrix.opentracing_context", "{}") - def strip_context(self): + def strip_context(self) -> None: getattr(self, "content", {})["org.matrix.opentracing_context"] = "{}" -class Transaction(JsonEncodedObject): +def _none_to_list(edus: Optional[List[JsonDict]]) -> List[JsonDict]: + if edus is None: + return [] + return edus + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class Transaction: """A transaction is a list of Pdus and Edus to be sent to a remote home server with some extra metadata. @@ -78,47 +84,21 @@ class Transaction(JsonEncodedObject): """ - valid_keys = [ - "transaction_id", - "origin", - "destination", - "origin_server_ts", - "previous_ids", - "pdus", - "edus", - ] - - internal_keys = ["transaction_id", "destination"] - - required_keys = [ - "transaction_id", - "origin", - "destination", - "origin_server_ts", - "pdus", - ] - - def __init__(self, transaction_id=None, pdus: Optional[list] = None, **kwargs): - """If we include a list of pdus then we decode then as PDU's - automatically. - """ - - # If there's no EDUs then remove the arg - if "edus" in kwargs and not kwargs["edus"]: - del kwargs["edus"] - - super().__init__(transaction_id=transaction_id, pdus=pdus or [], **kwargs) - - @staticmethod - def create_new(pdus, **kwargs): - """Used to create a new transaction. Will auto fill out - transaction_id and origin_server_ts keys. - """ - if "origin_server_ts" not in kwargs: - raise KeyError("Require 'origin_server_ts' to construct a Transaction") - if "transaction_id" not in kwargs: - raise KeyError("Require 'transaction_id' to construct a Transaction") - - kwargs["pdus"] = [p.get_pdu_json() for p in pdus] - - return Transaction(**kwargs) + # Required keys. 
+ transaction_id: str + origin: str + destination: str + origin_server_ts: int + pdus: List[JsonDict] = attr.ib(factory=list, converter=_none_to_list) + edus: List[JsonDict] = attr.ib(factory=list, converter=_none_to_list) + + def get_dict(self) -> JsonDict: + """A JSON-ready dictionary of valid keys which aren't internal.""" + result = { + "origin": self.origin, + "origin_server_ts": self.origin_server_ts, + "pdus": self.pdus, + } + if self.edus: + result["edus"] = self.edus + return result diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index ff8372c4e9ad..a87896e5386c 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -40,6 +40,8 @@ from signedjson.sign import sign_json +from twisted.internet.defer import Deferred + from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import JsonDict, get_domain_from_id @@ -144,7 +146,7 @@ def __init__(self, hs: "HomeServer"): self.is_mine_id = hs.is_mine_id self.attestations = hs.get_groups_attestation_signing() - if not hs.config.worker_app: + if not hs.config.worker.worker_app: self._renew_attestations_loop = self.clock.looping_call( self._start_renew_attestations, 30 * 60 * 1000 ) @@ -166,7 +168,7 @@ async def on_renew_attestation( return {} - def _start_renew_attestations(self) -> None: + def _start_renew_attestations(self) -> "Deferred[None]": return run_as_background_process("renew_attestations", self._renew_attestations) async def _renew_attestations(self) -> None: diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index 3dc55ab861a5..449bbc70042d 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -332,6 +332,13 @@ async def get_rooms_in_group( requester_user_id, group_id ) + # Note! room_results["is_public"] is about whether the room is considered + # public from the group's point of view. (i.e. whether non-group members + # should be able to see the room is in the group). + # This is not the same as whether the room itself is public (in the sense + # of being visible in the room directory). + # As such, room_results["is_public"] itself is not sufficient to determine + # whether any given user is permitted to see the room's metadata. 
room_results = await self.store.get_rooms_in_group( group_id, include_private=is_user_in_group ) @@ -341,8 +348,15 @@ async def get_rooms_in_group( room_id = room_result["room_id"] joined_users = await self.store.get_users_in_room(room_id) + + # check the user is actually allowed to see the room before showing it to them + allow_private = requester_user_id in joined_users + entry = await self.room_list_handler.generate_room_entry( - room_id, len(joined_users), with_alias=False, allow_private=True + room_id, + len(joined_users), + with_alias=False, + allow_private=allow_private, ) if not entry: @@ -354,7 +368,7 @@ async def get_rooms_in_group( chunk.sort(key=lambda e: -e["num_joined_members"]) - return {"chunk": chunk, "total_room_count_estimate": len(room_results)} + return {"chunk": chunk, "total_room_count_estimate": len(chunk)} class GroupsServerHandler(GroupsServerWorkerHandler): @@ -833,16 +847,16 @@ async def create_group( UserID.from_string(requester_user_id) ) if not is_admin: - if not self.hs.config.enable_group_creation: + if not self.hs.config.groups.enable_group_creation: raise SynapseError( 403, "Only a server admin can create groups on this server" ) localpart = group_id_obj.localpart - if not localpart.startswith(self.hs.config.group_creation_prefix): + if not localpart.startswith(self.hs.config.groups.group_creation_prefix): raise SynapseError( 400, "Can only create groups with prefix %r on this server" - % (self.hs.config.group_creation_prefix,), + % (self.hs.config.groups.group_creation_prefix,), ) profile = content.get("profile", {}) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py deleted file mode 100644 index 6a05a6530599..000000000000 --- a/synapse/handlers/_base.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2014 - 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import TYPE_CHECKING, Optional - -import synapse.types -from synapse.api.constants import EventTypes, Membership -from synapse.api.ratelimiting import Ratelimiter -from synapse.types import UserID - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class BaseHandler: - """ - Common base class for the event handlers. - - Deprecated: new code should not use this. Instead, Handler classes should define the - fields they actually need. The utility methods should either be factored out to - standalone helper functions, or to different Handler classes. 
- """ - - def __init__(self, hs: "HomeServer"): - self.store = hs.get_datastore() - self.auth = hs.get_auth() - self.notifier = hs.get_notifier() - self.state_handler = hs.get_state_handler() - self.distributor = hs.get_distributor() - self.clock = hs.get_clock() - self.hs = hs - - # The rate_hz and burst_count are overridden on a per-user basis - self.request_ratelimiter = Ratelimiter( - store=self.store, clock=self.clock, rate_hz=0, burst_count=0 - ) - self._rc_message = self.hs.config.rc_message - - # Check whether ratelimiting room admin message redaction is enabled - # by the presence of rate limits in the config - if self.hs.config.rc_admin_redaction: - self.admin_redaction_ratelimiter: Optional[Ratelimiter] = Ratelimiter( - store=self.store, - clock=self.clock, - rate_hz=self.hs.config.rc_admin_redaction.per_second, - burst_count=self.hs.config.rc_admin_redaction.burst_count, - ) - else: - self.admin_redaction_ratelimiter = None - - self.server_name = hs.hostname - - self.event_builder_factory = hs.get_event_builder_factory() - - async def ratelimit(self, requester, update=True, is_admin_redaction=False): - """Ratelimits requests. - - Args: - requester (Requester) - update (bool): Whether to record that a request is being processed. - Set to False when doing multiple checks for one request (e.g. - to check up front if we would reject the request), and set to - True for the last call for a given request. - is_admin_redaction (bool): Whether this is a room admin/moderator - redacting an event. If so then we may apply different - ratelimits depending on config. - - Raises: - LimitExceededError if the request should be ratelimited - """ - user_id = requester.user.to_string() - - # The AS user itself is never rate limited. - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service is not None: - return # do not ratelimit app service senders - - messages_per_second = self._rc_message.per_second - burst_count = self._rc_message.burst_count - - # Check if there is a per user override in the DB. - override = await self.store.get_ratelimit_for_user(user_id) - if override: - # If overridden with a null Hz then ratelimiting has been entirely - # disabled for the user - if not override.messages_per_second: - return - - messages_per_second = override.messages_per_second - burst_count = override.burst_count - - if is_admin_redaction and self.admin_redaction_ratelimiter: - # If we have separate config for admin redactions, use a separate - # ratelimiter as to not have user_ids clash - await self.admin_redaction_ratelimiter.ratelimit(requester, update=update) - else: - # Override rate and burst count per-user - await self.request_ratelimiter.ratelimit( - requester, - rate_hz=messages_per_second, - burst_count=burst_count, - update=update, - ) - - async def maybe_kick_guest_users(self, event, context=None): - # Technically this function invalidates current_state by changing it. - # Hopefully this isn't that important to the caller. 
- if event.type == EventTypes.GuestAccess: - guest_access = event.content.get("guest_access", "forbidden") - if guest_access != "can_join": - if context: - current_state_ids = await context.get_current_state_ids() - current_state_dict = await self.store.get_events( - list(current_state_ids.values()) - ) - current_state = list(current_state_dict.values()) - else: - current_state_map = await self.state_handler.get_current_state( - event.room_id - ) - current_state = list(current_state_map.values()) - - logger.info("maybe_kick_guest_users %r", current_state) - await self.kick_guest_users(current_state) - - async def kick_guest_users(self, current_state): - for member_event in current_state: - try: - if member_event.type != EventTypes.Member: - continue - - target_user = UserID.from_string(member_event.state_key) - if not self.hs.is_mine(target_user): - continue - - if member_event.content["membership"] not in { - Membership.JOIN, - Membership.INVITE, - }: - continue - - if ( - "kind" not in member_event.content - or member_event.content["kind"] != "guest" - ): - continue - - # We make the user choose to leave, rather than have the - # event-sender kick them. This is partially because we don't - # need to worry about power levels, and partially because guest - # users are a concept which doesn't hugely work over federation, - # and having homeservers have their own users leave keeps more - # of that decision-making and control local to the guest-having - # homeserver. - requester = synapse.types.create_requester( - target_user, is_guest=True, authenticated_entity=self.server_name - ) - handler = self.hs.get_room_member_handler() - await handler.update_membership( - requester, - target_user, - member_event.room_id, - "leave", - ratelimit=False, - require_consent=False, - ) - except Exception as e: - logger.exception("Error kicking guest user: %s" % (e,)) diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index affb54e0ee4f..96273e2f81c3 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import random -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING, Collection, List, Optional, Tuple from synapse.replication.http.account_data import ( ReplicationAddTagRestServlet, @@ -21,6 +21,7 @@ ReplicationRoomAccountDataRestServlet, ReplicationUserAccountDataRestServlet, ) +from synapse.streams import EventSource from synapse.types import JsonDict, UserID if TYPE_CHECKING: @@ -163,7 +164,7 @@ async def remove_tag_from_room(self, user_id: str, room_id: str, tag: str) -> in return response["max_stream_id"] -class AccountDataEventSource: +class AccountDataEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -171,7 +172,13 @@ def get_current_key(self, direction: str = "f") -> int: return self.store.get_max_account_data_stream_id() async def get_new_events( - self, user: UserID, from_key: int, **kwargs + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Collection[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index 078accd634f1..87e415df75e8 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -47,7 +47,7 @@ def __init__(self, hs: "HomeServer"): self.send_email_handler = self.hs.get_send_email_handler() self.clock = self.hs.get_clock() - self._app_name = self.hs.config.email_app_name + self._app_name = self.hs.config.email.email_app_name self._account_validity_enabled = ( hs.config.account_validity.account_validity_enabled @@ -67,18 +67,14 @@ def __init__(self, hs: "HomeServer"): and self._account_validity_renew_by_email_enabled ): # Don't do email-specific configuration if renewal by email is disabled. - self._template_html = ( - hs.config.account_validity.account_validity_template_html - ) - self._template_text = ( - hs.config.account_validity.account_validity_template_text - ) + self._template_html = hs.config.email.account_validity_template_html + self._template_text = hs.config.email.account_validity_template_text self._renew_email_subject = ( hs.config.account_validity.account_validity_renew_email_subject ) # Check the renewal emails to send and send them every 30min. - if hs.config.run_background_tasks: + if hs.config.worker.run_background_tasks: self.clock.looping_call(self._send_renewal_emails, 30 * 60 * 1000) self._is_user_expired_callbacks: List[IS_USER_EXPIRED_CALLBACK] = [] @@ -99,7 +95,7 @@ def register_account_validity_callbacks( on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None, on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None, on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None, - ): + ) -> None: """Register callbacks from module for each hook.""" if is_user_expired is not None: self._is_user_expired_callbacks.append(is_user_expired) @@ -165,7 +161,7 @@ async def is_user_expired(self, user_id: str) -> bool: return False - async def on_user_registration(self, user_id: str): + async def on_user_registration(self, user_id: str) -> None: """Tell third-party modules about a user's registration. 
         Args:
@@ -249,7 +245,7 @@ async def _send_renewal_email(self, user_id: str, expiration_ts: int) -> None:
         renewal_token = await self._get_renewal_token(user_id)
 
         url = "%s_matrix/client/unstable/account_validity/renew?token=%s" % (
-            self.hs.config.public_baseurl,
+            self.hs.config.server.public_baseurl,
             renewal_token,
         )
@@ -398,6 +394,7 @@ async def renew_account_for_user(
         """
         now = self.clock.time_msec()
         if expiration_ts is None:
+            assert self._account_validity_period is not None
             expiration_ts = now + self._account_validity_period
 
         await self.store.set_account_validity_for_user(
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index bfa7f2c545c6..85157a138b71 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -21,18 +21,15 @@
 from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID
 from synapse.visibility import filter_events_for_client
 
-from ._base import BaseHandler
-
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
 
 
-class AdminHandler(BaseHandler):
+class AdminHandler:
     def __init__(self, hs: "HomeServer"):
-        super().__init__(hs)
-
+        self.store = hs.get_datastore()
         self.storage = hs.get_storage()
         self.state_store = self.storage.state
@@ -93,6 +90,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") ->
                 Membership.LEAVE,
                 Membership.BAN,
                 Membership.INVITE,
+                Membership.KNOCK,
             ),
         )
@@ -125,6 +123,13 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") ->
                 invited_state = invite.unsigned["invite_room_state"]
                 writer.write_invite(room_id, invite, invited_state)
 
+            if room.membership == Membership.KNOCK:
+                event_id = room.event_id
+                knock = await self.store.get_event(event_id, allow_none=True)
+                if knock:
+                    knock_state = knock.unsigned["knock_room_state"]
+                    writer.write_knock(room_id, knock, knock_state)
+
             continue
 
         # We only want to bother fetching events up to the last time they
@@ -229,7 +234,7 @@ def write_state(
     @abc.abstractmethod
     def write_invite(
-        self, room_id: str, event: EventBase, state: StateMap[dict]
+        self, room_id: str, event: EventBase, state: StateMap[EventBase]
    ) -> None:
         """Write an invite for the room, with associated invite state.
 
@@ -241,6 +246,20 @@ def write_invite(
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def write_knock(
+        self, room_id: str, event: EventBase, state: StateMap[EventBase]
+    ) -> None:
+        """Write a knock for the room, with associated knock state.
+
+        Args:
+            room_id: The room ID the knock is for.
+            event: The knock event.
+            state: A subset of the state at the knock, with a subset of the
+                event keys (type, state_key, content and sender).
+        """
+        raise NotImplementedError()
+
     @abc.abstractmethod
     def finished(self) -> Any:
         """Called when all data has successfully been exported and written.
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 21a17cd2e834..9abdad262b78 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
import logging -from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Union from prometheus_client import Counter @@ -34,6 +34,7 @@ ) from synapse.storage.databases.main.directory import RoomAliasMapping from synapse.types import JsonDict, RoomAlias, RoomStreamToken, UserID +from synapse.util.async_helpers import Linearizer from synapse.util.metrics import Measure if TYPE_CHECKING: @@ -52,13 +53,17 @@ def __init__(self, hs: "HomeServer"): self.scheduler = hs.get_application_service_scheduler() self.started_scheduler = False self.clock = hs.get_clock() - self.notify_appservices = hs.config.notify_appservices + self.notify_appservices = hs.config.appservice.notify_appservices self.event_sources = hs.get_event_sources() self.current_max = 0 self.is_processing = False - def notify_interested_services(self, max_token: RoomStreamToken): + self._ephemeral_events_linearizer = Linearizer( + name="appservice_ephemeral_events" + ) + + def notify_interested_services(self, max_token: RoomStreamToken) -> None: """Notifies (pushes) all application services interested in this event. Pushing is done asynchronously, so this method won't block for any @@ -82,7 +87,7 @@ def notify_interested_services(self, max_token: RoomStreamToken): self._notify_interested_services(max_token) @wrap_as_background_process("notify_interested_services") - async def _notify_interested_services(self, max_token: RoomStreamToken): + async def _notify_interested_services(self, max_token: RoomStreamToken) -> None: with Measure(self.clock, "notify_interested_services"): self.is_processing = True try: @@ -100,7 +105,7 @@ async def _notify_interested_services(self, max_token: RoomStreamToken): for event in events: events_by_room.setdefault(event.room_id, []).append(event) - async def handle_event(event): + async def handle_event(event: EventBase) -> None: # Gather interested services services = await self._get_services_for_event(event) if len(services) == 0: @@ -116,9 +121,9 @@ async def handle_event(event): if not self.started_scheduler: - async def start_scheduler(): + async def start_scheduler() -> None: try: - return await self.scheduler.start() + await self.scheduler.start() except Exception: logger.error("Application Services Failure") @@ -131,11 +136,13 @@ async def start_scheduler(): now = self.clock.time_msec() ts = await self.store.get_received_ts(event.event_id) + assert ts is not None + synapse.metrics.event_processing_lag_by_event.labels( "appservice_sender" ).observe((now - ts) / 1000) - async def handle_room_events(events): + async def handle_room_events(events: Iterable[EventBase]) -> None: for event in events: await handle_event(event) @@ -166,6 +173,7 @@ async def handle_room_events(events): if events: now = self.clock.time_msec() ts = await self.store.get_received_ts(events[-1].event_id) + assert ts is not None synapse.metrics.event_processing_lag.labels( "appservice_sender" @@ -179,41 +187,71 @@ async def handle_room_events(events): def notify_interested_services_ephemeral( self, stream_key: str, - new_token: Optional[int], - users: Optional[Collection[Union[str, UserID]]] = None, - ): - """This is called by the notifier in the background - when a ephemeral event handled by the homeserver. - - This will determine which appservices - are interested in the event, and submit them. 
+ new_token: Union[int, RoomStreamToken], + users: Collection[Union[str, UserID]], + ) -> None: + """ + This is called by the notifier in the background when an ephemeral event is handled + by the homeserver. - Events will only be pushed to appservices - that have opted into ephemeral events + This will determine which appservices are interested in the event, and submit them. Args: stream_key: The stream the event came from. - new_token: The latest stream token - users: The user(s) involved with the event. + + `stream_key` can be "typing_key", "receipt_key" or "presence_key". Any other + value for `stream_key` will cause this function to return early. + + Ephemeral events will only be pushed to appservices that have opted into + receiving them by setting `push_ephemeral` to true in their registration + file. Note that while MSC2409 is experimental, this option is called + `de.sorunome.msc2409.push_ephemeral`. + + Appservices will only receive ephemeral events that fall within their + registered user and room namespaces. + + new_token: The stream token of the event. + users: The users that should be informed of the new event, if any. """ if not self.notify_appservices: return + # Ignore any unsupported streams if stream_key not in ("typing_key", "receipt_key", "presence_key"): return + # Assert that new_token is an integer (and not a RoomStreamToken). + # All of the supported streams that this function handles use an + # integer to track progress (rather than a RoomStreamToken - a + # vector clock implementation) as they don't support multiple + # stream writers. + # + # As a result, we simply assert that new_token is an integer. + # If we do end up needing to pass a RoomStreamToken down here + # in the future, using RoomStreamToken.stream (the minimum stream + # position) to convert to an ascending integer value should work. + # Additional context: https://github.com/matrix-org/synapse/pull/11137 + assert isinstance(new_token, int) + + # Check whether there are any appservices which have registered to receive + # ephemeral events. + # + # Note that whether these events are actually relevant to these appservices + # is decided later on. services = [ service for service in self.store.get_app_services() if service.supports_ephemeral ] if not services: + # Bail out early if none of the target appservices have explicitly registered + # to receive these ephemeral events. return # We only start a new background process if necessary rather than # optimistically (to cut down on overhead). self._notify_interested_services_ephemeral( - services, stream_key, new_token, users or [] + services, stream_key, new_token, users ) @wrap_as_background_process("notify_interested_services_ephemeral") @@ -221,65 +259,161 @@ async def _notify_interested_services_ephemeral( self, services: List[ApplicationService], stream_key: str, - new_token: Optional[int], + new_token: int, users: Collection[Union[str, UserID]], - ): - logger.debug("Checking interested services for %s" % (stream_key)) + ) -> None: + logger.debug("Checking interested services for %s", stream_key) with Measure(self.clock, "notify_interested_services_ephemeral"): for service in services: - # Only handle typing if we have the latest token - if stream_key == "typing_key" and new_token is not None: + if stream_key == "typing_key": + # Note that we don't persist the token (via set_type_stream_id_for_appservice) + # for typing_key due to performance reasons and due to their highly + # ephemeral nature. 
+ # + # Instead we simply grab the latest typing updates in _handle_typing + # and, if they apply to this application service, send it off. events = await self._handle_typing(service, new_token) if events: self.scheduler.submit_ephemeral_events_for_as(service, events) - # We don't persist the token for typing_key for performance reasons - elif stream_key == "receipt_key": - events = await self._handle_receipts(service) - if events: - self.scheduler.submit_ephemeral_events_for_as(service, events) - await self.store.set_type_stream_id_for_appservice( - service, "read_receipt", new_token - ) - elif stream_key == "presence_key": - events = await self._handle_presence(service, users) - if events: - self.scheduler.submit_ephemeral_events_for_as(service, events) - await self.store.set_type_stream_id_for_appservice( - service, "presence", new_token + continue + + # Since we read/update the stream position for this AS/stream + with ( + await self._ephemeral_events_linearizer.queue( + (service.id, stream_key) ) + ): + if stream_key == "receipt_key": + events = await self._handle_receipts(service, new_token) + if events: + self.scheduler.submit_ephemeral_events_for_as( + service, events + ) + + # Persist the latest handled stream token for this appservice + await self.store.set_type_stream_id_for_appservice( + service, "read_receipt", new_token + ) + + elif stream_key == "presence_key": + events = await self._handle_presence(service, users, new_token) + if events: + self.scheduler.submit_ephemeral_events_for_as( + service, events + ) + + # Persist the latest handled stream token for this appservice + await self.store.set_type_stream_id_for_appservice( + service, "presence", new_token + ) async def _handle_typing( self, service: ApplicationService, new_token: int ) -> List[JsonDict]: - typing_source = self.event_sources.sources["typing"] + """ + Return the typing events since the given stream token that the given application + service should receive. + + First fetch all typing events between the given typing stream token (non-inclusive) + and the latest typing event stream token (inclusive). Then return only those typing + events that the given application service may be interested in. + + Args: + service: The application service to check for which events it should receive. + new_token: A typing event stream token. + + Returns: + A list of JSON dictionaries containing data derived from the typing events that + should be sent to the given application service. + """ + typing_source = self.event_sources.sources.typing # Get the typing events from just before current typing, _ = await typing_source.get_new_events_as( service=service, # For performance reasons, we don't persist the previous - # token in the DB and instead fetch the latest typing information + # token in the DB and instead fetch the latest typing event # for appservices. + # TODO: It'd likely be more efficient to simply fetch the + # typing event with the given 'new_token' stream token and + # check if the given service was interested, rather than + # iterating over all typing events and only grabbing the + # latest few. from_key=new_token - 1, ) return typing - async def _handle_receipts(self, service: ApplicationService) -> List[JsonDict]: + async def _handle_receipts( + self, service: ApplicationService, new_token: Optional[int] + ) -> List[JsonDict]: + """ + Return the latest read receipts that the given application service should receive. 
+
+        First fetch all read receipts between the last receipt stream token that this
+        application service should have previously received (non-inclusive) and the
+        latest read receipt stream token (inclusive). Then from that set, return only
+        those read receipts that the given application service may be interested in.
+
+        Args:
+            service: The application service to check for which events it should receive.
+            new_token: A receipts event stream token. Purely used to double-check that the
+                from_token we pull from the database isn't greater than or equal to this
+                token. Prevents accidentally duplicating work.
+
+        Returns:
+            A list of JSON dictionaries containing data derived from the read receipts that
+            should be sent to the given application service.
+        """
         from_key = await self.store.get_type_stream_id_for_appservice(
             service, "read_receipt"
         )
-        receipts_source = self.event_sources.sources["receipt"]
+        if new_token is not None and new_token <= from_key:
+            logger.debug(
+                "Rejecting token lower than or equal to stored: %s" % (new_token,)
+            )
+            return []
+
+        receipts_source = self.event_sources.sources.receipt
         receipts, _ = await receipts_source.get_new_events_as(
             service=service, from_key=from_key
         )
         return receipts
 
     async def _handle_presence(
-        self, service: ApplicationService, users: Collection[Union[str, UserID]]
+        self,
+        service: ApplicationService,
+        users: Collection[Union[str, UserID]],
+        new_token: Optional[int],
     ) -> List[JsonDict]:
+        """
+        Return the latest presence updates that the given application service should receive.
+
+        First, filter the given users list to those that the application service is
+        interested in. Then retrieve the latest presence updates since the
+        last-known previously received presence stream token for the given
+        application service. Return those presence updates.
+
+        Args:
+            service: The application service that ephemeral events are being sent to.
+            users: The users that should receive the presence update.
+            new_token: A presence update stream token. Purely used to double-check that the
+                from_token we pull from the database isn't greater than or equal to this
+                token. Prevents accidentally duplicating work.
+
+        Returns:
+            A list of JSON dictionaries containing data derived from the presence events
+            that should be sent to the given application service.
+ """ events: List[JsonDict] = [] - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence from_key = await self.store.get_type_stream_id_for_appservice( service, "presence" ) + if new_token is not None and new_token <= from_key: + logger.debug( + "Rejecting token lower than or equal to stored: %s" % (new_token,) + ) + return [] + for user in users: if isinstance(user, str): user = UserID.from_string(user) @@ -287,9 +421,9 @@ async def _handle_presence( interested = await service.is_interested_in_presence(user, self.store) if not interested: continue + presence_events, _ = await presence_source.get_new_events( user=user, - service=service, from_key=from_key, ) time_now = self.clock.time_msec() @@ -392,9 +526,6 @@ async def get_3pe_protocols( protocols[p].append(info) def _merge_instances(infos: List[JsonDict]) -> JsonDict: - if not infos: - return {} - # Merge the 'instances' lists of multiple results, but just take # the other fields from the first as they ought to be identical # copy the result so as not to corrupt the cached one @@ -406,7 +537,9 @@ def _merge_instances(infos: List[JsonDict]) -> JsonDict: return combined - return {p: _merge_instances(protocols[p]) for p in protocols.keys()} + return { + p: _merge_instances(protocols[p]) for p in protocols.keys() if protocols[p] + } async def _get_services_for_event( self, event: EventBase diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 22a855224188..61607cf2bad7 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -18,6 +18,7 @@ import unicodedata import urllib.parse from binascii import crc32 +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -29,6 +30,7 @@ Mapping, Optional, Tuple, + Type, Union, cast, ) @@ -37,6 +39,7 @@ import bcrypt import pymacaroons import unpaddedbase64 +from pymacaroons.exceptions import MacaroonVerificationFailedException from twisted.web.server import Request @@ -51,7 +54,6 @@ UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.handlers._base import BaseHandler from synapse.handlers.ui_auth import ( INTERACTIVE_AUTH_CHECKERS, UIAuthSessionDataConstants, @@ -62,7 +64,6 @@ from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.module_api import ModuleApi from synapse.storage.roommember import ProfileInfo from synapse.types import JsonDict, Requester, UserID from synapse.util import stringutils as stringutils @@ -73,7 +74,8 @@ from synapse.util.threepids import canonicalise_email if TYPE_CHECKING: - from synapse.rest.client.v1.login import LoginResponse + from synapse.module_api import ModuleApi + from synapse.rest.client.login import LoginResponse from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -181,88 +183,59 @@ class LoginTokenAttributes: user_id = attr.ib(type=str) - # the SSO Identity Provider that the user authenticated with, to get this token auth_provider_id = attr.ib(type=str) + """The SSO Identity Provider that the user authenticated with, to get this token.""" + auth_provider_session_id = attr.ib(type=Optional[str]) + """The session ID advertised by the SSO Identity Provider.""" -class AuthHandler(BaseHandler): + +class AuthHandler: SESSION_EXPIRE_MS = 48 * 60 * 60 * 1000 def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.store = hs.get_datastore() + self.auth = 
hs.get_auth() + self.clock = hs.get_clock() self.checkers: Dict[str, UserInteractiveAuthChecker] = {} for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: inst = auth_checker_class(hs) if inst.is_enabled(): self.checkers[inst.AUTH_TYPE] = inst # type: ignore - self.bcrypt_rounds = hs.config.bcrypt_rounds - - # we can't use hs.get_module_api() here, because to do so will create an - # import loop. - # - # TODO: refactor this class to separate the lower-level stuff that - # ModuleApi can use from the higher-level stuff that uses ModuleApi, as - # better way to break the loop - account_handler = ModuleApi(hs, self) - - self.password_providers = [ - PasswordProvider.load(module, config, account_handler) - for module, config in hs.config.password_providers - ] + self.bcrypt_rounds = hs.config.registration.bcrypt_rounds - logger.info("Extra password_providers: %s", self.password_providers) + self.password_auth_provider = hs.get_password_auth_provider() self.hs = hs # FIXME better possibility to access registrationHandler later? self.macaroon_gen = hs.get_macaroon_generator() - self._password_enabled = hs.config.password_enabled - self._password_localdb_enabled = hs.config.password_localdb_enabled - - # start out by assuming PASSWORD is enabled; we will remove it later if not. - login_types = set() - if self._password_localdb_enabled: - login_types.add(LoginType.PASSWORD) - - for provider in self.password_providers: - login_types.update(provider.get_supported_login_types().keys()) - - if not self._password_enabled: - login_types.discard(LoginType.PASSWORD) - - # Some clients just pick the first type in the list. In this case, we want - # them to use PASSWORD (rather than token or whatever), so we want to make sure - # that comes first, where it's present. - self._supported_login_types = [] - if LoginType.PASSWORD in login_types: - self._supported_login_types.append(LoginType.PASSWORD) - login_types.remove(LoginType.PASSWORD) - self._supported_login_types.extend(login_types) + self._password_enabled = hs.config.auth.password_enabled + self._password_localdb_enabled = hs.config.auth.password_localdb_enabled # Ratelimiter for failed auth during UIA. Uses same ratelimit config # as per `rc_login.failed_attempts`. self._failed_uia_attempts_ratelimiter = Ratelimiter( store=self.store, clock=self.clock, - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + rate_hz=self.hs.config.ratelimiting.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.ratelimiting.rc_login_failed_attempts.burst_count, ) # The number of seconds to keep a UI auth session active. - self._ui_auth_session_timeout = hs.config.ui_auth_session_timeout + self._ui_auth_session_timeout = hs.config.auth.ui_auth_session_timeout # Ratelimitier for failed /login attempts self._failed_login_attempts_ratelimiter = Ratelimiter( store=self.store, clock=hs.get_clock(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + rate_hz=self.hs.config.ratelimiting.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.ratelimiting.rc_login_failed_attempts.burst_count, ) self._clock = self.hs.get_clock() # Expire old UI auth sessions after a period of time. 
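+ # (Only the instance configured to run background tasks schedules this + # cleanup loop, so it is not duplicated across worker processes.)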
- if hs.config.run_background_tasks: + if hs.config.worker.run_background_tasks: self._clock.looping_call( run_as_background_process, 5 * 60 * 1000, @@ -276,23 +249,25 @@ def __init__(self, hs: "HomeServer"): # after the SSO completes and before redirecting them back to their client. # It notifies the user they are about to give access to their matrix account # to the client. - self._sso_redirect_confirm_template = hs.config.sso_redirect_confirm_template + self._sso_redirect_confirm_template = ( + hs.config.sso.sso_redirect_confirm_template + ) # The following template is shown during user interactive authentication # in the fallback auth scenario. It notifies the user that they are # authenticating for an operation to occur on their account. - self._sso_auth_confirm_template = hs.config.sso_auth_confirm_template + self._sso_auth_confirm_template = hs.config.sso.sso_auth_confirm_template # The following template is shown during the SSO authentication process if # the account is deactivated. self._sso_account_deactivated_template = ( - hs.config.sso_account_deactivated_template + hs.config.sso.sso_account_deactivated_template ) - self._server_name = hs.config.server_name + self._server_name = hs.config.server.server_name # cast to tuple for use with str.startswith - self._whitelisted_sso_clients = tuple(hs.config.sso_client_whitelist) + self._whitelisted_sso_clients = tuple(hs.config.sso.sso_client_whitelist) # A mapping of user ID to extra attributes to include in the login # response. @@ -424,11 +399,10 @@ async def _get_available_ui_auth_types(self, user: UserID) -> Iterable[str]: ui_auth_types.add(LoginType.PASSWORD) # also allow auth from password providers - for provider in self.password_providers: - for t in provider.get_supported_login_types().keys(): - if t == LoginType.PASSWORD and not self._password_enabled: - continue - ui_auth_types.add(t) + for t in self.password_auth_provider.get_supported_login_types().keys(): + if t == LoginType.PASSWORD and not self._password_enabled: + continue + ui_auth_types.add(t) # if sso is enabled, allow the user to log in via SSO iff they have a mapping # from sso to mxid. @@ -439,7 +413,7 @@ async def _get_available_ui_auth_types(self, user: UserID) -> Iterable[str]: return ui_auth_types - def get_enabled_auth_types(self): + def get_enabled_auth_types(self) -> Iterable[str]: """Return the enabled user-interactive authentication types Returns the UI-Auth types which are supported by the homeserver's current @@ -461,7 +435,7 @@ async def check_ui_auth( If no auth flows have been completed successfully, raises an InteractiveAuthIncompleteError. To handle this, you can use - synapse.rest.client.v2_alpha._base.interactive_auth_handler as a + synapse.rest.client._base.interactive_auth_handler as a decorator. Args: @@ -543,7 +517,7 @@ async def check_ui_auth( # Note that the registration endpoint explicitly removes the # "initial_device_display_name" parameter if it is provided # without a "password" parameter. See the changes to - # synapse.rest.client.v2_alpha.register.RegisterRestServlet.on_POST + # synapse.rest.client.register.RegisterRestServlet.on_POST # in commit 544722bad23fc31056b9240189c3cbbbf0ffd3f9. if not clientdict: clientdict = session.clientdict @@ -627,23 +601,28 @@ async def check_ui_auth( async def add_oob_auth( self, stagetype: str, authdict: Dict[str, Any], clientip: str - ) -> bool: + ) -> None: """ Adds the result of out-of-band authentication into an existing auth session. Currently used for adding the result of fallback auth. 
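+ For example, completing a fallback reCAPTCHA stage stores the checker's + result against the session via mark_ui_auth_stage_complete below.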
+ + Raises: + LoginError if the stagetype is unknown or the session is missing. + LoginError is raised by check_auth if authentication fails. """ if stagetype not in self.checkers: - raise LoginError(400, "", Codes.MISSING_PARAM) + raise LoginError( + 400, f"Unknown UIA stage type: {stagetype}", Codes.INVALID_PARAM + ) if "session" not in authdict: - raise LoginError(400, "", Codes.MISSING_PARAM) + raise LoginError(400, "Missing session ID", Codes.MISSING_PARAM) + # If authentication fails a LoginError is raised. Otherwise, store + # the successful result. result = await self.checkers[stagetype].check_auth(authdict, clientip) - if result: - await self.store.mark_ui_auth_stage_complete( - authdict["session"], stagetype, result - ) - return True - return False + await self.store.mark_ui_auth_stage_complete( + authdict["session"], stagetype, result + ) def get_session_id(self, clientdict: Dict[str, Any]) -> Optional[str]: """ @@ -697,7 +676,7 @@ async def get_session_data( except StoreError: raise SynapseError(400, "Unknown session ID: %s" % (session_id,)) - async def _expire_old_sessions(self): + async def _expire_old_sessions(self) -> None: """ Invalidate any user interactive authentication sessions that have expired. """ @@ -733,19 +712,19 @@ async def _check_auth_dict( return canonical_id def _get_params_recaptcha(self) -> dict: - return {"public_key": self.hs.config.recaptcha_public_key} + return {"public_key": self.hs.config.captcha.recaptcha_public_key} def _get_params_terms(self) -> dict: return { "policies": { "privacy_policy": { - "version": self.hs.config.user_consent_version, + "version": self.hs.config.consent.user_consent_version, "en": { - "name": self.hs.config.user_consent_policy_name, + "name": self.hs.config.consent.user_consent_policy_name, "url": "%s_matrix/consent?v=%s" % ( - self.hs.config.public_baseurl, - self.hs.config.user_consent_version, + self.hs.config.server.public_baseurl, + self.hs.config.consent.user_consent_version, ), }, } @@ -782,53 +761,109 @@ def _auth_dict_for_flows( async def refresh_token( self, refresh_token: str, - valid_until_ms: Optional[int], - ) -> Tuple[str, str]: + access_token_valid_until_ms: Optional[int], + refresh_token_valid_until_ms: Optional[int], + ) -> Tuple[str, str, Optional[int]]: """ Consumes a refresh token and generate both a new access token and a new refresh token from it. The consumed refresh token is considered invalid after the first use of the new access token or the new refresh token. + The lifetime of both the access token and refresh token will be capped so that they + do not exceed the session's ultimate expiry time, if applicable. + Args: refresh_token: The token to consume. - valid_until_ms: The expiration timestamp of the new access token. - + access_token_valid_until_ms: The expiration timestamp of the new access token. + None if the access token does not expire. + refresh_token_valid_until_ms: The expiration timestamp of the new refresh token. + None if the refresh token does not expire. Returns: - A tuple containing the new access token and refresh token + A tuple containing: + - the new access token + - the new refresh token + - the actual expiry time of the access token, which may be earlier than + `access_token_valid_until_ms`. 
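+ + Example (hypothetical timestamps, for illustration only): with an ultimate + session expiry of 1_700_000_000_000 and a requested access_token_valid_until_ms + of 1_700_000_600_000, the access token is capped to + min(1_700_000_600_000, 1_700_000_000_000) = 1_700_000_000_000, and that capped + value is returned as the third element of the tuple.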
""" # Verify the token signature first before looking up the token if not self._verify_refresh_token(refresh_token): - raise SynapseError(401, "invalid refresh token", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, "invalid refresh token", Codes.UNKNOWN_TOKEN + ) existing_token = await self.store.lookup_refresh_token(refresh_token) if existing_token is None: - raise SynapseError(401, "refresh token does not exist", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, + "refresh token does not exist", + Codes.UNKNOWN_TOKEN, + ) if ( existing_token.has_next_access_token_been_used or existing_token.has_next_refresh_token_been_refreshed ): raise SynapseError( - 403, "refresh token isn't valid anymore", Codes.FORBIDDEN + HTTPStatus.FORBIDDEN, + "refresh token isn't valid anymore", + Codes.FORBIDDEN, ) + now_ms = self._clock.time_msec() + + if existing_token.expiry_ts is not None and existing_token.expiry_ts < now_ms: + + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The supplied refresh token has expired", + Codes.FORBIDDEN, + ) + + if existing_token.ultimate_session_expiry_ts is not None: + # This session has a bounded lifetime, even across refreshes. + + if access_token_valid_until_ms is not None: + access_token_valid_until_ms = min( + access_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + access_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + + if refresh_token_valid_until_ms is not None: + refresh_token_valid_until_ms = min( + refresh_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + refresh_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + if existing_token.ultimate_session_expiry_ts < now_ms: + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The session has expired and can no longer be refreshed", + Codes.FORBIDDEN, + ) + ( new_refresh_token, new_refresh_token_id, - ) = await self.get_refresh_token_for_user_id( - user_id=existing_token.user_id, device_id=existing_token.device_id + ) = await self.create_refresh_token_for_user_id( + user_id=existing_token.user_id, + device_id=existing_token.device_id, + expiry_ts=refresh_token_valid_until_ms, + ultimate_session_expiry_ts=existing_token.ultimate_session_expiry_ts, ) - access_token = await self.get_access_token_for_user_id( + access_token = await self.create_access_token_for_user_id( user_id=existing_token.user_id, device_id=existing_token.device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_valid_until_ms, refresh_token_id=new_refresh_token_id, ) await self.store.replace_refresh_token( existing_token.token_id, new_refresh_token_id ) - return access_token, new_refresh_token + return access_token, new_refresh_token, access_token_valid_until_ms def _verify_refresh_token(self, token: str) -> bool: """ @@ -858,10 +893,12 @@ def _verify_refresh_token(self, token: str) -> bool: return True - async def get_refresh_token_for_user_id( + async def create_refresh_token_for_user_id( self, user_id: str, device_id: str, + expiry_ts: Optional[int], + ultimate_session_expiry_ts: Optional[int], ) -> Tuple[str, int]: """ Creates a new refresh token for the user with the given user ID. @@ -869,6 +906,13 @@ async def get_refresh_token_for_user_id( Args: user_id: canonical user ID device_id: the device ID to associate with the token. + expiry_ts (milliseconds since the epoch): Time after which the + refresh token cannot be used. + If None, the refresh token never expires until it has been used. 
+ ultimate_session_expiry_ts (milliseconds since the epoch): + Time at which the session will end and can not be extended any + further. + If None, the session can be refreshed indefinitely. Returns: The newly created refresh token and its ID in the database @@ -878,10 +922,12 @@ async def get_refresh_token_for_user_id( user_id=user_id, token=refresh_token, device_id=device_id, + expiry_ts=expiry_ts, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) return refresh_token, refresh_token_id - async def get_access_token_for_user_id( + async def create_access_token_for_user_id( self, user_id: str, device_id: Optional[str], @@ -1010,7 +1056,7 @@ async def _find_user_id_and_pwd_hash( def can_change_password(self) -> bool: """Get whether users on this server are allowed to change or set a password. - Both `config.password_enabled` and `config.password_localdb_enabled` must be true. + Both `config.auth.password_enabled` and `config.auth.password_localdb_enabled` must be true. Note that any account (even SSO accounts) are allowed to add passwords if the above is true. @@ -1030,7 +1076,25 @@ def get_supported_login_types(self) -> Iterable[str]: Returns: login types """ - return self._supported_login_types + # Load any login types registered by modules + # This is stored in the password_auth_provider so this doesn't trigger + # any callbacks + types = list(self.password_auth_provider.get_supported_login_types().keys()) + + # This list should include PASSWORD if (either _password_localdb_enabled is + # true or if one of the modules registered it) AND _password_enabled is true + # Also: + # Some clients just pick the first type in the list. In this case, we want + # them to use PASSWORD (rather than token or whatever), so we want to make sure + # that comes first, where it's present. 
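+ # For example, if a module registered "m.login.sso" and local password + # login is enabled, this returns ["m.login.password", "m.login.sso"].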
+ if LoginType.PASSWORD in types: + types.remove(LoginType.PASSWORD) + if self._password_enabled: + types.insert(0, LoginType.PASSWORD) + elif self._password_localdb_enabled and self._password_enabled: + types.insert(0, LoginType.PASSWORD) + + return types async def validate_login( self, @@ -1209,15 +1273,20 @@ async def _validate_userid_login( known_login_type = False - for provider in self.password_providers: - supported_login_types = provider.get_supported_login_types() - if login_type not in supported_login_types: - # this password provider doesn't understand this login type - continue - + # Check if login_type matches a type registered by one of the modules + # We don't need to remove LoginType.PASSWORD from the list if password login is + # disabled, since if that were the case then by this point we know that the + # login_type is not LoginType.PASSWORD + supported_login_types = self.password_auth_provider.get_supported_login_types() + # check if the login type being used is supported by a module + if login_type in supported_login_types: + # Make a note that this login type is supported by the server known_login_type = True + # Get all the fields expected for this login type login_fields = supported_login_types[login_type] + # go through the login submission and keep track of which required fields are + # provided/not provided missing_fields = [] login_dict = {} for f in login_fields: @@ -1225,6 +1294,7 @@ async def _validate_userid_login( missing_fields.append(f) else: login_dict[f] = login_submission[f] + # raise an error if any of the expected fields for that login type weren't provided if missing_fields: raise SynapseError( 400, @@ -1232,10 +1302,15 @@ async def _validate_userid_login( % (login_type, missing_fields), ) - result = await provider.check_auth(username, login_type, login_dict) + # call all of the check_auth hooks for that login_type + # it will return a result once the first success is found (or None otherwise) + result = await self.password_auth_provider.check_auth( + username, login_type, login_dict + ) if result: return result + # if no module managed to authenticate the user, then fall back to built-in password-based auth if login_type == LoginType.PASSWORD and self._password_localdb_enabled: known_login_type = True @@ -1274,11 +1349,16 @@ async def check_password_provider_3pid( completed login/registration, or `None`. If authentication was unsuccessful, `user_id` and `callback` are both `None`.
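+ + Example (hypothetical result): a module that recognises the address might + return ("@alice:example.com", None); if every callback returns None, this + method returns (None, None).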
""" - for provider in self.password_providers: - result = await provider.check_3pid_auth(medium, address, password) - if result: - return result + # call all of the check_3pid_auth callbacks + # Result will be from the first callback that returns something other than None + # If all the callbacks return None, then result is also set to None + result = await self.password_auth_provider.check_3pid_auth( + medium, address, password + ) + if result: + return result + # if result is None then return (None, None) return None, None async def _check_local_password(self, user_id: str, password: str) -> Optional[str]: @@ -1342,12 +1422,12 @@ async def validate_short_term_login_token( try: res = self.macaroon_gen.verify_short_term_login_token(login_token) except Exception: - raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN) await self.auth.check_auth_blocking(res.user_id) return res - async def delete_access_token(self, access_token: str): + async def delete_access_token(self, access_token: str) -> None: """Invalidate a single access token Args: @@ -1357,13 +1437,12 @@ async def delete_access_token(self, access_token: str): user_info = await self.auth.get_user_by_access_token(access_token) await self.store.delete_access_token(access_token) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - await provider.on_logged_out( - user_id=user_info.user_id, - device_id=user_info.device_id, - access_token=access_token, - ) + # see if any modules want to know about this + await self.password_auth_provider.on_logged_out( + user_id=user_info.user_id, + device_id=user_info.device_id, + access_token=access_token, + ) # delete pushers associated with this access token if user_info.token_id is not None: @@ -1376,7 +1455,7 @@ async def delete_access_tokens_for_user( user_id: str, except_token_id: Optional[int] = None, device_id: Optional[str] = None, - ): + ) -> None: """Invalidate access tokens belonging to a user Args: @@ -1390,12 +1469,11 @@ async def delete_access_tokens_for_user( user_id, except_token_id=except_token_id, device_id=device_id ) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - for token, _, device_id in tokens_and_devices: - await provider.on_logged_out( - user_id=user_id, device_id=device_id, access_token=token - ) + # see if any modules want to know about this + for token, _, device_id in tokens_and_devices: + await self.password_auth_provider.on_logged_out( + user_id=user_id, device_id=device_id, access_token=token + ) # delete pushers associated with the access tokens await self.hs.get_pusherpool().remove_pushers_by_access_token( @@ -1404,7 +1482,7 @@ async def delete_access_tokens_for_user( async def add_threepid( self, user_id: str, medium: str, address: str, validated_at: int - ): + ) -> None: # check if medium has a valid value if medium not in ["email", "msisdn"]: raise SynapseError( @@ -1459,6 +1537,10 @@ async def delete_threepid( ) await self.store.user_delete_threepid(user_id, medium, address) + if medium == "email": + await self.store.delete_pusher_by_app_id_pushkey_user_id( + app_id="m.email", pushkey=address, user_id=user_id + ) return result async def hash(self, password: str) -> str: @@ -1471,12 +1553,12 @@ async def hash(self, password: str) -> str: Hashed password. 
""" - def _do_hash(): + def _do_hash() -> str: # Normalise the Unicode in the password pw = unicodedata.normalize("NFKC", password) return bcrypt.hashpw( - pw.encode("utf8") + self.hs.config.password_pepper.encode("utf8"), + pw.encode("utf8") + self.hs.config.auth.password_pepper.encode("utf8"), bcrypt.gensalt(self.bcrypt_rounds), ).decode("ascii") @@ -1495,12 +1577,12 @@ async def validate_hash( Whether self.hash(password) == stored_hash. """ - def _do_validate_hash(checked_hash: bytes): + def _do_validate_hash(checked_hash: bytes) -> bool: # Normalise the Unicode in the password pw = unicodedata.normalize("NFKC", password) return bcrypt.checkpw( - pw.encode("utf8") + self.hs.config.password_pepper.encode("utf8"), + pw.encode("utf8") + self.hs.config.auth.password_pepper.encode("utf8"), checked_hash, ) @@ -1572,7 +1654,8 @@ async def complete_sso_login( client_redirect_url: str, extra_attributes: Optional[JsonDict] = None, new_user: bool = False, - ): + auth_provider_session_id: Optional[str] = None, + ) -> None: """Having figured out a mxid for this user, complete the HTTP request Args: @@ -1587,6 +1670,7 @@ async def complete_sso_login( during successful login. Must be JSON serializable. new_user: True if we should use wording appropriate to a user who has just registered. + auth_provider_session_id: The session ID from the SSO IdP received during login. """ # If the account has been deactivated, do not proceed with the login # flow. @@ -1607,6 +1691,7 @@ async def complete_sso_login( extra_attributes, new_user=new_user, user_profile_data=profile, + auth_provider_session_id=auth_provider_session_id, ) def _complete_sso_login( @@ -1618,7 +1703,8 @@ def _complete_sso_login( extra_attributes: Optional[JsonDict] = None, new_user: bool = False, user_profile_data: Optional[ProfileInfo] = None, - ): + auth_provider_session_id: Optional[str] = None, + ) -> None: """ The synchronous portion of complete_sso_login. @@ -1639,7 +1725,9 @@ def _complete_sso_login( # Create a login token login_token = self.macaroon_gen.generate_short_term_login_token( - registered_user_id, auth_provider_id=auth_provider_id + registered_user_id, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) # Append the login token to the original redirect URL (i.e. 
with its query @@ -1717,7 +1805,7 @@ def _expire_sso_extra_attributes(self) -> None: del self._extra_attributes[user_id] @staticmethod - def add_query_param_to_url(url: str, param_name: str, param: Any): + def add_query_param_to_url(url: str, param_name: str, param: Any) -> str: url_parts = list(urllib.parse.urlparse(url)) query = urllib.parse.parse_qsl(url_parts[4], keep_blank_values=True) query.append((param_name, param)) @@ -1725,10 +1813,9 @@ def add_query_param_to_url(url: str, param_name: str, param: Any): return urllib.parse.urlunparse(url_parts) -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class MacaroonGenerator: - - hs = attr.ib() + hs: "HomeServer" def generate_guest_access_token(self, user_id: str) -> str: macaroon = self._generate_base_macaroon(user_id) @@ -1745,6 +1832,7 @@ def generate_short_term_login_token( self, user_id: str, auth_provider_id: str, + auth_provider_session_id: Optional[str] = None, duration_in_ms: int = (2 * 60 * 1000), ) -> str: macaroon = self._generate_base_macaroon(user_id) @@ -1753,6 +1841,10 @@ def generate_short_term_login_token( expiry = now + duration_in_ms macaroon.add_first_party_caveat("time < %d" % (expiry,)) macaroon.add_first_party_caveat("auth_provider_id = %s" % (auth_provider_id,)) + if auth_provider_session_id is not None: + macaroon.add_first_party_caveat( + "auth_provider_session_id = %s" % (auth_provider_session_id,) + ) return macaroon.serialize() def verify_short_term_login_token(self, token: str) -> LoginTokenAttributes: @@ -1774,15 +1866,28 @@ def verify_short_term_login_token(self, token: str) -> LoginTokenAttributes: user_id = get_value_from_macaroon(macaroon, "user_id") auth_provider_id = get_value_from_macaroon(macaroon, "auth_provider_id") + auth_provider_session_id: Optional[str] = None + try: + auth_provider_session_id = get_value_from_macaroon( + macaroon, "auth_provider_session_id" + ) + except MacaroonVerificationFailedException: + pass + v = pymacaroons.Verifier() v.satisfy_exact("gen = 1") v.satisfy_exact("type = login") v.satisfy_general(lambda c: c.startswith("user_id = ")) v.satisfy_general(lambda c: c.startswith("auth_provider_id = ")) + v.satisfy_general(lambda c: c.startswith("auth_provider_session_id = ")) satisfy_expiry(v, self.hs.get_clock().time_msec) v.verify(macaroon, self.hs.config.key.macaroon_secret_key) - return LoginTokenAttributes(user_id=user_id, auth_provider_id=auth_provider_id) + return LoginTokenAttributes( + user_id=user_id, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, + ) def generate_delete_pusher_token(self, user_id: str) -> str: macaroon = self._generate_base_macaroon(user_id) @@ -1791,47 +1896,241 @@ def generate_delete_pusher_token(self, user_id: str) -> str: def _generate_base_macaroon(self, user_id: str) -> pymacaroons.Macaroon: macaroon = pymacaroons.Macaroon( - location=self.hs.config.server_name, + location=self.hs.config.server.server_name, identifier="key", - key=self.hs.config.macaroon_secret_key, + key=self.hs.config.key.macaroon_secret_key, ) macaroon.add_first_party_caveat("gen = 1") macaroon.add_first_party_caveat("user_id = %s" % (user_id,)) return macaroon -class PasswordProvider: - """Wrapper for a password auth provider module +def load_legacy_password_auth_providers(hs: "HomeServer") -> None: + module_api = hs.get_module_api() + for module, config in hs.config.authproviders.password_providers: + load_single_legacy_password_auth_provider( + module=module, config=config, api=module_api + ) + - This class abstracts 
out all of the backwards-compatibility hacks for - password providers, to provide a consistent interface. - """ +def load_single_legacy_password_auth_provider( + module: Type, + config: JsonDict, + api: "ModuleApi", +) -> None: + try: + provider = module(config=config, account_handler=api) + except Exception as e: + logger.error("Error while initializing %r: %s", module, e) + raise + + # All methods that the module provides should be async, but this wasn't enforced + # in the old module system, so we wrap them if needed + def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]: + # f might be None if the callback isn't implemented by the module. In this + # case we don't want to register a callback at all so we return None. + if f is None: + return None - @classmethod - def load(cls, module, config, module_api: ModuleApi) -> "PasswordProvider": - try: - pp = module(config=config, account_handler=module_api) - except Exception as e: - logger.error("Error while initializing %r: %s", module, e) - raise - return cls(pp, module_api) + # We need to wrap check_password because its old form would return a boolean + # but we now want it to behave just like check_auth() and return the matrix id of + # the user if authentication succeeded or None otherwise + if f.__name__ == "check_password": + + async def wrapped_check_password( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + matrix_user_id = api.get_qualified_user_id(username) + password = login_dict["password"] + + is_valid = await f(matrix_user_id, password) + + if is_valid: + return matrix_user_id, None + + return None + + return wrapped_check_password + + # We need to wrap check_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_auth": + + async def wrapped_check_auth( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None - def __init__(self, pp, module_api: ModuleApi): - self._pp = pp - self._module_api = module_api + result = await f(username, login_type, login_dict) - self._supported_login_types = {} + if isinstance(result, str): + return result, None - # grandfather in check_password support - if hasattr(self._pp, "check_password"): - self._supported_login_types[LoginType.PASSWORD] = ("password",) + return result + + return wrapped_check_auth + + # We need to wrap check_3pid_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_3pid_auth": + + async def wrapped_check_3pid_auth( + medium: str, address: str, password: str + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. 
+ assert f is not None + + result = await f(medium, address, password) - g = getattr(self._pp, "get_supported_login_types", None) - if g: - self._supported_login_types.update(g()) + if isinstance(result, str): + return result, None + + return result + + return wrapped_check_3pid_auth + + def run(*args: Tuple, **kwargs: Dict) -> Awaitable: + # mypy doesn't do well across function boundaries so we need to tell it + # f is definitely not None. + assert f is not None + + return maybe_awaitable(f(*args, **kwargs)) + + return run + + # If the module has these methods implemented, then we pull them out + # and register them as hooks. + check_3pid_auth_hook: Optional[CHECK_3PID_AUTH_CALLBACK] = async_wrapper( + getattr(provider, "check_3pid_auth", None) + ) + on_logged_out_hook: Optional[ON_LOGGED_OUT_CALLBACK] = async_wrapper( + getattr(provider, "on_logged_out", None) + ) + + supported_login_types = {} + # call get_supported_login_types and add that to the dict + g = getattr(provider, "get_supported_login_types", None) + if g is not None: + # Note the old module style also called get_supported_login_types at loading time + # and it is synchronous + supported_login_types.update(g()) + + auth_checkers = {} + # Legacy modules have a check_auth method which expects to be called with one of + # the keys returned by get_supported_login_types. New style modules register a + # dictionary of login_type->check_auth_method mappings + check_auth = async_wrapper(getattr(provider, "check_auth", None)) + if check_auth is not None: + for login_type, fields in supported_login_types.items(): + # need tuple(fields) since fields can be any Iterable type (so may not be hashable) + auth_checkers[(login_type, tuple(fields))] = check_auth + + # if it has a "check_password" method then it should handle all auth checks + # with login type of LoginType.PASSWORD + check_password = async_wrapper(getattr(provider, "check_password", None)) + if check_password is not None: + # need to use a tuple here for ("password",) not a list since lists aren't hashable + auth_checkers[(LoginType.PASSWORD, ("password",))] = check_password + + api.register_password_auth_provider_callbacks( + check_3pid_auth=check_3pid_auth_hook, + on_logged_out=on_logged_out_hook, + auth_checkers=auth_checkers, + ) + + +CHECK_3PID_AUTH_CALLBACK = Callable[ + [str, str, str], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] +ON_LOGGED_OUT_CALLBACK = Callable[[str, Optional[str], str], Awaitable] +CHECK_AUTH_CALLBACK = Callable[ + [str, str, JsonDict], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] + + +class PasswordAuthProvider: + """ + A class that the AuthHandler calls when authenticating users + It allows modules to provide alternative methods for authentication + """ - def __str__(self): - return str(self._pp) + def __init__(self) -> None: + # lists of callbacks + self.check_3pid_auth_callbacks: List[CHECK_3PID_AUTH_CALLBACK] = [] + self.on_logged_out_callbacks: List[ON_LOGGED_OUT_CALLBACK] = [] + + # Mapping from login type to login parameters + self._supported_login_types: Dict[str, Iterable[str]] = {} + + # Mapping from login type to auth checker callbacks + self.auth_checker_callbacks: Dict[str, List[CHECK_AUTH_CALLBACK]] = {} + + def register_password_auth_provider_callbacks( + self, + check_3pid_auth: Optional[CHECK_3PID_AUTH_CALLBACK] = None, + on_logged_out: Optional[ON_LOGGED_OUT_CALLBACK] = None, + auth_checkers: Optional[ + Dict[Tuple[str, 
Tuple[str, ...]], CHECK_AUTH_CALLBACK ] = None, + ) -> None: + # Register check_3pid_auth callback + if check_3pid_auth is not None: + self.check_3pid_auth_callbacks.append(check_3pid_auth) + + # register on_logged_out callback + if on_logged_out is not None: + self.on_logged_out_callbacks.append(on_logged_out) + + if auth_checkers is not None: + # register a new supported login_type + # Iterate through all of the types being registered + for (login_type, fields), callback in auth_checkers.items(): + # Note: fields may be empty here. This would allow a module's auth checker to + # be called with just 'login_type' and no password or other secrets + + # Need to check that all the field names are strings or we may get nasty errors later + for f in fields: + if not isinstance(f, str): + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but all parameter names must be strings" + % (login_type, fields) + ) + + # 2 modules supporting the same login type must expect the same fields + # e.g. 1 can't expect "pass" if the other expects "password" + # so throw an exception if that happens + if login_type not in self._supported_login_types: + self._supported_login_types[login_type] = fields + else: + fields_currently_supported = self._supported_login_types.get( + login_type + ) + if fields_currently_supported != fields: + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but another module had already registered support for that type with parameters %s" + % (login_type, fields, fields_currently_supported) + ) + + # Add the new method to the list of auth_checker_callbacks for this login type + self.auth_checker_callbacks.setdefault(login_type, []).append(callback) def get_supported_login_types(self) -> Mapping[str, Iterable[str]]: """Get the login types supported by this password provider @@ -1839,20 +2138,15 @@ def get_supported_login_types(self) -> Mapping[str, Iterable[str]]: Returns a map from a login type identifier (such as m.login.password) to an iterable giving the fields which must be provided by the user in the submission to the /login API. - - This wrapper adds m.login.password to the list if the underlying password - provider supports the check_password() api. """ + return self._supported_login_types async def check_auth( self, username: str, login_type: str, login_dict: JsonDict - ) -> Optional[Tuple[str, Optional[Callable]]]: + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: """Check if the user has presented valid login credentials - This wrapper also calls check_password() if the underlying password provider - supports the check_password() api and the login type is m.login.password. - Args: username: user id presented by the client. Either an MXID or an unqualified username. @@ -1866,63 +2160,130 @@ async def check_auth( user, and `callback` is an optional callback which will be called with the result from the /login call (including access_token, device_id, etc.)
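+ + Example (hypothetical): a successful password check might return + ("@alice:example.com", None); a callback that returns None simply defers + to the next registered checker for this login type.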
""" - # first grandfather in a call to check_password - if login_type == LoginType.PASSWORD: - g = getattr(self._pp, "check_password", None) - if g: - qualified_user_id = self._module_api.get_qualified_user_id(username) - is_valid = await self._pp.check_password( - qualified_user_id, login_dict["password"] - ) - if is_valid: - return qualified_user_id, None - g = getattr(self._pp, "check_auth", None) - if not g: - return None - result = await g(username, login_type, login_dict) + # Go through all callbacks for the login type until one returns with a value + # other than None (i.e. until a callback returns a success) + for callback in self.auth_checker_callbacks[login_type]: + try: + result = await callback(username, login_type, login_dict) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: ignore[unreachable]" is used after some isinstance checks because mypy thinks + # result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue - return result + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def check_3pid_auth( self, medium: str, address: str, password: str - ) -> Optional[Tuple[str, Optional[Callable]]]: - g = getattr(self._pp, "check_3pid_auth", None) - if not g: - return None - + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: # This function is able to return a deferred that either # resolves None, meaning authentication failure, or upon # success, to a str (which is the user_id) or a tuple of # (user_id, callback_func), where callback_func should be run # after we've finished everything else - result = await g(medium, address, password) - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + for callback in self.check_3pid_auth_callbacks: + try: + result = await callback(medium, address, password) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - return result + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: 
ignore[unreachable]" is used after some isinstance checks because mypy thinks + result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def on_logged_out( self, user_id: str, device_id: Optional[str], access_token: str ) -> None: - g = getattr(self._pp, "on_logged_out", None) - if not g: - return - # This might return an awaitable, if it does block the log out - # until it completes. - await maybe_awaitable( - g( - user_id=user_id, - device_id=device_id, - access_token=access_token, - ) - ) + # call all of the on_logged_out callbacks + for callback in self.on_logged_out_callbacks: + try: + await callback(user_id, device_id, access_token) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py index 0325f86e20a1..5d8f6c50a949 100644 --- a/synapse/handlers/cas.py +++ b/synapse/handlers/cas.py @@ -34,20 +34,20 @@ class CasError(Exception): """Used to catch errors when validating the CAS ticket.""" - def __init__(self, error, error_description=None): + def __init__(self, error: str, error_description: Optional[str] = None): self.error = error self.error_description = error_description - def __str__(self): + def __str__(self) -> str: if self.error_description: return f"{self.error}: {self.error_description}" return self.error -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class CasResponse: - username = attr.ib(type=str) - attributes = attr.ib(type=Dict[str, List[Optional[str]]]) + username: str + attributes: Dict[str, List[Optional[str]]] class CasHandler: @@ -65,10 +65,10 @@ def __init__(self, hs: "HomeServer"): self._auth_handler = hs.get_auth_handler() self._registration_handler = hs.get_registration_handler() - self._cas_server_url = hs.config.cas_server_url - self._cas_service_url = hs.config.cas_service_url - self._cas_displayname_attribute = hs.config.cas_displayname_attribute - self._cas_required_attributes = hs.config.cas_required_attributes + self._cas_server_url = hs.config.cas.cas_server_url + self._cas_service_url = hs.config.cas.cas_service_url + self._cas_displayname_attribute = hs.config.cas.cas_displayname_attribute + self._cas_required_attributes = hs.config.cas.cas_required_attributes self._http_client = hs.get_proxied_http_client() @@ -82,7 +82,6 @@ def
__init__(self, hs: "HomeServer"): # the SsoIdentityProvider protocol type. self.idp_icon = None self.idp_brand = None - self.unstable_idp_brand = None self._sso_handler = hs.get_sso_handler() @@ -134,11 +133,9 @@ async def _validate_ticket( body = pde.response except HttpResponseException as e: description = ( - ( - 'Authorization server responded with a "{status}" error ' - "while exchanging the authorization code." - ).format(status=e.code), - ) + 'Authorization server responded with a "{status}" error ' + "while exchanging the authorization code." + ).format(status=e.code) raise CasError("server_error", description) from e return self._parse_cas_response(body) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 45d2404ddebf..bee62cf36088 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -19,19 +19,17 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import Requester, UserID, create_requester -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) -class DeactivateAccountHandler(BaseHandler): +class DeactivateAccountHandler: """Handler which deals with deactivating user accounts.""" def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() self.hs = hs self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() @@ -46,7 +44,7 @@ def __init__(self, hs: "HomeServer"): # Start the user parter loop so it can resume parting users from rooms where # it left off (if it has work left to do). - if hs.config.run_background_tasks: + if hs.config.worker.run_background_tasks: hs.get_reactor().callWhenRunning(self._start_user_parting) self._account_validity_enabled = ( @@ -131,7 +129,7 @@ async def deactivate_account( await self.store.add_user_pending_deactivation(user_id) # delete from user directory - await self.user_directory_handler.handle_user_deactivated(user_id) + await self.user_directory_handler.handle_local_user_deactivated(user_id) # Mark the user as erased, if they asked for that if erase_data: @@ -255,16 +253,16 @@ async def activate_account(self, user_id: str) -> None: Args: user_id: ID of user to be re-activated """ - # Add the user to the directory, if necessary. user = UserID.from_string(user_id) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(user.localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) # Ensure the user is not marked as erased. await self.store.mark_user_not_erased(user_id) # Mark the user as active. await self.store.set_user_deactivated_status(user_id, False) + + # Add the user to the directory, if necessary. Note that + # this must be done after the user is re-activated, because + # deactivated users are excluded from the user directory. + profile = await self.store.get_profileinfo(user.localpart) + await self.user_directory_handler.handle_local_profile_change(user_id, profile) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 46ee83440741..82ee11e921e6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,7 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, +) from synapse.api import errors from synapse.api.constants import EventTypes @@ -40,8 +51,6 @@ from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -50,14 +59,16 @@ MAX_DEVICE_DISPLAY_NAME_LEN = 100 -class DeviceWorkerHandler(BaseHandler): +class DeviceWorkerHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.clock = hs.get_clock() self.hs = hs + self.store = hs.get_datastore() + self.notifier = hs.get_notifier() self.state = hs.get_state_handler() self.state_store = hs.get_storage().state self._auth_handler = hs.get_auth_handler() + self.server_name = hs.hostname @trace async def get_devices_by_user(self, user_id: str) -> List[JsonDict]: @@ -267,7 +278,7 @@ def __init__(self, hs: "HomeServer"): hs.get_distributor().observe("user_left_room", self.user_left_room) - def _check_device_name_length(self, name: Optional[str]): + def _check_device_name_length(self, name: Optional[str]) -> None: """ Checks whether a device name is longer than the maximum allowed length. @@ -290,6 +301,8 @@ async def check_device_registered( user_id: str, device_id: Optional[str], initial_device_display_name: Optional[str] = None, + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, ) -> str: """ If the given device has not been registered, register it with the @@ -301,6 +314,8 @@ async def check_device_registered( user_id: @user:id device_id: device id supplied by client initial_device_display_name: device display name from client + auth_provider_id: The SSO IdP the user used, if any. + auth_provider_session_id: The session ID (sid) received from the SSO IdP. Returns: device id (generated if none was supplied) """ @@ -312,6 +327,8 @@ async def check_device_registered( user_id=user_id, device_id=device_id, initial_device_display_name=initial_device_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if new_device: await self.notify_device_update(user_id, [device_id]) @@ -326,6 +343,8 @@ async def check_device_registered( user_id=user_id, device_id=new_device_id, initial_device_display_name=initial_device_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if new_device: await self.notify_device_update(user_id, [new_device_id]) @@ -443,6 +462,10 @@ async def notify_device_update( ) -> None: """Notify that a user's device(s) has changed. Pokes the notifier, and remote servers if the user is local. + + Args: + user_id: The Matrix ID of the user whose device list has been updated. + device_ids: The device IDs that have changed. """ if not device_ids: # No changes to notify about, so this is a no-op.
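+ # Illustrative usage of the new SSO parameters (a sketch with hypothetical
+ # values; `device_handler` stands for the instance returned by
+ # hs.get_device_handler()):
+ #
+ #     device_id = await device_handler.check_device_registered(
+ #         user_id="@alice:example.com",
+ #         device_id=None,  # a new device ID is generated when none is supplied
+ #         initial_device_display_name="Alice's laptop",
+ #         auth_provider_id="oidc-example",       # which SSO IdP was used
+ #         auth_provider_session_id="sid-12345",  # the IdP's session ID (sid)
+ #     )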
@@ -595,7 +618,7 @@ async def rehydrate_device( def _update_device_from_client_ips( - device: JsonDict, client_ips: Dict[Tuple[str, str], JsonDict] + device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] ) -> None: ip = client_ips.get((device["user_id"], device["device_id"]), {}) device.update({"last_seen_ts": ip.get("last_seen"), "last_seen_ip": ip.get("ip")}) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 679b47f0815f..b582266af908 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -84,11 +84,18 @@ def __init__(self, hs: "HomeServer"): self._ratelimiter = Ratelimiter( store=self.store, clock=hs.get_clock(), - rate_hz=hs.config.rc_key_requests.per_second, - burst_count=hs.config.rc_key_requests.burst_count, + rate_hz=hs.config.ratelimiting.rc_key_requests.per_second, + burst_count=hs.config.ratelimiting.rc_key_requests.burst_count, ) async def on_direct_to_device_edu(self, origin: str, content: JsonDict) -> None: + """ + Handle receiving to-device messages from remote homeservers. + + Args: + origin: The remote homeserver. + content: The JSON dictionary containing the to-device messages. + """ local_messages = {} sender_user_id = content["sender"] if origin != get_domain_from_id(sender_user_id): @@ -135,12 +142,16 @@ async def on_direct_to_device_edu(self, origin: str, content: JsonDict) -> None: message_type, sender_user_id, by_device ) - stream_id = await self.store.add_messages_from_remote_to_device_inbox( + # Add messages to the database. + # Retrieve the stream id of the last-processed to-device message. + last_stream_id = await self.store.add_messages_from_remote_to_device_inbox( origin, message_id, local_messages ) + # Notify listeners that there are new to-device messages to process, + # handing them the latest stream id. self.notifier.on_new_event( - "to_device_key", stream_id, users=local_messages.keys() + "to_device_key", last_stream_id, users=local_messages.keys() ) async def _check_for_unknown_devices( @@ -195,6 +206,14 @@ async def send_device_message( message_type: str, messages: Dict[str, Dict[str, JsonDict]], ) -> None: + """ + Handle a request from a user to send to-device message(s). + + Args: + requester: The user that is sending the to-device messages. + message_type: The type of to-device messages that are being sent. + messages: A dictionary containing recipients mapped to messages intended for them. + """ sender_user_id = requester.user.to_string() message_id = random_string(16) @@ -257,12 +276,16 @@ async def send_device_message( "org.matrix.opentracing_context": json_encoder.encode(context), } - stream_id = await self.store.add_messages_to_device_inbox( + # Add messages to the database. + # Retrieve the stream id of the last-processed to-device message. + last_stream_id = await self.store.add_messages_to_device_inbox( local_messages, remote_edu_contents ) + # Notify listeners that there are new to-device messages to process, + # handing them the latest stream id. 
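+ # (Listeners compare this stream id against their last-seen position + # to decide whether there are new to-device messages to fetch.)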
self.notifier.on_new_event( - "to_device_key", stream_id, users=local_messages.keys() + "to_device_key", last_stream_id, users=local_messages.keys() ) if self.federation_sender: diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index d487fee627f2..7ee5c47fd96b 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -31,26 +31,25 @@ from synapse.storage.databases.main.directory import RoomAliasMapping from synapse.types import JsonDict, Requester, RoomAlias, UserID, get_domain_from_id -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) -class DirectoryHandler(BaseHandler): +class DirectoryHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.auth = hs.get_auth() + self.hs = hs self.state = hs.get_state_handler() self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() self.store = hs.get_datastore() self.config = hs.config - self.enable_room_list_search = hs.config.enable_room_list_search - self.require_membership = hs.config.require_membership_for_aliases + self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search + self.require_membership = hs.config.server.require_membership_for_aliases self.third_party_event_rules = hs.get_third_party_event_rules() + self.server_name = hs.hostname self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -143,10 +142,10 @@ async def create_association( ): raise AuthError(403, "This user is not permitted to create this alias") - if not self.config.is_alias_creation_allowed( + if not self.config.roomdirectory.is_alias_creation_allowed( user_id, room_id, room_alias_str ): - # Lets just return a generic message, as there may be all sorts of + # Let's just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages # per alias creation rule? raise SynapseError(403, "Not allowed to create alias") @@ -205,6 +204,10 @@ async def delete_association( ) room_id = await self._delete_association(room_alias) + if room_id is None: + # It's possible someone else deleted the association after the + # checks above, but before we did the deletion. 
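+ # Either way the alias no longer exists, so responding with a 404 here + # is consistent.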
+ raise NotFoundError("Unknown room alias") try: await self._update_canonical_alias(requester, user_id, room_id, room_alias) @@ -226,7 +229,7 @@ async def delete_appservice_association( ) await self._delete_association(room_alias) - async def _delete_association(self, room_alias: RoomAlias) -> str: + async def _delete_association(self, room_alias: RoomAlias) -> Optional[str]: if not self.hs.is_mine(room_alias): raise SynapseError(400, "Room alias must be local") @@ -246,7 +249,7 @@ async def get_association(self, room_alias: RoomAlias) -> JsonDict: servers = result.servers else: try: - fed_result = await self.federation.make_query( + fed_result: Optional[JsonDict] = await self.federation.make_query( destination=room_alias.domain, query_type="directory", args={"room_alias": room_alias.to_string()}, @@ -459,10 +462,10 @@ async def edit_published_room_list( if canonical_alias: room_aliases.append(canonical_alias) - if not self.config.is_publishing_room_allowed( + if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases ): - # Lets just return a generic message, as there may be all sorts of + # Let's just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages # per alias creation rule? raise SynapseError(403, "Not allowed to publish room") diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d92370859fb6..60c11e3d2128 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -57,7 +57,7 @@ def __init__(self, hs: "HomeServer"): federation_registry = hs.get_federation_registry() - self._is_master = hs.config.worker_app is None + self._is_master = hs.config.worker.worker_app is None if not self._is_master: self._user_device_resync_client = ( ReplicationUserDevicesResyncRestServlet.make_client(hs) @@ -201,95 +201,19 @@ async def query_devices( r[user_id] = remote_queries[user_id] # Now fetch any devices that we don't have in our cache - @trace - async def do_remote_query(destination): - """This is called when we are querying the device list of a user on - a remote homeserver and their device list is not in the device list - cache. If we share a room with this user and we're not querying for - specific user we will update the cache with their device list. - """ - - destination_query = remote_queries_not_in_cache[destination] - - # We first consider whether we wish to update the device list cache with - # the users device list. We want to track a user's devices when the - # authenticated user shares a room with the queried user and the query - # has not specified a particular device. - # If we update the cache for the queried user we remove them from further - # queries. We use the more efficient batched query_client_keys for all - # remaining users - user_ids_updated = [] - for (user_id, device_list) in destination_query.items(): - if user_id in user_ids_updated: - continue - - if device_list: - continue - - room_ids = await self.store.get_rooms_for_user(user_id) - if not room_ids: - continue - - # We've decided we're sharing a room with this user and should - # probably be tracking their device lists. However, we haven't - # done an initial sync on the device list so we do it now. 
- try: - if self._is_master: - user_devices = await self.device_handler.device_list_updater.user_device_resync( - user_id - ) - else: - user_devices = await self._user_device_resync_client( - user_id=user_id - ) - - user_devices = user_devices["devices"] - user_results = results.setdefault(user_id, {}) - for device in user_devices: - user_results[device["device_id"]] = device["keys"] - user_ids_updated.append(user_id) - except Exception as e: - failures[destination] = _exception_to_failure(e) - - if len(destination_query) == len(user_ids_updated): - # We've updated all the users in the query and we do not need to - # make any further remote calls. - return - - # Remove all the users from the query which we have updated - for user_id in user_ids_updated: - destination_query.pop(user_id) - - try: - remote_result = await self.federation.query_client_keys( - destination, {"device_keys": destination_query}, timeout=timeout - ) - - for user_id, keys in remote_result["device_keys"].items(): - if user_id in destination_query: - results[user_id] = keys - - if "master_keys" in remote_result: - for user_id, key in remote_result["master_keys"].items(): - if user_id in destination_query: - cross_signing_keys["master_keys"][user_id] = key - - if "self_signing_keys" in remote_result: - for user_id, key in remote_result["self_signing_keys"].items(): - if user_id in destination_query: - cross_signing_keys["self_signing_keys"][user_id] = key - - except Exception as e: - failure = _exception_to_failure(e) - failures[destination] = failure - set_tag("error", True) - set_tag("reason", failure) - await make_deferred_yieldable( defer.gatherResults( [ - run_in_background(do_remote_query, destination) - for destination in remote_queries_not_in_cache + run_in_background( + self._query_devices_for_destination, + results, + cross_signing_keys, + failures, + destination, + queries, + timeout, + ) + for destination, queries in remote_queries_not_in_cache.items() ], consumeErrors=True, ).addErrback(unwrapFirstError) @@ -301,6 +225,121 @@ async def do_remote_query(destination): return ret + @trace + async def _query_devices_for_destination( + self, + results: JsonDict, + cross_signing_keys: JsonDict, + failures: Dict[str, JsonDict], + destination: str, + destination_query: Dict[str, Iterable[str]], + timeout: int, + ) -> None: + """This is called when we are querying the device list of a user on + a remote homeserver and their device list is not in the device list + cache. If we share a room with this user and we're not querying for + a specific user we will update the cache with their device list. + + Args: + results: A map from user ID to their device keys, which gets + updated with the newly fetched keys. + cross_signing_keys: Map from user ID to their cross signing keys, + which gets updated with the newly fetched keys. + failures: Map of destinations to failures that have occurred while + attempting to fetch keys. + destination: The remote server to query + destination_query: The query dict of devices to query the remote + server for. + timeout: The timeout for remote HTTP requests. + """ + + # We first consider whether we wish to update the device list cache with + # the user's device list. We want to track a user's devices when the + # authenticated user shares a room with the queried user and the query + # has not specified a particular device. + # If we update the cache for the queried user we remove them from further + # queries.
We use the more efficient batched query_client_keys for all + # remaining users + user_ids_updated = [] + for (user_id, device_list) in destination_query.items(): + if user_id in user_ids_updated: + continue + + if device_list: + continue + + room_ids = await self.store.get_rooms_for_user(user_id) + if not room_ids: + continue + + # We've decided we're sharing a room with this user and should + # probably be tracking their device lists. However, we haven't + # done an initial sync on the device list so we do it now. + try: + if self._is_master: + resync_results = await self.device_handler.device_list_updater.user_device_resync( + user_id + ) + else: + resync_results = await self._user_device_resync_client( + user_id=user_id + ) + + # Add the device keys to the results. + user_devices = resync_results["devices"] + user_results = results.setdefault(user_id, {}) + for device in user_devices: + user_results[device["device_id"]] = device["keys"] + user_ids_updated.append(user_id) + + # Add any cross signing keys to the results. + master_key = resync_results.get("master_key") + self_signing_key = resync_results.get("self_signing_key") + + if master_key: + cross_signing_keys["master_keys"][user_id] = master_key + + if self_signing_key: + cross_signing_keys["self_signing_keys"][user_id] = self_signing_key + except Exception as e: + failures[destination] = _exception_to_failure(e) + + if len(destination_query) == len(user_ids_updated): + # We've updated all the users in the query and we do not need to + # make any further remote calls. + return + + # Remove all the users from the query which we have updated + for user_id in user_ids_updated: + destination_query.pop(user_id) + + try: + remote_result = await self.federation.query_client_keys( + destination, {"device_keys": destination_query}, timeout=timeout + ) + + for user_id, keys in remote_result["device_keys"].items(): + if user_id in destination_query: + results[user_id] = keys + + if "master_keys" in remote_result: + for user_id, key in remote_result["master_keys"].items(): + if user_id in destination_query: + cross_signing_keys["master_keys"][user_id] = key + + if "self_signing_keys" in remote_result: + for user_id, key in remote_result["self_signing_keys"].items(): + if user_id in destination_query: + cross_signing_keys["self_signing_keys"][user_id] = key + + except Exception as e: + failure = _exception_to_failure(e) + failures[destination] = failure + set_tag("error", True) + set_tag("reason", failure) + + return + async def get_cross_signing_keys_from_cache( self, query: Iterable[str], from_user_id: Optional[str] ) -> Dict[str, Dict[str, dict]]: @@ -447,7 +486,7 @@ async def claim_one_time_keys( } @trace - async def claim_client_keys(destination): + async def claim_client_keys(destination: str) -> None: set_tag("destination", destination) device_keys = remote_queries[destination] try: diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index cab7cbd6df59..a6d9970f2db6 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -22,9 +22,11 @@ RestrictedJoinRuleTypes, ) from synapse.api.errors import AuthError, Codes, SynapseError -from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion +from synapse.api.room_versions import RoomVersion +from synapse.event_auth import check_auth_rules_for_event from synapse.events import EventBase from synapse.events.builder import EventBuilder +from synapse.events.snapshot import EventContext from synapse.types import StateMap, 
get_domain_from_id from synapse.util.metrics import Measure @@ -44,14 +46,15 @@ def __init__(self, hs: "HomeServer"): self._store = hs.get_datastore() self._server_name = hs.hostname - async def check_from_context( - self, room_version: str, event, context, do_sig_check=True + async def check_auth_rules_from_context( + self, + room_version_obj: RoomVersion, + event: EventBase, + context: EventContext, ) -> None: + """Check an event passes the auth rules at its own auth events""" auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) - auth_events = {(e.type, e.state_key): e for e in auth_events_by_id.values()} - - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] # Allow server admin to change room configuration if ( @@ -69,9 +72,7 @@ async def check_from_context( ): logger.debug("Allowing! %s", event) else: - event_auth.check( - room_version_obj, event, auth_events=auth_events, do_sig_check=do_sig_check - ) + check_auth_rules_for_event(room_version_obj, event, auth_events_by_id.values()) def compute_auth_events( self, @@ -230,7 +231,7 @@ async def check_restricted_join_rules( raise AuthError( 403, - "You do not belong to any of the required rooms to join this room.", + "You do not belong to any of the required rooms/spaces to join this room.", ) async def has_restricted_join_rules( diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 4b3f037072d9..32b0254c5f08 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -25,8 +25,6 @@ from synapse.types import JsonDict, UserID from synapse.visibility import filter_events_for_client -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -34,11 +32,11 @@ logger = logging.getLogger(__name__) -class EventStreamHandler(BaseHandler): +class EventStreamHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.store = hs.get_datastore() self.clock = hs.get_clock() + self.hs = hs self.notifier = hs.get_notifier() self.state = hs.get_state_handler() @@ -124,8 +122,7 @@ async def get_stream( events, time_now, as_client_event=as_client_event, - # We don't bundle "live" events, as otherwise clients - # will end up double counting annotations. + # Don't bundle aggregations as this is a deprecated API. 
bundle_aggregations=False, ) @@ -138,9 +135,9 @@ async def get_stream( return chunk -class EventHandler(BaseHandler): +class EventHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() self.storage = hs.get_storage() async def get_event( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8197b60b7673..1ea837d08211 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -15,25 +15,10 @@ """Contains handlers for federation events.""" -import itertools import logging -from collections.abc import Container from http import HTTPStatus -from typing import ( - TYPE_CHECKING, - Collection, - Dict, - Iterable, - List, - Optional, - Sequence, - Set, - Tuple, - Union, -) +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union -import attr -from prometheus_client import Counter from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from unpaddedbase64 import decode_base64 @@ -41,30 +26,24 @@ from twisted.internet import defer from synapse import event_auth -from synapse.api.constants import ( - EventTypes, - Membership, - RejectedReason, - RoomEncryptionAlgorithms, -) +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, CodeMessageException, Codes, FederationDeniedError, - FederationError, HttpResponseException, NotFoundError, RequestSendFailed, SynapseError, ) -from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.crypto.event_signing import compute_event_signature -from synapse.event_auth import auth_types_for_event +from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator -from synapse.handlers._base import BaseHandler +from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import ( make_deferred_yieldable, @@ -73,28 +52,14 @@ run_in_background, ) from synapse.logging.utils import log_function -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, - ReplicationFederationSendEventsRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) -from synapse.state import StateResolutionStore from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.types import ( - JsonDict, - MutableStateMap, - PersistedEventPosition, - RoomStreamToken, - StateMap, - UserID, - get_domain_from_id, -) -from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter +from synapse.types import JsonDict, StateMap, get_domain_from_id +from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import shortstr from synapse.visibility import filter_events_for_server if TYPE_CHECKING: @@ -102,43 +67,45 @@ logger = logging.getLogger(__name__) -soft_failed_event_counter = Counter( - "synapse_federation_soft_failed_events_total", - "Events received over federation that we 
marked as soft_failed", -) - - -@attr.s(slots=True) -class _NewEventInfo: - """Holds information about a received event, ready for passing to _auth_and_persist_events - Attributes: - event: the received event +def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: + """Get joined domains from state - state: the state at that event + Args: + state: State map from type/state key to event. - auth_events: the auth_event map for that event + Returns: + Returns a list of servers with the lowest depth of their joins. + Sorted by lowest depth first. """ + joined_users = [ + (state_key, int(event.depth)) + for (e_type, state_key), event in state.items() + if e_type == EventTypes.Member and event.membership == Membership.JOIN + ] + + joined_domains: Dict[str, int] = {} + for u, d in joined_users: + try: + dom = get_domain_from_id(u) + old_d = joined_domains.get(dom) + if old_d: + joined_domains[dom] = min(d, old_d) + else: + joined_domains[dom] = d + except Exception: + pass + + return sorted(joined_domains.items(), key=lambda d: d[1]) - event = attr.ib(type=EventBase) - state = attr.ib(type=Optional[Sequence[EventBase]], default=None) - auth_events = attr.ib(type=Optional[MutableStateMap[EventBase]], default=None) +class FederationHandler: + """Handles general incoming federation requests -class FederationHandler(BaseHandler): - """Handles events that originated from federation. - Responsible for: - a) handling received Pdus before handing them on as Events to the rest - of the homeserver (including auth and state conflict resolutions) - b) converting events that were produced by local clients that may need - to be sent to remote homeservers. - c) doing the necessary dances to invite remote users and join remote - rooms. + Incoming events are *not* handled here, for which see FederationEventHandler. 
""" def __init__(self, hs: "HomeServer"): - super().__init__(hs) - self.hs = hs self.store = hs.get_datastore() @@ -146,896 +113,36 @@ def __init__(self, hs: "HomeServer"): self.state_store = self.storage.state self.federation_client = hs.get_federation_client() self.state_handler = hs.get_state_handler() - self._state_resolution_handler = hs.get_state_resolution_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() - self.action_generator = hs.get_action_generator() self.is_mine_id = hs.is_mine_id self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() + self.event_builder_factory = hs.get_event_builder_factory() self._event_auth_handler = hs.get_event_auth_handler() - self._message_handler = hs.get_message_handler() - self._server_notices_mxid = hs.config.server_notices_mxid + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self.config = hs.config self.http_client = hs.get_proxied_blacklisted_http_client() - self._instance_name = hs.get_instance_name() self._replication = hs.get_replication_data_handler() + self._federation_event_handler = hs.get_federation_event_handler() - self._send_events = ReplicationFederationSendEventsRestServlet.make_client(hs) self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client( hs ) - if hs.config.worker_app: - self._user_device_resync = ( - ReplicationUserDevicesResyncRestServlet.make_client(hs) - ) + if hs.config.worker.worker_app: self._maybe_store_room_on_outlier_membership = ( ReplicationStoreRoomOnOutlierMembershipRestServlet.make_client(hs) ) else: - self._device_list_updater = hs.get_device_handler().device_list_updater self._maybe_store_room_on_outlier_membership = ( self.store.maybe_store_room_on_outlier_membership ) - # When joining a room we need to queue any events for that room up. - # For each room, a list of (pdu, origin) tuples. - self.room_queues: Dict[str, List[Tuple[EventBase, str]]] = {} - self._room_pdu_linearizer = Linearizer("fed_room_pdu") - self._room_backfill = Linearizer("room_backfill") self.third_party_event_rules = hs.get_third_party_event_rules() - self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages - - async def on_receive_pdu( - self, origin: str, pdu: EventBase, sent_to_us_directly: bool = False - ) -> None: - """Process a PDU received via a federation /send/ transaction, or - via backfill of missing prev_events - - Args: - origin: server which initiated the /send/ transaction. Will - be used to fetch missing events or state. - pdu: received PDU - sent_to_us_directly: True if this event was pushed to us; False if - we pulled it as the result of a missing prev_event. - """ - - room_id = pdu.room_id - event_id = pdu.event_id - - logger.info("handling received PDU: %s", pdu) - - # We reprocess pdus when we have seen them only as outliers - existing = await self.store.get_event( - event_id, allow_none=True, allow_rejected=True - ) - - # FIXME: Currently we fetch an event again when we already have it - # if it has been marked as an outlier. - - already_seen = existing and ( - not existing.internal_metadata.is_outlier() - or pdu.internal_metadata.is_outlier() - ) - if already_seen: - logger.debug("Already seen pdu") - return - - # do some initial sanity-checking of the event. In particular, make - # sure it doesn't have hundreds of prev_events or auth_events, which - # could cause a huge state resolution or cascade of event fetches. 
- try: - self._sanity_check_event(pdu) - except SynapseError as err: - logger.warning("Received event failed sanity checks") - raise FederationError("ERROR", err.code, err.msg, affected=pdu.event_id) - - # If we are currently in the process of joining this room, then we - # queue up events for later processing. - if room_id in self.room_queues: - logger.info( - "Queuing PDU from %s for now: join in progress", - origin, - ) - self.room_queues[room_id].append((pdu, origin)) - return - - # If we're not in the room just ditch the event entirely. This is - # probably an old server that has come back and thinks we're still in - # the room (or we've been rejoined to the room by a state reset). - # - # Note that if we were never in the room then we would have already - # dropped the event, since we wouldn't know the room version. - is_in_room = await self._event_auth_handler.check_host_in_room( - room_id, self.server_name - ) - if not is_in_room: - logger.info( - "Ignoring PDU from %s as we're not in the room", - origin, - ) - return None - - state = None - - # Get missing pdus if necessary. - if not pdu.internal_metadata.is_outlier(): - # We only backfill backwards to the min depth. - min_depth = await self.get_min_depth_for_context(pdu.room_id) - - logger.debug("min_depth: %d", min_depth) - - prevs = set(pdu.prev_event_ids()) - seen = await self.store.have_events_in_timeline(prevs) - - if min_depth is not None and pdu.depth < min_depth: - # This is so that we don't notify the user about this - # message, to work around the fact that some events will - # reference really really old events we really don't want to - # send to the clients. - pdu.internal_metadata.outlier = True - elif min_depth is not None and pdu.depth > min_depth: - missing_prevs = prevs - seen - if sent_to_us_directly and missing_prevs: - # If we're missing stuff, ensure we only fetch stuff one - # at a time. - logger.info( - "Acquiring room lock to fetch %d missing prev_events: %s", - len(missing_prevs), - shortstr(missing_prevs), - ) - with (await self._room_pdu_linearizer.queue(pdu.room_id)): - logger.info( - "Acquired room lock to fetch %d missing prev_events", - len(missing_prevs), - ) - - try: - await self._get_missing_events_for_pdu( - origin, pdu, prevs, min_depth - ) - except Exception as e: - raise Exception( - "Error fetching missing prev_events for %s: %s" - % (event_id, e) - ) from e - - # Update the set of things we've seen after trying to - # fetch the missing stuff - seen = await self.store.have_events_in_timeline(prevs) - - if not prevs - seen: - logger.info( - "Found all missing prev_events", - ) - - if prevs - seen: - # We've still not been able to get all of the prev_events for this event. - # - # In this case, we need to fall back to asking another server in the - # federation for the state at this event. That's ok provided we then - # resolve the state against other bits of the DAG before using it (which - # will ensure that you can't just take over a room by sending an event, - # withholding its prev_events, and declaring yourself to be an admin in - # the subsequent state request). - # - # Now, if we're pulling this event as a missing prev_event, then clearly - # this event is not going to become the only forward-extremity and we are - # guaranteed to resolve its state against our existing forward - # extremities, so that should be fine. 
- # - # On the other hand, if this event was pushed to us, it is possible for - # it to become the only forward-extremity in the room, and we would then - # trust its state to be the state for the whole room. This is very bad. - # Further, if the event was pushed to us, there is no excuse for us not to - # have all the prev_events. We therefore reject any such events. - # - # XXX this really feels like it could/should be merged with the above, - # but there is an interaction with min_depth that I'm not really - # following. - - if sent_to_us_directly: - logger.warning( - "Rejecting: failed to fetch %d prev events: %s", - len(prevs - seen), - shortstr(prevs - seen), - ) - raise FederationError( - "ERROR", - 403, - ( - "Your server isn't divulging details about prev_events " - "referenced in this event." - ), - affected=pdu.event_id, - ) - - logger.info( - "Event %s is missing prev_events: calculating state for a " - "backwards extremity", - event_id, - ) - - # Calculate the state after each of the previous events, and - # resolve them to find the correct state at the current event. - event_map = {event_id: pdu} - try: - # Get the state of the events we know about - ours = await self.state_store.get_state_groups_ids(room_id, seen) - - # state_maps is a list of mappings from (type, state_key) to event_id - state_maps: List[StateMap[str]] = list(ours.values()) - - # we don't need this any more, let's delete it. - del ours - - # Ask the remote server for the states we don't - # know about - for p in prevs - seen: - logger.info("Requesting state after missing prev_event %s", p) - - with nested_logging_context(p): - # note that if any of the missing prevs share missing state or - # auth events, the requests to fetch those events are deduped - # by the get_pdu_cache in federation_client. - remote_state = ( - await self._get_state_after_missing_prev_event( - origin, room_id, p - ) - ) - - remote_state_map = { - (x.type, x.state_key): x.event_id for x in remote_state - } - state_maps.append(remote_state_map) - - for x in remote_state: - event_map[x.event_id] = x - - room_version = await self.store.get_room_version_id(room_id) - state_map = ( - await self._state_resolution_handler.resolve_events_with_store( - room_id, - room_version, - state_maps, - event_map, - state_res_store=StateResolutionStore(self.store), - ) - ) - - # We need to give _process_received_pdu the actual state events - # rather than event ids, so generate that now. - - # First though we need to fetch all the events that are in - # state_map, so we can build up the state below. - evs = await self.store.get_events( - list(state_map.values()), - get_prev_content=False, - redact_behaviour=EventRedactBehaviour.AS_IS, - ) - event_map.update(evs) - - state = [event_map[e] for e in state_map.values()] - except Exception: - logger.warning( - "Error attempting to resolve state at missing " "prev_events", - exc_info=True, - ) - raise FederationError( - "ERROR", - 403, - "We can't get valid state history.", - affected=event_id, - ) - - await self._process_received_pdu(origin, pdu, state=state) - - async def _get_missing_events_for_pdu( - self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int - ) -> None: - """ - Args: - origin: Origin of the pdu. Will be called to get the missing events - pdu: received pdu - prevs: List of event ids which we are missing - min_depth: Minimum depth of events to return. 
- """ - - room_id = pdu.room_id - event_id = pdu.event_id - - seen = await self.store.have_events_in_timeline(prevs) - - if not prevs - seen: - return - - latest_list = await self.store.get_latest_event_ids_in_room(room_id) - - # We add the prev events that we have seen to the latest - # list to ensure the remote server doesn't give them to us - latest = set(latest_list) - latest |= seen - - logger.info( - "Requesting missing events between %s and %s", - shortstr(latest), - event_id, - ) - - # XXX: we set timeout to 10s to help workaround - # https://github.com/matrix-org/synapse/issues/1733. - # The reason is to avoid holding the linearizer lock - # whilst processing inbound /send transactions, causing - # FDs to stack up and block other inbound transactions - # which empirically can currently take up to 30 minutes. - # - # N.B. this explicitly disables retry attempts. - # - # N.B. this also increases our chances of falling back to - # fetching fresh state for the room if the missing event - # can't be found, which slightly reduces our security. - # it may also increase our DAG extremity count for the room, - # causing additional state resolution? See #1760. - # However, fetching state doesn't hold the linearizer lock - # apparently. - # - # see https://github.com/matrix-org/synapse/pull/1744 - # - # ---- - # - # Update richvdh 2018/09/18: There are a number of problems with timing this - # request out aggressively on the client side: - # - # - it plays badly with the server-side rate-limiter, which starts tarpitting you - # if you send too many requests at once, so you end up with the server carefully - # working through the backlog of your requests, which you have already timed - # out. - # - # - for this request in particular, we now (as of - # https://github.com/matrix-org/synapse/pull/3456) reject any PDUs where the - # server can't produce a plausible-looking set of prev_events - so we becone - # much more likely to reject the event. - # - # - contrary to what it says above, we do *not* fall back to fetching fresh state - # for the room if get_missing_events times out. Rather, we give up processing - # the PDU whose prevs we are missing, which then makes it much more likely that - # we'll end up back here for the *next* PDU in the list, which exacerbates the - # problem. - # - # - the aggressive 10s timeout was introduced to deal with incoming federation - # requests taking 8 hours to process. It's not entirely clear why that was going - # on; certainly there were other issues causing traffic storms which are now - # resolved, and I think in any case we may be more sensible about our locking - # now. We're *certainly* more sensible about our logging. - # - # All that said: Let's try increasing the timeout to 60s and see what happens. - - try: - missing_events = await self.federation_client.get_missing_events( - origin, - room_id, - earliest_events_ids=list(latest), - latest_events=[pdu], - limit=10, - min_depth=min_depth, - timeout=60000, - ) - except (RequestSendFailed, HttpResponseException, NotRetryingDestination) as e: - # We failed to get the missing events, but since we need to handle - # the case of `get_missing_events` not returning the necessary - # events anyway, it is safe to simply log the error and continue. - logger.warning("Failed to get prev_events: %s", e) - return - - logger.info( - "Got %d prev_events: %s", - len(missing_events), - shortstr(missing_events), - ) - - # We want to sort these by depth so we process them and - # tell clients about them in order. 
- missing_events.sort(key=lambda x: x.depth) - - for ev in missing_events: - logger.info( - "Handling received prev_event %s", - ev.event_id, - ) - with nested_logging_context(ev.event_id): - try: - await self.on_receive_pdu(origin, ev, sent_to_us_directly=False) - except FederationError as e: - if e.code == 403: - logger.warning( - "Received prev_event %s failed history check.", - ev.event_id, - ) - else: - raise - - async def _get_state_for_room( - self, - destination: str, - room_id: str, - event_id: str, - ) -> List[EventBase]: - """Requests all of the room state at a given event from a remote - homeserver. - - Will also fetch any missing events reported in the `auth_chain_ids` - section of `/state_ids`. - - Args: - destination: The remote homeserver to query for the state. - room_id: The id of the room we're interested in. - event_id: The id of the event we want the state at. - - Returns: - A list of events in the state, not including the event itself. - """ - ( - state_event_ids, - auth_event_ids, - ) = await self.federation_client.get_room_state_ids( - destination, room_id, event_id=event_id - ) - - # Fetch the state events from the DB, and check we have the auth events. - event_map = await self.store.get_events(state_event_ids, allow_rejected=True) - auth_events_in_store = await self.store.have_seen_events( - room_id, auth_event_ids - ) - - # Check for missing events. We handle state and auth event seperately, - # as we want to pull the state from the DB, but we don't for the auth - # events. (Note: we likely won't use the majority of the auth chain, and - # it can be *huge* for large rooms, so it's worth ensuring that we don't - # unnecessarily pull it from the DB). - missing_state_events = set(state_event_ids) - set(event_map) - missing_auth_events = set(auth_event_ids) - set(auth_events_in_store) - if missing_state_events or missing_auth_events: - await self._get_events_and_persist( - destination=destination, - room_id=room_id, - events=missing_state_events | missing_auth_events, - ) - - if missing_state_events: - new_events = await self.store.get_events( - missing_state_events, allow_rejected=True - ) - event_map.update(new_events) - - missing_state_events.difference_update(new_events) - - if missing_state_events: - logger.warning( - "Failed to fetch missing state events for %s %s", - event_id, - missing_state_events, - ) - - if missing_auth_events: - auth_events_in_store = await self.store.have_seen_events( - room_id, missing_auth_events - ) - missing_auth_events.difference_update(auth_events_in_store) - - if missing_auth_events: - logger.warning( - "Failed to fetch missing auth events for %s %s", - event_id, - missing_auth_events, - ) - - remote_state = list(event_map.values()) - - # check for events which were in the wrong room. - # - # this can happen if a remote server claims that the state or - # auth_events at an event in room A are actually events in room B - - bad_events = [ - (event.event_id, event.room_id) - for event in remote_state - if event.room_id != room_id - ] - - for bad_event_id, bad_room_id in bad_events: - # This is a bogus situation, but since we may only discover it a long time - # after it happened, we try our best to carry on, by just omitting the - # bad events from the returned auth/state set. 
- logger.warning( - "Remote server %s claims event %s in room %s is an auth/state " - "event in room %s", - destination, - bad_event_id, - bad_room_id, - room_id, - ) - - if bad_events: - remote_state = [e for e in remote_state if e.room_id == room_id] - - return remote_state - - async def _get_state_after_missing_prev_event( - self, - destination: str, - room_id: str, - event_id: str, - ) -> List[EventBase]: - """Requests all of the room state at a given event from a remote homeserver. - - Args: - destination: The remote homeserver to query for the state. - room_id: The id of the room we're interested in. - event_id: The id of the event we want the state at. - - Returns: - A list of events in the state, including the event itself - """ - # TODO: This function is basically the same as _get_state_for_room. Can - # we make backfill() use it, rather than having two code paths? I think the - # only difference is that backfill() persists the prev events separately. - - ( - state_event_ids, - auth_event_ids, - ) = await self.federation_client.get_room_state_ids( - destination, room_id, event_id=event_id - ) - - logger.debug( - "state_ids returned %i state events, %i auth events", - len(state_event_ids), - len(auth_event_ids), - ) - - # start by just trying to fetch the events from the store - desired_events = set(state_event_ids) - desired_events.add(event_id) - logger.debug("Fetching %i events from cache/store", len(desired_events)) - fetched_events = await self.store.get_events( - desired_events, allow_rejected=True - ) - - missing_desired_events = desired_events - fetched_events.keys() - logger.debug( - "We are missing %i events (got %i)", - len(missing_desired_events), - len(fetched_events), - ) - - # We probably won't need most of the auth events, so let's just check which - # we have for now, rather than thrashing the event cache with them all - # unnecessarily. - - # TODO: we probably won't actually need all of the auth events, since we - # already have a bunch of the state events. It would be nice if the - # federation api gave us a way of finding out which we actually need. - - missing_auth_events = set(auth_event_ids) - fetched_events.keys() - missing_auth_events.difference_update( - await self.store.have_seen_events(room_id, missing_auth_events) - ) - logger.debug("We are also missing %i auth events", len(missing_auth_events)) - - missing_events = missing_desired_events | missing_auth_events - logger.debug("Fetching %i events from remote", len(missing_events)) - await self._get_events_and_persist( - destination=destination, room_id=room_id, events=missing_events - ) - - # we need to make sure we re-load from the database to get the rejected - # state correct. - fetched_events.update( - await self.store.get_events(missing_desired_events, allow_rejected=True) - ) - - # check for events which were in the wrong room. - # - # this can happen if a remote server claims that the state or - # auth_events at an event in room A are actually events in room B - - bad_events = [ - (event_id, event.room_id) - for event_id, event in fetched_events.items() - if event.room_id != room_id - ] - - for bad_event_id, bad_room_id in bad_events: - # This is a bogus situation, but since we may only discover it a long time - # after it happened, we try our best to carry on, by just omitting the - # bad events from the returned state set. 
- logger.warning( - "Remote server %s claims event %s in room %s is an auth/state " - "event in room %s", - destination, - bad_event_id, - bad_room_id, - room_id, - ) - - del fetched_events[bad_event_id] - - # if we couldn't get the prev event in question, that's a problem. - remote_event = fetched_events.get(event_id) - if not remote_event: - raise Exception("Unable to get missing prev_event %s" % (event_id,)) - - # missing state at that event is a warning, not a blocker - # XXX: this doesn't sound right? it means that we'll end up with incomplete - # state. - failed_to_fetch = desired_events - fetched_events.keys() - if failed_to_fetch: - logger.warning( - "Failed to fetch missing state events for %s %s", - event_id, - failed_to_fetch, - ) - - remote_state = [ - fetched_events[e_id] for e_id in state_event_ids if e_id in fetched_events - ] - - if remote_event.is_state() and remote_event.rejected_reason is None: - remote_state.append(remote_event) - - return remote_state - - async def _process_received_pdu( - self, - origin: str, - event: EventBase, - state: Optional[Iterable[EventBase]], - ) -> None: - """Called when we have a new pdu. We need to do auth checks and put it - through the StateHandler. - - Args: - origin: server sending the event - - event: event to be persisted - - state: Normally None, but if we are handling a gap in the graph - (ie, we are missing one or more prev_events), the resolved state at the - event - """ - logger.debug("Processing event: %s", event) - - try: - context = await self.state_handler.compute_event_context( - event, old_state=state - ) - await self._auth_and_persist_event(origin, event, context, state=state) - except AuthError as e: - raise FederationError("ERROR", e.code, e.msg, affected=event.event_id) - - # For encrypted messages we check that we know about the sending device, - # if we don't then we mark the device cache for that user as stale. - if event.type == EventTypes.Encrypted: - device_id = event.content.get("device_id") - sender_key = event.content.get("sender_key") - - cached_devices = await self.store.get_cached_devices_for_user(event.sender) - - resync = False # Whether we should resync device lists. - - device = None - if device_id is not None: - device = cached_devices.get(device_id) - if device is None: - logger.info( - "Received event from remote device not in our cache: %s %s", - event.sender, - device_id, - ) - resync = True - - # We also check if the `sender_key` matches what we expect. - if sender_key is not None: - # Figure out what sender key we're expecting. If we know the - # device and recognize the algorithm then we can work out the - # exact key to expect. Otherwise check it matches any key we - # have for that device. - - current_keys: Container[str] = [] - - if device: - keys = device.get("keys", {}).get("keys", {}) - - if ( - event.content.get("algorithm") - == RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2 - ): - # For this algorithm we expect a curve25519 key. - key_name = "curve25519:%s" % (device_id,) - current_keys = [keys.get(key_name)] - else: - # We don't know understand the algorithm, so we just - # check it matches a key for the device. - current_keys = keys.values() - elif device_id: - # We don't have any keys for the device ID. - pass - else: - # The event didn't include a device ID, so we just look for - # keys across all devices. 
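The removed block above (which resumes below with the cross-device key collection) implements the sender-key consistency check: work out which keys we would expect for the event's claimed device, and if the advertised `sender_key` matches none of them, mark the remote user's device cache as stale and resync. A condensed, self-contained sketch of just that decision, following the nested cache layout used in the code above:

```python
from typing import Dict, List, Optional

MEGOLM_V1 = "m.megolm.v1.aes-sha2"


def needs_resync(
    cached_devices: Dict[str, dict],
    device_id: Optional[str],
    sender_key: str,
    algorithm: str,
) -> bool:
    device = cached_devices.get(device_id) if device_id else None
    current_keys: List[Optional[str]] = []

    if device:
        keys = device.get("keys", {}).get("keys", {})
        if algorithm == MEGOLM_V1:
            # For megolm we know the exact key name to expect.
            current_keys = [keys.get(f"curve25519:{device_id}")]
        else:
            # Unknown algorithm: accept any key we hold for the device.
            current_keys = list(keys.values())
    elif not device_id:
        # No device ID in the event: check keys across all cached devices.
        current_keys = [
            key
            for dev in cached_devices.values()
            for key in dev.get("keys", {}).get("keys", {}).values()
        ]

    # A sender key we don't recognise suggests our device cache is stale.
    return sender_key not in current_keys
```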
- current_keys = [ - key - for device in cached_devices.values() - for key in device.get("keys", {}).get("keys", {}).values() - ] - - # We now check that the sender key matches (one of) the expected - # keys. - if sender_key not in current_keys: - logger.info( - "Received event from remote device with unexpected sender key: %s %s: %s", - event.sender, - device_id or "", - sender_key, - ) - resync = True - - if resync: - run_as_background_process( - "resync_device_due_to_pdu", self._resync_device, event.sender - ) - - async def _resync_device(self, sender: str) -> None: - """We have detected that the device list for the given user may be out - of sync, so we try and resync them. - """ - - try: - await self.store.mark_remote_user_device_cache_as_stale(sender) - - # Immediately attempt a resync in the background - if self.config.worker_app: - await self._user_device_resync(user_id=sender) - else: - await self._device_list_updater.user_device_resync(sender) - except Exception: - logger.exception("Failed to resync device for %s", sender) - - @log_function - async def backfill( - self, dest: str, room_id: str, limit: int, extremities: List[str] - ) -> List[EventBase]: - """Trigger a backfill request to `dest` for the given `room_id` - - This will attempt to get more events from the remote. If the other side - has no new events to offer, this will return an empty list. - - As the events are received, we check their signatures, and also do some - sanity-checking on them. If any of the backfilled events are invalid, - this method throws a SynapseError. - - TODO: make this more useful to distinguish failures of the remote - server from invalid events (there is probably no point in trying to - re-fetch invalid events from every other HS in the room.) - """ - if dest == self.server_name: - raise SynapseError(400, "Can't backfill from self.") - - events = await self.federation_client.backfill( - dest, room_id, limit=limit, extremities=extremities - ) - - if not events: - return [] - - # ideally we'd sanity check the events here for excess prev_events etc, - # but it's hard to reject events at this point without completely - # breaking backfill in the same way that it is currently broken by - # events whose signature we cannot verify (#3121). - # - # So for now we accept the events anyway. #3124 tracks this. - # - # for ev in events: - # self._sanity_check_event(ev) - - # Don't bother processing events we already have. - seen_events = await self.store.have_events_in_timeline( - {e.event_id for e in events} - ) - - events = [e for e in events if e.event_id not in seen_events] - - if not events: - return [] - - event_map = {e.event_id: e for e in events} - - event_ids = {e.event_id for e in events} - - # build a list of events whose prev_events weren't in the batch. - # (XXX: this will include events whose prev_events we already have; that doesn't - # sound right?) - edges = [ev.event_id for ev in events if set(ev.prev_event_ids()) - event_ids] - - logger.info("backfill: Got %d events with %d edges", len(events), len(edges)) - - # For each edge get the current state. 
- - state_events = {} - events_to_state = {} - for e_id in edges: - state = await self._get_state_for_room( - destination=dest, - room_id=room_id, - event_id=e_id, - ) - state_events.update({s.event_id: s for s in state}) - events_to_state[e_id] = state - - required_auth = { - a_id - for event in events + list(state_events.values()) - for a_id in event.auth_event_ids() - } - auth_events = await self.store.get_events(required_auth, allow_rejected=True) - auth_events.update( - {e_id: event_map[e_id] for e_id in required_auth if e_id in event_map} - ) - - ev_infos = [] - - # Step 1: persist the events in the chunk we fetched state for (i.e. - # the backwards extremities), with custom auth events and state - for e_id in events_to_state: - # For paranoia we ensure that these events are marked as - # non-outliers - ev = event_map[e_id] - assert not ev.internal_metadata.is_outlier() - - ev_infos.append( - _NewEventInfo( - event=ev, - state=events_to_state[e_id], - auth_events={ - ( - auth_events[a_id].type, - auth_events[a_id].state_key, - ): auth_events[a_id] - for a_id in ev.auth_event_ids() - if a_id in auth_events - }, - ) - ) - - if ev_infos: - await self._auth_and_persist_events( - dest, room_id, ev_infos, backfilled=True - ) - - # Step 2: Persist the rest of the events in the chunk one by one - events.sort(key=lambda e: e.depth) - - for event in events: - if event in events_to_state: - continue - - # For paranoia we ensure that these events are marked as - # non-outliers - assert not event.internal_metadata.is_outlier() - - context = await self.state_handler.compute_event_context(event) - - # We store these one at a time since each event depends on the - # previous to work out the state. - # TODO: We can probably do something more clever here. - await self._auth_and_persist_event(dest, event, context, backfilled=True) - - return events - async def maybe_backfill( self, room_id: str, current_depth: int, limit: int ) -> bool: @@ -1057,9 +164,19 @@ async def maybe_backfill( async def _maybe_backfill_inner( self, room_id: str, current_depth: int, limit: int ) -> bool: - extremities = await self.store.get_oldest_events_with_depth_in_room(room_id) + oldest_events_with_depth = ( + await self.store.get_oldest_event_ids_with_depth_in_room(room_id) + ) + insertion_events_to_be_backfilled = ( + await self.store.get_insertion_event_backwards_extremities_in_room(room_id) + ) + logger.debug( + "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", + oldest_events_with_depth, + insertion_events_to_be_backfilled, + ) - if not extremities: + if not oldest_events_with_depth and not insertion_events_to_be_backfilled: logger.debug("Not backfilling as no extremeties found.") return False @@ -1089,10 +206,12 @@ async def _maybe_backfill_inner( # state *before* the event, ignoring the special casing certain event # types have. 
- forward_events = await self.store.get_successor_events(list(extremities)) + forward_event_ids = await self.store.get_successor_events( + list(oldest_events_with_depth) + ) extremities_events = await self.store.get_events( - forward_events, + forward_event_ids, redact_behaviour=EventRedactBehaviour.AS_IS, get_prev_content=False, ) @@ -1106,10 +225,19 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) + logger.debug( + "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities + ) - if not filtered_extremities: + if not filtered_extremities and not insertion_events_to_be_backfilled: return False + extremities = { + **oldest_events_with_depth, + # TODO: insertion_events_to_be_backfilled is currently skipping the filtered_extremities checks + **insertion_events_to_be_backfilled, + } + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted(extremities.items(), key=lambda e: -int(e[1])) max_depth = sorted_extremeties_tuple[0][1] @@ -1132,18 +260,10 @@ async def _maybe_backfill_inner( ) return False - logger.debug( - "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s", - room_id, - current_depth, - max_depth, - sorted_extremeties_tuple, - ) - # We ignore extremities that have a greater depth than our current depth # as: # 1. we don't really care about getting events that have happened - # before our current position; and + # after our current position; and # 2. we have likely previously tried and failed to backfill from that # extremity, so to avoid getting "stuck" requesting the same # backfill repeatedly we drop those extremities. @@ -1151,9 +271,19 @@ async def _maybe_backfill_inner( t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] + logger.debug( + "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", + room_id, + current_depth, + limit, + max_depth, + sorted_extremeties_tuple, + filtered_sorted_extremeties_tuple, + ) + # However, we need to check that the filtered extremities are non-empty. # If they are empty then either we can a) bail or b) still attempt to - # backill. We opt to try backfilling anyway just in case we do get + # backfill. We opt to try backfilling anyway just in case we do get # relevant events. if filtered_sorted_extremeties_tuple: sorted_extremeties_tuple = filtered_sorted_extremeties_tuple @@ -1169,36 +299,6 @@ async def _maybe_backfill_inner( curr_state = await self.state_handler.get_current_state(room_id) - def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: - """Get joined domains from state - - Args: - state: State map from type/state key to event. - - Returns: - Returns a list of servers with the lowest depth of their joins. - Sorted by lowest depth first. - """ - joined_users = [ - (state_key, int(event.depth)) - for (e_type, state_key), event in state.items() - if e_type == EventTypes.Member and event.membership == Membership.JOIN - ] - - joined_domains: Dict[str, int] = {} - for u, d in joined_users: - try: - dom = get_domain_from_id(u) - old_d = joined_domains.get(dom) - if old_d: - joined_domains[dom] = min(d, old_d) - else: - joined_domains[dom] = d - except Exception: - pass - - return sorted(joined_domains.items(), key=lambda d: d[1]) - curr_domains = get_domains_from_state(curr_state) likely_domains = [ @@ -1209,14 +309,14 @@ async def try_backfill(domains: List[str]) -> bool: # TODO: Should we try multiple of these at a time? 
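`try_backfill` above walks a list of likely domains and asks each to backfill in turn, stopping at the first success (the loop body continues below). A sketch of that fallback loop, with `backfill_from` as a hypothetical stand-in for the federation backfill call; the real code treats `SynapseError` and `InvalidResponseError` as "try the next server" and lets unexpected errors propagate:

```python
import logging
from typing import Awaitable, Callable, List

logger = logging.getLogger(__name__)


async def try_backfill(
    domains: List[str], backfill_from: Callable[[str], Awaitable[None]]
) -> bool:
    for dom in domains:
        try:
            await backfill_from(dom)
            # If this succeeded we probably already have the events we need.
            return True
        except Exception as e:
            # Simplified: the real code only swallows selected error types.
            logger.info("Failed to backfill from %s because %s", dom, e)
            continue
    return False
```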
for dom in domains: try: - await self.backfill( + await self._federation_event_handler.backfill( dom, room_id, limit=100, extremities=extremities ) # If this succeeded then we probably already have the # appropriate stuff. # TODO: We can probably do something more intelligent here. return True - except SynapseError as e: + except (SynapseError, InvalidResponseError) as e: logger.info("Failed to backfill from %s because %s", dom, e) continue except HttpResponseException as e: @@ -1283,7 +383,7 @@ async def try_backfill(domains: List[str]) -> bool: for key, state_dict in states.items() } - for e_id, _ in sorted_extremeties_tuple: + for e_id in event_ids: likely_extremeties_domains = get_domains_from_state(states[e_id]) success = await try_backfill( @@ -1300,115 +400,6 @@ async def try_backfill(domains: List[str]) -> bool: return False - async def _get_events_and_persist( - self, destination: str, room_id: str, events: Iterable[str] - ) -> None: - """Fetch the given events from a server, and persist them as outliers. - - This function *does not* recursively get missing auth events of the - newly fetched events. Callers must include in the `events` argument - any missing events from the auth chain. - - Logs a warning if we can't find the given event. - """ - - room_version = await self.store.get_room_version(room_id) - - event_map: Dict[str, EventBase] = {} - - async def get_event(event_id: str): - with nested_logging_context(event_id): - try: - event = await self.federation_client.get_pdu( - [destination], - event_id, - room_version, - outlier=True, - ) - if event is None: - logger.warning( - "Server %s didn't return event %s", - destination, - event_id, - ) - return - - event_map[event.event_id] = event - - except Exception as e: - logger.warning( - "Error fetching missing state/auth event %s: %s %s", - event_id, - type(e), - e, - ) - - await concurrently_execute(get_event, events, 5) - - # Make a map of auth events for each event. We do this after fetching - # all the events as some of the events' auth events will be in the list - # of requested events. - - auth_events = [ - aid - for event in event_map.values() - for aid in event.auth_event_ids() - if aid not in event_map - ] - persisted_events = await self.store.get_events( - auth_events, - allow_rejected=True, - ) - - event_infos = [] - for event in event_map.values(): - auth = {} - for auth_event_id in event.auth_event_ids(): - ae = persisted_events.get(auth_event_id) or event_map.get(auth_event_id) - if ae: - auth[(ae.type, ae.state_key)] = ae - else: - logger.info("Missing auth event %s", auth_event_id) - - event_infos.append(_NewEventInfo(event, None, auth)) - - if event_infos: - await self._auth_and_persist_events( - destination, - room_id, - event_infos, - ) - - def _sanity_check_event(self, ev: EventBase) -> None: - """ - Do some early sanity checks of a received event - - In particular, checks it doesn't have an excessive number of - prev_events or auth_events, which could cause a huge state resolution - or cascade of event fetches. 
- - Args: - ev: event to be checked - - Raises: - SynapseError if the event does not pass muster - """ - if len(ev.prev_event_ids()) > 20: - logger.warning( - "Rejecting event %s which has %i prev_events", - ev.event_id, - len(ev.prev_event_ids()), - ) - raise SynapseError(HTTPStatus.BAD_REQUEST, "Too many prev_events") - - if len(ev.auth_event_ids()) > 10: - logger.warning( - "Rejecting event %s which has %i auth_events", - ev.event_id, - len(ev.auth_event_ids()), - ) - raise SynapseError(HTTPStatus.BAD_REQUEST, "Too many auth_events") - async def send_invite(self, target_host: str, event: EventBase) -> EventBase: """Sends the invite to the remote server for signing. @@ -1474,9 +465,9 @@ async def do_invite_join( # This shouldn't happen, because the RoomMemberHandler has a # linearizer lock which only allows one operation per user per room # at a time - so this is just paranoia. - assert room_id not in self.room_queues + assert room_id not in self._federation_event_handler.room_queues - self.room_queues[room_id] = [] + self._federation_event_handler.room_queues[room_id] = [] await self._clean_room_for_join(room_id) @@ -1517,9 +508,10 @@ async def do_invite_join( await self.store.upsert_room_on_join( room_id=room_id, room_version=room_version_obj, + auth_events=auth_chain, ) - max_stream_id = await self._persist_auth_tree( + max_stream_id = await self._federation_event_handler.process_remote_join( origin, room_id, auth_chain, state, event, room_version_obj ) @@ -1550,8 +542,8 @@ async def do_invite_join( logger.debug("Finished joining %s to %s", joinee, room_id) return event.event_id, max_stream_id finally: - room_queue = self.room_queues[room_id] - del self.room_queues[room_id] + room_queue = self._federation_event_handler.room_queues[room_id] + del self._federation_event_handler.room_queues[room_id] # we don't need to wait for the queued events to be processed - # it's just a best-effort thing at this point. We do want to do @@ -1602,6 +594,13 @@ async def do_knock( target_hosts, room_id, knockee, Membership.KNOCK, content, params=params ) + # Mark the knock as an outlier as we don't yet have the state at this point in + # the DAG. + event.internal_metadata.outlier = True + + # ... but tell /sync to send it to clients anyway. + event.internal_metadata.out_of_band_membership = True + # Record the room ID and its version so that we have a record of the room await self._maybe_store_room_on_outlier_membership( room_id=event.room_id, room_version=event_format_version @@ -1626,8 +625,8 @@ async def do_knock( # in the invitee's sync stream. It is stripped out for all other local users. 
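The `do_knock` changes above introduce the out-of-band membership pattern: the knock event is persisted as an outlier, because we don't yet have the room state at its position in the DAG, while a second flag tells `/sync` to deliver it to the knocking user anyway (the hunk resumes below with the stripped room state). A tiny illustration with simplified stand-in classes:

```python
# Simplified stand-ins for Synapse's event metadata; only the two flags
# set in the diff are modelled.
class EventInternalMetadata:
    def __init__(self) -> None:
        self.outlier = False
        self.out_of_band_membership = False


class Event:
    def __init__(self) -> None:
        self.internal_metadata = EventInternalMetadata()


knock = Event()
# No state for this event yet, so persist it outside the normal DAG...
knock.internal_metadata.outlier = True
# ...but make sure it still reaches the knocking user's /sync stream.
knock.internal_metadata.out_of_band_membership = True
```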
event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"] - context = await self.state_handler.compute_event_context(event) - stream_id = await self.persist_events_and_notify( + context = EventContext.for_outlier() + stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) return event.event_id, stream_id @@ -1643,13 +642,11 @@ async def _handle_queued_pdus( for p, origin in room_queue: try: logger.info( - "Processing queued PDU %s which was received " - "while we were joining %s", - p.event_id, - p.room_id, + "Processing queued PDU %s which was received while we were joining", + p, ) with nested_logging_context(p.event_id): - await self.on_receive_pdu(origin, p, sent_to_us_directly=True) + await self._federation_event_handler.on_receive_pdu(origin, p) except Exception as e: logger.warning( "Error handling queued PDU %s from %s: %s", p.event_id, origin, e @@ -1716,14 +713,14 @@ async def on_make_join_request( if include_auth_user_id: event_content[ - "join_authorised_via_users_server" + EventContentFields.AUTHORISING_USER ] = await self._event_auth_handler.get_user_which_could_invite( room_id, state_ids, ) - builder = self.event_builder_factory.new( - room_version.identifier, + builder = self.event_builder_factory.for_room_version( + room_version, { "type": EventTypes.Member, "content": event_content, @@ -1742,14 +739,13 @@ async def on_make_join_request( raise # Ensure the user can even join the room. - await self._check_join_restrictions(context, event) + await self._federation_event_handler.check_join_restrictions(context, event) # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_join_request` - await self._event_auth_handler.check_from_context( - room_version.identifier, event, context, do_sig_check=False + await self._event_auth_handler.check_auth_rules_from_context( + room_version, event, context ) - return event async def on_invite_request( @@ -1766,7 +762,7 @@ async def on_invite_request( if is_blocked: raise SynapseError(403, "This room has been blocked on this server") - if self.hs.config.block_non_admin_invites: + if self.hs.config.server.block_non_admin_invites: raise SynapseError(403, "This server does not accept room invites") if not await self.spam_checker.user_may_invite( @@ -1818,8 +814,10 @@ async def on_invite_request( ) ) - context = await self.state_handler.compute_event_context(event) - await self.persist_events_and_notify(event.room_id, [(event, context)]) + context = EventContext.for_outlier() + await self._federation_event_handler.persist_events_and_notify( + event.room_id, [(event, context)] + ) return event @@ -1845,8 +843,8 @@ async def do_remotely_reject_invite( await self.federation_client.send_leave(host_list, event) - context = await self.state_handler.compute_event_context(event) - stream_id = await self.persist_events_and_notify( + context = EventContext.for_outlier() + stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -1899,9 +897,9 @@ async def on_make_leave_request( ) raise SynapseError(403, "User not from origin", Codes.FORBIDDEN) - room_version = await self.store.get_room_version_id(room_id) - builder = self.event_builder_factory.new( - room_version, + room_version_obj = await self.store.get_room_version(room_id) + builder = self.event_builder_factory.for_room_version( + room_version_obj, { "type": EventTypes.Member, "content": {"membership": 
Membership.LEAVE}, @@ -1918,8 +916,8 @@ async def on_make_leave_request( try: # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_leave_request` - await self._event_auth_handler.check_from_context( - room_version, event, context, do_sig_check=False + await self._event_auth_handler.check_auth_rules_from_context( + room_version_obj, event, context ) except AuthError as e: logger.warning("Failed to create new leave %r because %s", event, e) @@ -1951,10 +949,10 @@ async def on_make_knock_request( ) raise SynapseError(403, "User not from origin", Codes.FORBIDDEN) - room_version = await self.store.get_room_version_id(room_id) + room_version_obj = await self.store.get_room_version(room_id) - builder = self.event_builder_factory.new( - room_version, + builder = self.event_builder_factory.for_room_version( + room_version_obj, { "type": EventTypes.Member, "content": {"membership": Membership.KNOCK}, @@ -1980,124 +978,14 @@ async def on_make_knock_request( try: # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_knock_request` - await self._event_auth_handler.check_from_context( - room_version, event, context, do_sig_check=False + await self._event_auth_handler.check_auth_rules_from_context( + room_version_obj, event, context ) except AuthError as e: logger.warning("Failed to create new knock %r because %s", event, e) - raise e - - return event - - @log_function - async def on_send_membership_event( - self, origin: str, event: EventBase - ) -> Tuple[EventBase, EventContext]: - """ - We have received a join/leave/knock event for a room via send_join/leave/knock. - - Verify that event and send it into the room on the remote homeserver's behalf. - - This is quite similar to on_receive_pdu, with the following principal - differences: - * only membership events are permitted (and only events with - sender==state_key -- ie, no kicks or bans) - * *We* send out the event on behalf of the remote server. - * We enforce the membership restrictions of restricted rooms. - * Rejected events result in an exception rather than being stored. - - There are also other differences, however it is not clear if these are by - design or omission. In particular, we do not attempt to backfill any missing - prev_events. - - Args: - origin: The homeserver of the remote (joining/invited/knocking) user. - event: The member event that has been signed by the remote homeserver. - - Returns: - The event and context of the event after inserting it into the room graph. - - Raises: - SynapseError if the event is not accepted into the room - """ - logger.debug( - "on_send_membership_event: Got event: %s, signatures: %s", - event.event_id, - event.signatures, - ) - - if get_domain_from_id(event.sender) != origin: - logger.info( - "Got send_membership request for user %r from different origin %s", - event.sender, - origin, - ) - raise SynapseError(403, "User not from origin", Codes.FORBIDDEN) - - if event.sender != event.state_key: - raise SynapseError(400, "state_key and sender must match", Codes.BAD_JSON) - - assert not event.internal_metadata.outlier - - # Send this event on behalf of the other server. - # - # The remote server isn't a full participant in the room at this point, so - # may not have an up-to-date list of the other homeservers participating in - # the room, so we send it on their behalf. 
- event.internal_metadata.send_on_behalf_of = origin - - context = await self.state_handler.compute_event_context(event) - context = await self._check_event_auth(origin, event, context) - if context.rejected: - raise SynapseError( - 403, f"{event.membership} event was rejected", Codes.FORBIDDEN - ) - - # for joins, we need to check the restrictions of restricted rooms - if event.membership == Membership.JOIN: - await self._check_join_restrictions(context, event) - - # for knock events, we run the third-party event rules. It's not entirely clear - # why we don't do this for other sorts of membership events. - if event.membership == Membership.KNOCK: - event_allowed, _ = await self.third_party_event_rules.check_event_allowed( - event, context - ) - if not event_allowed: - logger.info("Sending of knock %s forbidden by third-party rules", event) - raise SynapseError( - 403, "This event is not allowed in this context", Codes.FORBIDDEN - ) - - # all looks good, we can persist the event. - await self._run_push_actions_and_persist_event(event, context) - return event, context - - async def _check_join_restrictions( - self, context: EventContext, event: EventBase - ) -> None: - """Check that restrictions in restricted join rules are matched - - Called when we receive a join event via send_join. - - Raises an auth error if the restrictions are not matched. - """ - prev_state_ids = await context.get_prev_state_ids() + raise e - # Check if the user is already in the room or invited to the room. - user_id = event.state_key - prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None) - prev_member_event = None - if prev_member_event_id: - prev_member_event = await self.store.get_event(prev_member_event_id) - - # Check if the member should be allowed access via membership in a space. - await self._event_auth_handler.check_restricted_join_rules( - prev_state_ids, - event.room_version, - user_id, - prev_member_event, - ) + return event async def get_state_for_pdu(self, room_id: str, event_id: str) -> List[EventBase]: """Returns the state at the event. i.e. not including said event.""" @@ -2199,341 +1087,6 @@ async def get_persisted_pdu( else: return None - async def get_min_depth_for_context(self, context: str) -> int: - return await self.store.get_min_depth(context) - - async def _auth_and_persist_event( - self, - origin: str, - event: EventBase, - context: EventContext, - state: Optional[Iterable[EventBase]] = None, - auth_events: Optional[MutableStateMap[EventBase]] = None, - backfilled: bool = False, - ) -> None: - """ - Process an event by performing auth checks and then persisting to the database. - - Args: - origin: The host the event originates from. - event: The event itself. - context: - The event context. - - NB that this function potentially modifies it. - state: - The state events used to check the event for soft-fail. If this is - not provided the current state events will be used. - auth_events: - Map from (event_type, state_key) to event - - Normally, our calculated auth_events based on the state of the room - at the event's position in the DAG, though occasionally (eg if the - event is an outlier), may be the auth events claimed by the remote - server. - backfilled: True if the event was backfilled. 
- """ - context = await self._check_event_auth( - origin, - event, - context, - state=state, - auth_events=auth_events, - backfilled=backfilled, - ) - - await self._run_push_actions_and_persist_event(event, context, backfilled) - - async def _run_push_actions_and_persist_event( - self, event: EventBase, context: EventContext, backfilled: bool = False - ): - """Run the push actions for a received event, and persist it. - - Args: - event: The event itself. - context: The event context. - backfilled: True if the event was backfilled. - """ - try: - if ( - not event.internal_metadata.is_outlier() - and not backfilled - and not context.rejected - ): - await self.action_generator.handle_push_actions_for_event( - event, context - ) - - await self.persist_events_and_notify( - event.room_id, [(event, context)], backfilled=backfilled - ) - except Exception: - run_in_background( - self.store.remove_push_actions_from_staging, event.event_id - ) - raise - - async def _auth_and_persist_events( - self, - origin: str, - room_id: str, - event_infos: Collection[_NewEventInfo], - backfilled: bool = False, - ) -> None: - """Creates the appropriate contexts and persists events. The events - should not depend on one another, e.g. this should be used to persist - a bunch of outliers, but not a chunk of individual events that depend - on each other for state calculations. - - Notifies about the events where appropriate. - """ - - if not event_infos: - return - - async def prep(ev_info: _NewEventInfo): - event = ev_info.event - with nested_logging_context(suffix=event.event_id): - res = await self.state_handler.compute_event_context( - event, old_state=ev_info.state - ) - res = await self._check_event_auth( - origin, - event, - res, - state=ev_info.state, - auth_events=ev_info.auth_events, - backfilled=backfilled, - ) - return res - - contexts = await make_deferred_yieldable( - defer.gatherResults( - [run_in_background(prep, ev_info) for ev_info in event_infos], - consumeErrors=True, - ) - ) - - await self.persist_events_and_notify( - room_id, - [ - (ev_info.event, context) - for ev_info, context in zip(event_infos, contexts) - ], - backfilled=backfilled, - ) - - async def _persist_auth_tree( - self, - origin: str, - room_id: str, - auth_events: List[EventBase], - state: List[EventBase], - event: EventBase, - room_version: RoomVersion, - ) -> int: - """Checks the auth chain is valid (and passes auth checks) for the - state and event. Then persists the auth chain and state atomically. - Persists the event separately. Notifies about the persisted events - where appropriate. - - Will attempt to fetch missing auth events. - - Args: - origin: Where the events came from - room_id, - auth_events - state - event - room_version: The room version we expect this room to have, and - will raise if it doesn't match the version in the create event. - """ - events_to_context = {} - for e in itertools.chain(auth_events, state): - e.internal_metadata.outlier = True - ctx = await self.state_handler.compute_event_context(e) - events_to_context[e.event_id] = ctx - - event_map = { - e.event_id: e for e in itertools.chain(auth_events, state, [event]) - } - - create_event = None - for e in auth_events: - if (e.type, e.state_key) == (EventTypes.Create, ""): - create_event = e - break - - if create_event is None: - # If the state doesn't have a create event then the room is - # invalid, and it would fail auth checks anyway. 
- raise SynapseError(400, "No create event in state") - - room_version_id = create_event.content.get( - "room_version", RoomVersions.V1.identifier - ) - - if room_version.identifier != room_version_id: - raise SynapseError(400, "Room version mismatch") - - missing_auth_events = set() - for e in itertools.chain(auth_events, state, [event]): - for e_id in e.auth_event_ids(): - if e_id not in event_map: - missing_auth_events.add(e_id) - - for e_id in missing_auth_events: - m_ev = await self.federation_client.get_pdu( - [origin], - e_id, - room_version=room_version, - outlier=True, - timeout=10000, - ) - if m_ev and m_ev.event_id == e_id: - event_map[e_id] = m_ev - else: - logger.info("Failed to find auth event %r", e_id) - - for e in itertools.chain(auth_events, state, [event]): - auth_for_e = { - (event_map[e_id].type, event_map[e_id].state_key): event_map[e_id] - for e_id in e.auth_event_ids() - if e_id in event_map - } - if create_event: - auth_for_e[(EventTypes.Create, "")] = create_event - - try: - event_auth.check(room_version, e, auth_events=auth_for_e) - except SynapseError as err: - # we may get SynapseErrors here as well as AuthErrors. For - # instance, there are a couple of (ancient) events in some - # rooms whose senders do not have the correct sigil; these - # cause SynapseErrors in auth.check. We don't want to give up - # the attempt to federate altogether in such cases. - - logger.warning("Rejecting %s because %s", e.event_id, err.msg) - - if e == event: - raise - events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR - - if auth_events or state: - await self.persist_events_and_notify( - room_id, - [ - (e, events_to_context[e.event_id]) - for e in itertools.chain(auth_events, state) - ], - ) - - new_event_context = await self.state_handler.compute_event_context( - event, old_state=state - ) - - return await self.persist_events_and_notify( - room_id, [(event, new_event_context)] - ) - - async def _check_for_soft_fail( - self, - event: EventBase, - state: Optional[Iterable[EventBase]], - backfilled: bool, - origin: str, - ) -> None: - """Checks if we should soft fail the event; if so, marks the event as - such. - - Args: - event - state: The state at the event if we don't have all the event's prev events - backfilled: Whether the event is from backfill - origin: The host the event originates from. - """ - # For new (non-backfilled and non-outlier) events we check if the event - # passes auth based on the current state. If it doesn't then we - # "soft-fail" the event. - if backfilled or event.internal_metadata.is_outlier(): - return - - extrem_ids_list = await self.store.get_latest_event_ids_in_room(event.room_id) - extrem_ids = set(extrem_ids_list) - prev_event_ids = set(event.prev_event_ids()) - - if extrem_ids == prev_event_ids: - # If they're the same then the current state is the same as the - # state at the event, so no point rechecking auth for soft fail. - return - - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - - # Calculate the "current state". - if state is not None: - # If we're explicitly given the state then we won't have all the - # prev events, and so we have a gap in the graph. In this case - # we want to be a little careful as we might have been down for - # a while and have an incorrect view of the current state, - # however we still want to do checks as gaps are easy to - # maliciously manufacture. 
- # - # So we use a "current state" that is actually a state - # resolution across the current forward extremities and the - # given state at the event. This should correctly handle cases - # like bans, especially with state res v2. - - state_sets_d = await self.state_store.get_state_groups( - event.room_id, extrem_ids - ) - state_sets: List[Iterable[EventBase]] = list(state_sets_d.values()) - state_sets.append(state) - current_states = await self.state_handler.resolve_events( - room_version, state_sets, event - ) - current_state_ids: StateMap[str] = { - k: e.event_id for k, e in current_states.items() - } - else: - current_state_ids = await self.state_handler.get_current_state_ids( - event.room_id, latest_event_ids=extrem_ids - ) - - logger.debug( - "Doing soft-fail check for %s: state %s", - event.event_id, - current_state_ids, - ) - - # Now check if event pass auth against said current state - auth_types = auth_types_for_event(room_version_obj, event) - current_state_ids_list = [ - e for k, e in current_state_ids.items() if k in auth_types - ] - - auth_events_map = await self.store.get_events(current_state_ids_list) - current_auth_events = { - (e.type, e.state_key): e for e in auth_events_map.values() - } - - try: - event_auth.check(room_version_obj, event, auth_events=current_auth_events) - except AuthError as e: - logger.warning( - "Soft-failing %r (from %s) because %s", - event, - e, - origin, - extra={ - "room_id": event.room_id, - "mxid": event.sender, - "hs": origin, - }, - ) - soft_failed_event_counter.inc() - event.internal_metadata.soft_failed = True - async def on_get_missing_events( self, origin: str, @@ -2562,463 +1115,6 @@ async def on_get_missing_events( return missing_events - async def _check_event_auth( - self, - origin: str, - event: EventBase, - context: EventContext, - state: Optional[Iterable[EventBase]] = None, - auth_events: Optional[MutableStateMap[EventBase]] = None, - backfilled: bool = False, - ) -> EventContext: - """ - Checks whether an event should be rejected (for failing auth checks). - - Args: - origin: The host the event originates from. - event: The event itself. - context: - The event context. - - NB that this function potentially modifies it. - state: - The state events used to check the event for soft-fail. If this is - not provided the current state events will be used. - auth_events: - Map from (event_type, state_key) to event - - Normally, our calculated auth_events based on the state of the room - at the event's position in the DAG, though occasionally (eg if the - event is an outlier), may be the auth events claimed by the remote - server. - - Also NB that this function adds entries to it. - - If this is not provided, it is calculated from the previous state IDs. - backfilled: True if the event was backfilled. - - Returns: - The updated context object. - """ - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - - if not auth_events: - prev_state_ids = await context.get_prev_state_ids() - auth_events_ids = self._event_auth_handler.compute_auth_events( - event, prev_state_ids, for_verification=True - ) - auth_events_x = await self.store.get_events(auth_events_ids) - auth_events = {(e.type, e.state_key): e for e in auth_events_x.values()} - - # This is a hack to fix some old rooms where the initial join event - # didn't reference the create event in its auth events. 
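# A rough sketch of the workaround in the comment above, with stub dicts
# standing in for events (patch_missing_create_auth is a hypothetical name):
# some ancient rooms contain early membership events whose auth_events omit
# the create event, so if the event's single, shallow prev_event turns out to
# be the create event, it is added back into the auth events used for
# verification.
from typing import Callable, Dict, Optional, Tuple

def patch_missing_create_auth(
    event: dict,
    get_event: Callable[[str], Optional[dict]],
    auth_events: Dict[Tuple[str, str], dict],
) -> None:
    if event["type"] == "m.room.member" and not event["auth_event_ids"]:
        if len(event["prev_event_ids"]) == 1 and event["depth"] < 5:
            prev = get_event(event["prev_event_ids"][0])
            if prev is not None and prev["type"] == "m.room.create":
                auth_events[(prev["type"], prev["state_key"])] = prev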
- if event.type == EventTypes.Member and not event.auth_event_ids(): - if len(event.prev_event_ids()) == 1 and event.depth < 5: - c = await self.store.get_event( - event.prev_event_ids()[0], allow_none=True - ) - if c and c.type == EventTypes.Create: - auth_events[(c.type, c.state_key)] = c - - try: - context = await self._update_auth_events_and_context_for_auth( - origin, event, context, auth_events - ) - except Exception: - # We don't really mind if the above fails, so lets not fail - # processing if it does. However, it really shouldn't fail so - # let's still log as an exception since we'll still want to fix - # any bugs. - logger.exception( - "Failed to double check auth events for %s with remote. " - "Ignoring failure and continuing processing of event.", - event.event_id, - ) - - try: - event_auth.check(room_version_obj, event, auth_events=auth_events) - except AuthError as e: - logger.warning("Failed auth resolution for %r because %s", event, e) - context.rejected = RejectedReason.AUTH_ERROR - - if not context.rejected: - await self._check_for_soft_fail(event, state, backfilled, origin=origin) - - if event.type == EventTypes.GuestAccess and not context.rejected: - await self.maybe_kick_guest_users(event) - - # If we are going to send this event over federation we precaclculate - # the joined hosts. - if event.internal_metadata.get_send_on_behalf_of(): - await self.event_creation_handler.cache_joined_hosts_for_event( - event, context - ) - - return context - - async def _update_auth_events_and_context_for_auth( - self, - origin: str, - event: EventBase, - context: EventContext, - auth_events: MutableStateMap[EventBase], - ) -> EventContext: - """Helper for _check_event_auth. See there for docs. - - Checks whether a given event has the expected auth events. If it - doesn't then we talk to the remote server to compare state to see if - we can come to a consensus (e.g. if one server missed some valid - state). - - This attempts to resolve any potential divergence of state between - servers, but is not essential and so failures should not block further - processing of the event. - - Args: - origin: - event: - context: - - auth_events: - Map from (event_type, state_key) to event - - Normally, our calculated auth_events based on the state of the room - at the event's position in the DAG, though occasionally (eg if the - event is an outlier), may be the auth events claimed by the remote - server. - - Also NB that this function adds entries to it. - - Returns: - updated context - """ - event_auth_events = set(event.auth_event_ids()) - - # missing_auth is the set of the event's auth_events which we don't yet have - # in auth_events. - missing_auth = event_auth_events.difference( - e.event_id for e in auth_events.values() - ) - - # if we have missing events, we need to fetch those events from somewhere. - # - # we start by checking if they are in the store, and then try calling /event_auth/. - if missing_auth: - have_events = await self.store.have_seen_events(event.room_id, missing_auth) - logger.debug("Events %s are in the store", have_events) - missing_auth.difference_update(have_events) - - if missing_auth: - # If we don't have all the auth events, we need to get them. - logger.info("auth_events contains unknown events: %s", missing_auth) - try: - try: - remote_auth_chain = await self.federation_client.get_event_auth( - origin, event.room_id, event.event_id - ) - except RequestSendFailed as e1: - # The other side isn't around or doesn't implement the - # endpoint, so lets just bail out. 
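# Sketch of the missing-auth bookkeeping above (set-based stand-ins for the
# store lookups): start from the auth event ids the event claims, drop the
# ones we already resolved, then drop anything the store has seen, leaving
# only the events that must be fetched from the remote.
from typing import Set

def compute_missing_auth(
    claimed_auth_ids: Set[str],
    resolved_auth_ids: Set[str],
    seen_in_store: Set[str],
) -> Set[str]:
    missing = claimed_auth_ids - resolved_auth_ids
    # Events already persisted locally don't need re-fetching.
    return missing - seen_in_store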
- logger.info("Failed to get event auth from remote: %s", e1) - return context - - seen_remotes = await self.store.have_seen_events( - event.room_id, [e.event_id for e in remote_auth_chain] - ) - - for e in remote_auth_chain: - if e.event_id in seen_remotes: - continue - - if e.event_id == event.event_id: - continue - - try: - auth_ids = e.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in remote_auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - e.internal_metadata.outlier = True - - logger.debug( - "_check_event_auth %s missing_auth: %s", - event.event_id, - e.event_id, - ) - missing_auth_event_context = ( - await self.state_handler.compute_event_context(e) - ) - await self._auth_and_persist_event( - origin, e, missing_auth_event_context, auth_events=auth - ) - - if e.event_id in event_auth_events: - auth_events[(e.type, e.state_key)] = e - except AuthError: - pass - - except Exception: - logger.exception("Failed to get auth chain") - - if event.internal_metadata.is_outlier(): - # XXX: given that, for an outlier, we'll be working with the - # event's *claimed* auth events rather than those we calculated: - # (a) is there any point in this test, since different_auth below will - # obviously be empty - # (b) alternatively, why don't we do it earlier? - logger.info("Skipping auth_event fetch for outlier") - return context - - different_auth = event_auth_events.difference( - e.event_id for e in auth_events.values() - ) - - if not different_auth: - return context - - logger.info( - "auth_events refers to events which are not in our calculated auth " - "chain: %s", - different_auth, - ) - - # XXX: currently this checks for redactions but I'm not convinced that is - # necessary? - different_events = await self.store.get_events_as_list(different_auth) - - for d in different_events: - if d.room_id != event.room_id: - logger.warning( - "Event %s refers to auth_event %s which is in a different room", - event.event_id, - d.event_id, - ) - - # don't attempt to resolve the claimed auth events against our own - # in this case: just use our own auth events. - # - # XXX: should we reject the event in this case? It feels like we should, - # but then shouldn't we also do so if we've failed to fetch any of the - # auth events? - return context - - # now we state-resolve between our own idea of the auth events, and the remote's - # idea of them. - - local_state = auth_events.values() - remote_auth_events = dict(auth_events) - remote_auth_events.update({(d.type, d.state_key): d for d in different_events}) - remote_state = remote_auth_events.values() - - room_version = await self.store.get_room_version_id(event.room_id) - new_state = await self.state_handler.resolve_events( - room_version, (local_state, remote_state), event - ) - - logger.info( - "After state res: updating auth_events with new state %s", - { - (d.type, d.state_key): d.event_id - for d in new_state.values() - if auth_events.get((d.type, d.state_key)) != d - }, - ) - - auth_events.update(new_state) - - context = await self._update_context_for_auth_events( - event, context, auth_events - ) - - return context - - async def _update_context_for_auth_events( - self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] - ) -> EventContext: - """Update the state_ids in an event context after auth event resolution, - storing the changes as a new state group. 
- - Args: - event: The event we're handling the context for - - context: initial event context - - auth_events: Events to update in the event context. - - Returns: - new event context - """ - # exclude the state key of the new event from the current_state in the context. - if event.is_state(): - event_key: Optional[Tuple[str, str]] = (event.type, event.state_key) - else: - event_key = None - state_updates = { - k: a.event_id for k, a in auth_events.items() if k != event_key - } - - current_state_ids = await context.get_current_state_ids() - current_state_ids = dict(current_state_ids) # type: ignore - - current_state_ids.update(state_updates) - - prev_state_ids = await context.get_prev_state_ids() - prev_state_ids = dict(prev_state_ids) - - prev_state_ids.update({k: a.event_id for k, a in auth_events.items()}) - - # create a new state group as a delta from the existing one. - prev_group = context.state_group - state_group = await self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=state_updates, - current_state_ids=current_state_ids, - ) - - return EventContext.with_state( - state_group=state_group, - state_group_before_event=context.state_group_before_event, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - prev_group=prev_group, - delta_ids=state_updates, - ) - - async def construct_auth_difference( - self, local_auth: Iterable[EventBase], remote_auth: Iterable[EventBase] - ) -> Dict: - """Given a local and remote auth chain, find the differences. This - assumes that we have already processed all events in remote_auth - - Params: - local_auth - remote_auth - - Returns: - dict - """ - - logger.debug("construct_auth_difference Start!") - - # TODO: Make sure we are OK with local_auth or remote_auth having more - # auth events in them than strictly necessary. - - def sort_fun(ev): - return ev.depth, ev.event_id - - logger.debug("construct_auth_difference after sort_fun!") - - # We find the differences by starting at the "bottom" of each list - # and iterating up on both lists. The lists are ordered by depth and - # then event_id, we iterate up both lists until we find the event ids - # don't match. Then we look at depth/event_id to see which side is - # missing that event, and iterate only up that list. Repeat. 
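# A standalone sketch of the merge walk described in the comment above: both
# auth chains are sorted by (depth, event_id) and walked in lock-step; the
# side whose current entry sorts first holds an event the other side lacks.
# (Tuples stand in for events; this is not the handler's exact code.)
from typing import Iterable, List, Tuple

def auth_chain_difference(
    local: Iterable[Tuple[int, str]], remote: Iterable[Tuple[int, str]]
) -> Tuple[List[str], List[str]]:
    local_sorted = sorted(local)
    remote_sorted = sorted(remote)
    missing_locals: List[str] = []   # events only the local chain has
    missing_remotes: List[str] = []  # events only the remote chain has
    i = j = 0
    while i < len(local_sorted) or j < len(remote_sorted):
        if j >= len(remote_sorted):
            missing_locals.append(local_sorted[i][1])
            i += 1
        elif i >= len(local_sorted):
            missing_remotes.append(remote_sorted[j][1])
            j += 1
        elif local_sorted[i] == remote_sorted[j]:
            i += 1
            j += 1
        elif local_sorted[i] < remote_sorted[j]:
            missing_locals.append(local_sorted[i][1])
            i += 1
        else:
            missing_remotes.append(remote_sorted[j][1])
            j += 1
    return missing_locals, missing_remotes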
- - remote_list = list(remote_auth) - remote_list.sort(key=sort_fun) - - local_list = list(local_auth) - local_list.sort(key=sort_fun) - - local_iter = iter(local_list) - remote_iter = iter(remote_list) - - logger.debug("construct_auth_difference before get_next!") - - def get_next(it, opt=None): - try: - return next(it) - except Exception: - return opt - - current_local = get_next(local_iter) - current_remote = get_next(remote_iter) - - logger.debug("construct_auth_difference before while") - - missing_remotes = [] - missing_locals = [] - while current_local or current_remote: - if current_remote is None: - missing_locals.append(current_local) - current_local = get_next(local_iter) - continue - - if current_local is None: - missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - if current_local.event_id == current_remote.event_id: - current_local = get_next(local_iter) - current_remote = get_next(remote_iter) - continue - - if current_local.depth < current_remote.depth: - missing_locals.append(current_local) - current_local = get_next(local_iter) - continue - - if current_local.depth > current_remote.depth: - missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - # They have the same depth, so we fall back to the event_id order - if current_local.event_id < current_remote.event_id: - missing_locals.append(current_local) - current_local = get_next(local_iter) - - if current_local.event_id > current_remote.event_id: - missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - logger.debug("construct_auth_difference after while") - - # missing locals should be sent to the server - # We should find why we are missing remotes, as they will have been - # rejected. - - # Remove events from missing_remotes if they are referencing a missing - # remote. We only care about the "root" rejected ones. - missing_remote_ids = [e.event_id for e in missing_remotes] - base_remote_rejected = list(missing_remotes) - for e in missing_remotes: - for e_id in e.auth_event_ids(): - if e_id in missing_remote_ids: - try: - base_remote_rejected.remove(e) - except ValueError: - pass - - reason_map = {} - - for e in base_remote_rejected: - reason = await self.store.get_rejection_reason(e.event_id) - if reason is None: - # TODO: e is not in the current state, so we should - # construct some proof of that. 
- continue - - reason_map[e.event_id] = reason - - logger.debug("construct_auth_difference returning") - - return { - "auth_chain": local_auth, - "rejects": { - e.event_id: {"reason": reason_map[e.event_id], "proof": None} - for e in base_remote_rejected - }, - "missing": [e.event_id for e in missing_locals], - } - @log_function async def exchange_third_party_invite( self, sender_user_id: str, target_user_id: str, room_id: str, signed: JsonDict @@ -3037,8 +1133,10 @@ async def exchange_third_party_invite( } if await self._event_auth_handler.check_host_in_room(room_id, self.hs.hostname): - room_version = await self.store.get_room_version_id(room_id) - builder = self.event_builder_factory.new(room_version, event_dict) + room_version_obj = await self.store.get_room_version(room_id) + builder = self.event_builder_factory.for_room_version( + room_version_obj, event_dict + ) EventValidator().validate_builder(builder) event, context = await self.event_creation_handler.create_new_client_event( @@ -3046,7 +1144,7 @@ async def exchange_third_party_invite( ) event, context = await self.add_display_name_to_third_party_invite( - room_version, event_dict, event, context + room_version_obj, event_dict, event, context ) EventValidator().validate_new(event, self.config) @@ -3056,8 +1154,9 @@ async def exchange_third_party_invite( event.internal_metadata.send_on_behalf_of = self.hs.hostname try: - await self._event_auth_handler.check_from_context( - room_version, event, context + validate_event_for_room_version(room_version_obj, event) + await self._event_auth_handler.check_auth_rules_from_context( + room_version_obj, event, context ) except AuthError as e: logger.warning("Denying new third party invite %r because %s", event, e) @@ -3091,22 +1190,25 @@ async def on_exchange_third_party_invite_request( """ assert_params_in_dict(event_dict, ["room_id"]) - room_version = await self.store.get_room_version_id(event_dict["room_id"]) + room_version_obj = await self.store.get_room_version(event_dict["room_id"]) # NB: event_dict has a particular specced format we might need to fudge # if we change event formats too much. - builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.for_room_version( + room_version_obj, event_dict + ) event, context = await self.event_creation_handler.create_new_client_event( builder=builder ) event, context = await self.add_display_name_to_third_party_invite( - room_version, event_dict, event, context + room_version_obj, event_dict, event, context ) try: - await self._event_auth_handler.check_from_context( - room_version, event, context + validate_event_for_room_version(room_version_obj, event) + await self._event_auth_handler.check_auth_rules_from_context( + room_version_obj, event, context ) except AuthError as e: logger.warning("Denying third party invite %r because %s", event, e) @@ -3123,7 +1225,7 @@ async def on_exchange_third_party_invite_request( async def add_display_name_to_third_party_invite( self, - room_version: str, + room_version_obj: RoomVersion, event_dict: JsonDict, event: EventBase, context: EventContext, @@ -3155,13 +1257,15 @@ async def add_display_name_to_third_party_invite( # auth checks. If we need the invite and don't have it then the # auth check code will explode appropriately. 
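# The recurring change in this diff splits the old check_from_context call
# into two steps; a sketch with injected callables (stub signatures, not
# Synapse's API): structural validation against the room version happens
# first and synchronously, then the auth rules are checked against the
# event's context.
from typing import Awaitable, Callable

async def validate_then_check_auth(
    room_version_obj: object,
    event: object,
    context: object,
    validate_event_for_room_version: Callable[[object, object], None],
    check_auth_rules_from_context: Callable[[object, object, object], Awaitable[None]],
) -> None:
    # Step 1: size/field validation for this room version; raises on failure.
    validate_event_for_room_version(room_version_obj, event)
    # Step 2: auth rules evaluated against the event's (prev) state.
    await check_auth_rules_from_context(room_version_obj, event, context)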
- builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.for_room_version( + room_version_obj, event_dict + ) EventValidator().validate_builder(builder) event, context = await self.event_creation_handler.create_new_client_event( builder=builder ) EventValidator().validate_new(event, self.config) - return (event, context) + return event, context async def _check_signature(self, event: EventBase, context: EventContext) -> None: """ @@ -3275,99 +1379,6 @@ async def _check_key_revocation(self, public_key: str, url: str) -> None: if "valid" not in response or not response["valid"]: raise AuthError(403, "Third party certificate was invalid") - async def persist_events_and_notify( - self, - room_id: str, - event_and_contexts: Sequence[Tuple[EventBase, EventContext]], - backfilled: bool = False, - ) -> int: - """Persists events and tells the notifier/pushers about them, if - necessary. - - Args: - room_id: The room ID of events being persisted. - event_and_contexts: Sequence of events with their associated - context that should be persisted. All events must belong to - the same room. - backfilled: Whether these events are a result of - backfilling or not - - Returns: - The stream ID after which all events have been persisted. - """ - if not event_and_contexts: - return self.store.get_current_events_token() - - instance = self.config.worker.events_shard_config.get_instance(room_id) - if instance != self._instance_name: - # Limit the number of events sent over replication. We choose 200 - # here as that is what we default to in `max_request_body_size(..)` - for batch in batch_iter(event_and_contexts, 200): - result = await self._send_events( - instance_name=instance, - store=self.store, - room_id=room_id, - event_and_contexts=batch, - backfilled=backfilled, - ) - return result["max_stream_id"] - else: - assert self.storage.persistence - - # Note that this returns the events that were persisted, which may not be - # the same as were passed in if some were deduplicated due to transaction IDs. - events, max_stream_token = await self.storage.persistence.persist_events( - event_and_contexts, backfilled=backfilled - ) - - if self._ephemeral_messages_enabled: - for event in events: - # If there's an expiry timestamp on the event, schedule its expiry. - self._message_handler.maybe_schedule_expiry(event) - - if not backfilled: # Never notify for backfilled events - for event in events: - await self._notify_persisted_event(event, max_stream_token) - - return max_stream_token.stream - - async def _notify_persisted_event( - self, event: EventBase, max_stream_token: RoomStreamToken - ) -> None: - """Checks to see if notifier/pushers should be notified about the - event or not. - - Args: - event: - max_stream_id: The max_stream_id returned by persist_events - """ - - extra_users = [] - if event.type == EventTypes.Member: - target_user_id = event.state_key - - # We notify for memberships if its an invite for one of our - # users - if event.internal_metadata.is_outlier(): - if event.membership != Membership.INVITE: - if not self.is_mine_id(target_user_id): - return - - target_user = UserID.from_string(target_user_id) - extra_users.append(target_user) - elif event.internal_metadata.is_outlier(): - return - - # the event has been persisted so it should have a stream ordering. 
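# Sketch of the sharded persistence dispatch in the removed
# persist_events_and_notify (re-homed on FederationEventHandler below): when
# a different worker owns the room's event stream, events are forwarded over
# replication in capped batches. Callables stand in for Synapse's internals.
from typing import Awaitable, Callable, Sequence, Tuple

def batch_iter(items: Sequence, size: int):
    # Yield successive chunks of at most `size` items.
    for i in range(0, len(items), size):
        yield items[i : i + size]

async def dispatch_events(
    owner_instance: str,
    this_instance: str,
    event_and_contexts: Sequence[Tuple[object, object]],
    send_to_worker: Callable[[str, Sequence], Awaitable[int]],
    persist_locally: Callable[[Sequence], Awaitable[int]],
) -> int:
    if owner_instance != this_instance:
        max_stream_id = 0
        # 200 mirrors the default used by max_request_body_size(..).
        for batch in batch_iter(event_and_contexts, 200):
            max_stream_id = await send_to_worker(owner_instance, batch)
        return max_stream_id
    return await persist_locally(event_and_contexts)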
- assert event.internal_metadata.stream_ordering - - event_pos = PersistedEventPosition( - self._instance_name, event.internal_metadata.stream_ordering - ) - self.notifier.on_new_room_event( - event, event_pos, max_stream_token, extra_users=extra_users - ) - async def _clean_room_for_join(self, room_id: str) -> None: """Called to clean up any data in DB for a given room, ready for the server to join the room. @@ -3375,7 +1386,7 @@ async def _clean_room_for_join(self, room_id: str) -> None: Args: room_id """ - if self.config.worker_app: + if self.config.worker.worker_app: await self._clean_room_for_join_client(room_id) else: await self.store.clean_room_for_join(room_id) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py new file mode 100644 index 000000000000..9917613298c6 --- /dev/null +++ b/synapse/handlers/federation_event.py @@ -0,0 +1,1941 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import logging +from http import HTTPStatus +from typing import ( + TYPE_CHECKING, + Collection, + Container, + Dict, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, +) + +from prometheus_client import Counter + +from synapse.api.constants import ( + EventContentFields, + EventTypes, + GuestAccess, + Membership, + RejectedReason, + RoomEncryptionAlgorithms, +) +from synapse.api.errors import ( + AuthError, + Codes, + FederationError, + HttpResponseException, + RequestSendFailed, + SynapseError, +) +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions +from synapse.event_auth import ( + auth_types_for_event, + check_auth_rules_for_event, + validate_event_for_room_version, +) +from synapse.events import EventBase +from synapse.events.snapshot import EventContext +from synapse.federation.federation_client import InvalidResponseError +from synapse.logging.context import nested_logging_context, run_in_background +from synapse.logging.utils import log_function +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet +from synapse.replication.http.federation import ( + ReplicationFederationSendEventsRestServlet, +) +from synapse.state import StateResolutionStore +from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.types import ( + PersistedEventPosition, + RoomStreamToken, + StateMap, + UserID, + get_domain_from_id, +) +from synapse.util.async_helpers import Linearizer, concurrently_execute +from synapse.util.iterutils import batch_iter +from synapse.util.retryutils import NotRetryingDestination +from synapse.util.stringutils import shortstr + +if TYPE_CHECKING: + from synapse.server import HomeServer + + +logger = logging.getLogger(__name__) + +soft_failed_event_counter = Counter( + "synapse_federation_soft_failed_events_total", + "Events received over federation that we marked as soft_failed", +) + + +class 
FederationEventHandler: + """Handles events that originated from federation. + + Responsible for handling incoming events and passing them on to the rest + of the homeserver (including auth and state conflict resolutions) + """ + + def __init__(self, hs: "HomeServer"): + self._store = hs.get_datastore() + self._storage = hs.get_storage() + self._state_store = self._storage.state + + self._state_handler = hs.get_state_handler() + self._event_creation_handler = hs.get_event_creation_handler() + self._event_auth_handler = hs.get_event_auth_handler() + self._message_handler = hs.get_message_handler() + self._action_generator = hs.get_action_generator() + self._state_resolution_handler = hs.get_state_resolution_handler() + # avoid a circular dependency by deferring execution here + self._get_room_member_handler = hs.get_room_member_handler + + self._federation_client = hs.get_federation_client() + self._third_party_event_rules = hs.get_third_party_event_rules() + self._notifier = hs.get_notifier() + + self._is_mine_id = hs.is_mine_id + self._server_name = hs.hostname + self._instance_name = hs.get_instance_name() + + self._config = hs.config + self._ephemeral_messages_enabled = hs.config.server.enable_ephemeral_messages + + self._send_events = ReplicationFederationSendEventsRestServlet.make_client(hs) + if hs.config.worker.worker_app: + self._user_device_resync = ( + ReplicationUserDevicesResyncRestServlet.make_client(hs) + ) + else: + self._device_list_updater = hs.get_device_handler().device_list_updater + + # When joining a room we need to queue any events for that room up. + # For each room, a list of (pdu, origin) tuples. + # TODO: replace this with something more elegant, probably based around the + # federation event staging area. + self.room_queues: Dict[str, List[Tuple[EventBase, str]]] = {} + + self._room_pdu_linearizer = Linearizer("fed_room_pdu") + + async def on_receive_pdu(self, origin: str, pdu: EventBase) -> None: + """Process a PDU received via a federation /send/ transaction + + Args: + origin: server which initiated the /send/ transaction. Will + be used to fetch missing events or state. + pdu: received PDU + """ + + # We should never see any outliers here. + assert not pdu.internal_metadata.outlier + + room_id = pdu.room_id + event_id = pdu.event_id + + # We reprocess pdus when we have seen them only as outliers + existing = await self._store.get_event( + event_id, allow_none=True, allow_rejected=True + ) + + # FIXME: Currently we fetch an event again when we already have it + # if it has been marked as an outlier. + if existing: + if not existing.internal_metadata.is_outlier(): + logger.info( + "Ignoring received event %s which we have already seen", event_id + ) + return + if pdu.internal_metadata.is_outlier(): + logger.info( + "Ignoring received outlier %s which we already have as an outlier", + event_id, + ) + return + logger.info("De-outliering event %s", event_id) + + # do some initial sanity-checking of the event. In particular, make + # sure it doesn't have hundreds of prev_events or auth_events, which + # could cause a huge state resolution or cascade of event fetches. + try: + self._sanity_check_event(pdu) + except SynapseError as err: + logger.warning("Received event failed sanity checks") + raise FederationError("ERROR", err.code, err.msg, affected=pdu.event_id) + + # If we are currently in the process of joining this room, then we + # queue up events for later processing.
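# Sketch of the per-room queueing used by room_queues above: while a join is
# in progress, incoming PDUs for that room are parked, then replayed by
# _handle_queued_pdus once the join completes. (A simplified stand-in class,
# not the handler itself.)
from typing import Dict, List, Tuple

class RoomQueues:
    def __init__(self) -> None:
        # room_id -> list of (pdu, origin) received mid-join
        self._queues: Dict[str, List[Tuple[object, str]]] = {}

    def start_join(self, room_id: str) -> None:
        self._queues[room_id] = []

    def maybe_queue(self, room_id: str, pdu: object, origin: str) -> bool:
        # Returns True if the PDU was parked for later processing.
        if room_id in self._queues:
            self._queues[room_id].append((pdu, origin))
            return True
        return False

    def finish_join(self, room_id: str) -> List[Tuple[object, str]]:
        # Hand back (and clear) everything queued during the join.
        return self._queues.pop(room_id, [])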
+ if room_id in self.room_queues: + logger.info( + "Queuing PDU from %s for now: join in progress", + origin, + ) + self.room_queues[room_id].append((pdu, origin)) + return + + # If we're not in the room just ditch the event entirely. This is + # probably an old server that has come back and thinks we're still in + # the room (or we've been rejoined to the room by a state reset). + # + # Note that if we were never in the room then we would have already + # dropped the event, since we wouldn't know the room version. + is_in_room = await self._event_auth_handler.check_host_in_room( + room_id, self._server_name + ) + if not is_in_room: + logger.info( + "Ignoring PDU from %s as we're not in the room", + origin, + ) + return None + + # Try to fetch any missing prev events to fill in gaps in the graph + prevs = set(pdu.prev_event_ids()) + seen = await self._store.have_events_in_timeline(prevs) + missing_prevs = prevs - seen + + if missing_prevs: + # We only backfill backwards to the min depth. + min_depth = await self._store.get_min_depth(pdu.room_id) + logger.debug("min_depth: %d", min_depth) + + if min_depth is not None and pdu.depth > min_depth: + # If we're missing stuff, ensure we only fetch stuff one + # at a time. + logger.info( + "Acquiring room lock to fetch %d missing prev_events: %s", + len(missing_prevs), + shortstr(missing_prevs), + ) + with (await self._room_pdu_linearizer.queue(pdu.room_id)): + logger.info( + "Acquired room lock to fetch %d missing prev_events", + len(missing_prevs), + ) + + try: + await self._get_missing_events_for_pdu( + origin, pdu, prevs, min_depth + ) + except Exception as e: + raise Exception( + "Error fetching missing prev_events for %s: %s" + % (event_id, e) + ) from e + + # Update the set of things we've seen after trying to + # fetch the missing stuff + seen = await self._store.have_events_in_timeline(prevs) + missing_prevs = prevs - seen + + if not missing_prevs: + logger.info("Found all missing prev_events") + + if missing_prevs: + # since this event was pushed to us, it is possible for it to + # become the only forward-extremity in the room, and we would then + # trust its state to be the state for the whole room. This is very + # bad. Further, if the event was pushed to us, there is no excuse + # for us not to have all the prev_events. (XXX: apart from + # min_depth?) + # + # We therefore reject any such events. + logger.warning( + "Rejecting: failed to fetch %d prev events: %s", + len(missing_prevs), + shortstr(missing_prevs), + ) + raise FederationError( + "ERROR", + 403, + ( + "Your server isn't divulging details about prev_events " + "referenced in this event." + ), + affected=pdu.event_id, + ) + + await self._process_received_pdu(origin, pdu, state=None) + + @log_function + async def on_send_membership_event( + self, origin: str, event: EventBase + ) -> Tuple[EventBase, EventContext]: + """ + We have received a join/leave/knock event for a room via send_join/leave/knock. + + Verify that event and send it into the room on the remote homeserver's behalf. + + This is quite similar to on_receive_pdu, with the following principal + differences: + * only membership events are permitted (and only events with + sender==state_key -- ie, no kicks or bans) + * *We* send out the event on behalf of the remote server. + * We enforce the membership restrictions of restricted rooms. + * Rejected events result in an exception rather than being stored. + + There are also other differences, however it is not clear if these are by + design or omission. 
In particular, we do not attempt to backfill any missing + prev_events. + + Args: + origin: The homeserver of the remote (joining/invited/knocking) user. + event: The member event that has been signed by the remote homeserver. + + Returns: + The event and context of the event after inserting it into the room graph. + + Raises: + SynapseError if the event is not accepted into the room + """ + logger.debug( + "on_send_membership_event: Got event: %s, signatures: %s", + event.event_id, + event.signatures, + ) + + if get_domain_from_id(event.sender) != origin: + logger.info( + "Got send_membership request for user %r from different origin %s", + event.sender, + origin, + ) + raise SynapseError(403, "User not from origin", Codes.FORBIDDEN) + + if event.sender != event.state_key: + raise SynapseError(400, "state_key and sender must match", Codes.BAD_JSON) + + assert not event.internal_metadata.outlier + + # Send this event on behalf of the other server. + # + # The remote server isn't a full participant in the room at this point, so + # may not have an up-to-date list of the other homeservers participating in + # the room, so we send it on their behalf. + event.internal_metadata.send_on_behalf_of = origin + + context = await self._state_handler.compute_event_context(event) + context = await self._check_event_auth(origin, event, context) + if context.rejected: + raise SynapseError( + 403, f"{event.membership} event was rejected", Codes.FORBIDDEN + ) + + # for joins, we need to check the restrictions of restricted rooms + if event.membership == Membership.JOIN: + await self.check_join_restrictions(context, event) + + # for knock events, we run the third-party event rules. It's not entirely clear + # why we don't do this for other sorts of membership events. + if event.membership == Membership.KNOCK: + event_allowed, _ = await self._third_party_event_rules.check_event_allowed( + event, context + ) + if not event_allowed: + logger.info("Sending of knock %s forbidden by third-party rules", event) + raise SynapseError( + 403, "This event is not allowed in this context", Codes.FORBIDDEN + ) + + # all looks good, we can persist the event. + + # First, precalculate the joined hosts so that the federation sender doesn't + # need to. + await self._event_creation_handler.cache_joined_hosts_for_event(event, context) + + await self._check_for_soft_fail(event, None, origin=origin) + await self._run_push_actions_and_persist_event(event, context) + return event, context + + async def check_join_restrictions( + self, context: EventContext, event: EventBase + ) -> None: + """Check that restrictions in restricted join rules are matched + + Called when we receive a join event via send_join. + + Raises an auth error if the restrictions are not matched. + """ + prev_state_ids = await context.get_prev_state_ids() + + # Check if the user is already in the room or invited to the room. + user_id = event.state_key + prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None) + prev_member_event = None + if prev_member_event_id: + prev_member_event = await self._store.get_event(prev_member_event_id) + + # Check if the member should be allowed access via membership in a space. 
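# Sketch of the validation at the top of on_send_membership_event above: the
# sender must belong to the requesting server, and sender must equal
# state_key (so no kicks or bans can come through this path). The domain
# split mimics get_domain_from_id.
def validate_membership_sender(sender: str, state_key: str, origin: str) -> None:
    sender_domain = sender.split(":", 1)[1]
    if sender_domain != origin:
        raise PermissionError("User not from origin")  # 403 / M_FORBIDDEN
    if sender != state_key:
        raise ValueError("state_key and sender must match")  # 400 / M_BAD_JSON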
+ await self._event_auth_handler.check_restricted_join_rules( + prev_state_ids, + event.room_version, + user_id, + prev_member_event, + ) + + async def process_remote_join( + self, + origin: str, + room_id: str, + auth_events: List[EventBase], + state: List[EventBase], + event: EventBase, + room_version: RoomVersion, + ) -> int: + """Persists the events returned by a send_join + + Checks the auth chain is valid (and passes auth checks) for the + state and event. Then persists all of the events. + Notifies about the persisted events where appropriate. + + Args: + origin: Where the events came from + room_id: + auth_events + state + event + room_version: The room version we expect this room to have, and + will raise if it doesn't match the version in the create event. + + Returns: + The stream ID after which all events have been persisted. + + Raises: + SynapseError if the response is in some way invalid. + """ + for e in itertools.chain(auth_events, state): + e.internal_metadata.outlier = True + + event_map = {e.event_id: e for e in itertools.chain(auth_events, state)} + + create_event = None + for e in auth_events: + if (e.type, e.state_key) == (EventTypes.Create, ""): + create_event = e + break + + if create_event is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise SynapseError(400, "No create event in state") + + room_version_id = create_event.content.get( + "room_version", RoomVersions.V1.identifier + ) + + if room_version.identifier != room_version_id: + raise SynapseError(400, "Room version mismatch") + + # filter out any events we have already seen + seen_remotes = await self._store.have_seen_events(room_id, event_map.keys()) + for s in seen_remotes: + event_map.pop(s, None) + + # persist the auth chain and state events. + # + # any invalid events here will be marked as rejected, and we'll carry on. + # + # any events whose auth events are missing (ie, not in the send_join response, + # and not already in our db) will just be ignored. This is correct behaviour, + # because the reason that auth_events are missing might be due to us being + # unable to validate their signatures. The fact that we can't validate their + # signatures right now doesn't mean that we will *never* be able to, so it + # is premature to reject them. + # + await self._auth_and_persist_outliers(room_id, event_map.values()) + + # and now persist the join event itself. + logger.info("Persisting join-via-remote %s", event) + with nested_logging_context(suffix=event.event_id): + context = await self._state_handler.compute_event_context( + event, old_state=state + ) + + context = await self._check_event_auth(origin, event, context) + if context.rejected: + raise SynapseError(400, "Join event was rejected") + + return await self.persist_events_and_notify(room_id, [(event, context)]) + + @log_function + async def backfill( + self, dest: str, room_id: str, limit: int, extremities: Collection[str] + ) -> None: + """Trigger a backfill request to `dest` for the given `room_id` + + This will attempt to get more events from the remote. If the other side + has no new events to offer, this will return an empty list. + + As the events are received, we check their signatures, and also do some + sanity-checking on them. If any of the backfilled events are invalid, + this method throws a SynapseError. + + We might also raise an InvalidResponseError if the response from the remote + server is just bogus.
+ + TODO: make this more useful to distinguish failures of the remote + server from invalid events (there is probably no point in trying to + re-fetch invalid events from every other HS in the room.) + """ + if dest == self._server_name: + raise SynapseError(400, "Can't backfill from self.") + + events = await self._federation_client.backfill( + dest, room_id, limit=limit, extremities=extremities + ) + + if not events: + return + + # if there are any events in the wrong room, the remote server is buggy and + # should not be trusted. + for ev in events: + if ev.room_id != room_id: + raise InvalidResponseError( + f"Remote server {dest} returned event {ev.event_id} which is in " + f"room {ev.room_id}, when we were backfilling in {room_id}" + ) + + await self._process_pulled_events(dest, events, backfilled=True) + + async def _get_missing_events_for_pdu( + self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int + ) -> None: + """ + Args: + origin: Origin of the pdu. Will be called to get the missing events + pdu: received pdu + prevs: List of event ids which we are missing + min_depth: Minimum depth of events to return. + """ + + room_id = pdu.room_id + event_id = pdu.event_id + + seen = await self._store.have_events_in_timeline(prevs) + + if not prevs - seen: + return + + latest_list = await self._store.get_latest_event_ids_in_room(room_id) + + # We add the prev events that we have seen to the latest + # list to ensure the remote server doesn't give them to us + latest = set(latest_list) + latest |= seen + + logger.info( + "Requesting missing events between %s and %s", + shortstr(latest), + event_id, + ) + + # XXX: we set timeout to 10s to help workaround + # https://github.com/matrix-org/synapse/issues/1733. + # The reason is to avoid holding the linearizer lock + # whilst processing inbound /send transactions, causing + # FDs to stack up and block other inbound transactions + # which empirically can currently take up to 30 minutes. + # + # N.B. this explicitly disables retry attempts. + # + # N.B. this also increases our chances of falling back to + # fetching fresh state for the room if the missing event + # can't be found, which slightly reduces our security. + # it may also increase our DAG extremity count for the room, + # causing additional state resolution? See #1760. + # However, fetching state doesn't hold the linearizer lock + # apparently. + # + # see https://github.com/matrix-org/synapse/pull/1744 + # + # ---- + # + # Update richvdh 2018/09/18: There are a number of problems with timing this + # request out aggressively on the client side: + # + # - it plays badly with the server-side rate-limiter, which starts tarpitting you + # if you send too many requests at once, so you end up with the server carefully + # working through the backlog of your requests, which you have already timed + # out. + # + # - for this request in particular, we now (as of + # https://github.com/matrix-org/synapse/pull/3456) reject any PDUs where the + # server can't produce a plausible-looking set of prev_events - so we become + # much more likely to reject the event. + # + # - contrary to what it says above, we do *not* fall back to fetching fresh state + # for the room if get_missing_events times out. Rather, we give up processing + # the PDU whose prevs we are missing, which then makes it much more likely that + # we'll end up back here for the *next* PDU in the list, which exacerbates the + # problem.
+ # + # - the aggressive 10s timeout was introduced to deal with incoming federation + # requests taking 8 hours to process. It's not entirely clear why that was going + # on; certainly there were other issues causing traffic storms which are now + # resolved, and I think in any case we may be more sensible about our locking + # now. We're *certainly* more sensible about our logging. + # + # All that said: Let's try increasing the timeout to 60s and see what happens. + + try: + missing_events = await self._federation_client.get_missing_events( + origin, + room_id, + earliest_events_ids=list(latest), + latest_events=[pdu], + limit=10, + min_depth=min_depth, + timeout=60000, + ) + except (RequestSendFailed, HttpResponseException, NotRetryingDestination) as e: + # We failed to get the missing events, but since we need to handle + # the case of `get_missing_events` not returning the necessary + # events anyway, it is safe to simply log the error and continue. + logger.warning("Failed to get prev_events: %s", e) + return + + logger.info("Got %d prev_events", len(missing_events)) + await self._process_pulled_events(origin, missing_events, backfilled=False) + + async def _process_pulled_events( + self, origin: str, events: Iterable[EventBase], backfilled: bool + ) -> None: + """Process a batch of events we have pulled from a remote server + + Pulls in any events required to auth the events, persists the received events, + and notifies clients, if appropriate. + + Assumes the events have already had their signatures and hashes checked. + + Params: + origin: The server we received these events from + events: The received events. + backfilled: True if this is part of a historical batch of events (inhibits + notification to clients, and validation of device keys.) + """ + + # We want to sort these by depth so we process them and + # tell clients about them in order. + sorted_events = sorted(events, key=lambda x: x.depth) + + for ev in sorted_events: + with nested_logging_context(ev.event_id): + await self._process_pulled_event(origin, ev, backfilled=backfilled) + + async def _process_pulled_event( + self, origin: str, event: EventBase, backfilled: bool + ) -> None: + """Process a single event that we have pulled from a remote server + + Pulls in any events required to auth the event, persists the received event, + and notifies clients, if appropriate. + + Assumes the event has already had its signatures and hashes checked. + + This is somewhat equivalent to on_receive_pdu, but applies somewhat different + logic in the case that we are missing prev_events (in particular, it just + requests the state at that point, rather than triggering a get_missing_events) - + so is appropriate when we have pulled the event from a remote server, rather + than having it pushed to us. + + Params: + origin: The server we received this event from + event: The received event + backfilled: True if this is part of a historical batch of events (inhibits + notification to clients, and validation of device keys.) + """ + logger.info("Processing pulled event %s", event) + + # these should not be outliers.
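# Sketch of the ordering applied in _process_pulled_events above: pulled
# events are handled shallowest-first so that clients are told about them in
# roughly topological order. (Callables stand in for the real handler.)
from typing import Awaitable, Callable, Iterable

async def process_pulled_in_order(
    events: Iterable[object],
    depth_of: Callable[[object], int],
    process_one: Callable[[object], Awaitable[None]],
) -> None:
    for ev in sorted(events, key=depth_of):
        await process_one(ev)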
+        assert not event.internal_metadata.is_outlier()
+
+        event_id = event.event_id
+
+        existing = await self._store.get_event(
+            event_id, allow_none=True, allow_rejected=True
+        )
+        if existing:
+            if not existing.internal_metadata.is_outlier():
+                logger.info(
+                    "Ignoring received event %s which we have already seen",
+                    event_id,
+                )
+                return
+            logger.info("De-outliering event %s", event_id)
+
+        try:
+            self._sanity_check_event(event)
+        except SynapseError as err:
+            logger.warning("Event %s failed sanity check: %s", event_id, err)
+            return
+
+        try:
+            state = await self._resolve_state_at_missing_prevs(origin, event)
+            await self._process_received_pdu(
+                origin, event, state=state, backfilled=backfilled
+            )
+        except FederationError as e:
+            if e.code == 403:
+                logger.warning("Pulled event %s failed history check.", event_id)
+            else:
+                raise
+
+    async def _resolve_state_at_missing_prevs(
+        self, dest: str, event: EventBase
+    ) -> Optional[Iterable[EventBase]]:
+        """Calculate the state at an event with missing prev_events.
+
+        This is used when we have pulled a batch of events from a remote server, and
+        still don't have all the prev_events.
+
+        If we already have all the prev_events for `event`, this method does nothing.
+
+        Otherwise, the missing prevs become new backwards extremities, and we fall back
+        to asking the remote server for the state after each missing `prev_event`,
+        and resolving across them.
+
+        That's ok provided we then resolve the state against other bits of the DAG
+        before using it - in other words, that the received event `event` is not going
+        to become the only forwards_extremity in the room (which will ensure that you
+        can't just take over a room by sending an event, withholding its prev_events,
+        and declaring yourself to be an admin in the subsequent state request).
+
+        In other words: we should only call this method if `event` has been *pulled*
+        as part of a batch of missing prev events, or similar.
+
+        Params:
+            dest: the remote server to ask for state at the missing prevs. Typically,
+                this will be the server we got `event` from.
+            event: an event to check for missing prevs.
+
+        Returns:
+            if we already had all the prev events, `None`. Otherwise, returns a list of
+            the events in the state at `event`.
+        """
+        room_id = event.room_id
+        event_id = event.event_id
+
+        prevs = set(event.prev_event_ids())
+        seen = await self._store.have_events_in_timeline(prevs)
+        missing_prevs = prevs - seen
+
+        if not missing_prevs:
+            return None
+
+        logger.info(
+            "Event %s is missing prev_events %s: calculating state for a "
+            "backwards extremity",
+            event_id,
+            shortstr(missing_prevs),
+        )
+        # Calculate the state after each of the previous events, and
+        # resolve them to find the correct state at the current event.
+        event_map = {event_id: event}
+        try:
+            # Get the state of the events we know about
+            ours = await self._state_store.get_state_groups_ids(room_id, seen)
+
+            # state_maps is a list of mappings from (type, state_key) to event_id
+            state_maps: List[StateMap[str]] = list(ours.values())
+
+            # we don't need this any more, let's delete it.
+            del ours
+
+            # Ask the remote server for the states we don't
+            # know about
+            for p in missing_prevs:
+                logger.info("Requesting state after missing prev_event %s", p)
+
+                with nested_logging_context(p):
+                    # note that if any of the missing prevs share missing state or
+                    # auth events, the requests to fetch those events are deduped
+                    # by the get_pdu_cache in federation_client.
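For orientation before the remote state requests that follow: a `StateMap[str]` is nothing more than a mapping from (event type, state key) pairs to event ids. A toy illustration, with invented ids, of the kind of disagreement between our own state maps and a remote's that the state resolution below has to settle:

from typing import Dict, Tuple

StateMap = Dict[Tuple[str, str], str]

# State as we know it after one of the prev_events we *do* have...
ours: StateMap = {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:example.org"): "$alice_join",
}

# ...and state as the remote claims it was after a missing prev_event.
theirs: StateMap = {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:example.org"): "$alice_ban",
}

# The maps disagree about Alice's membership; settling exactly this kind of
# conflict is the job of resolve_events_with_store below.
conflicts = {k for k in ours if k in theirs and ours[k] != theirs[k]}
assert conflicts == {("m.room.member", "@alice:example.org")}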
+                    remote_state = await self._get_state_after_missing_prev_event(
+                        dest, room_id, p
+                    )
+
+                    remote_state_map = {
+                        (x.type, x.state_key): x.event_id for x in remote_state
+                    }
+                    state_maps.append(remote_state_map)
+
+                    for x in remote_state:
+                        event_map[x.event_id] = x
+
+            room_version = await self._store.get_room_version_id(room_id)
+            state_map = await self._state_resolution_handler.resolve_events_with_store(
+                room_id,
+                room_version,
+                state_maps,
+                event_map,
+                state_res_store=StateResolutionStore(self._store),
+            )
+
+            # We need to give _process_received_pdu the actual state events
+            # rather than event ids, so generate that now.
+
+            # First though we need to fetch all the events that are in
+            # state_map, so we can build up the state below.
+            evs = await self._store.get_events(
+                list(state_map.values()),
+                get_prev_content=False,
+                redact_behaviour=EventRedactBehaviour.AS_IS,
+            )
+            event_map.update(evs)
+
+            state = [event_map[e] for e in state_map.values()]
+        except Exception:
+            logger.warning(
+                "Error attempting to resolve state at missing prev_events",
+                exc_info=True,
+            )
+            raise FederationError(
+                "ERROR",
+                403,
+                "We can't get valid state history.",
+                affected=event_id,
+            )
+        return state
+
+    async def _get_state_after_missing_prev_event(
+        self,
+        destination: str,
+        room_id: str,
+        event_id: str,
+    ) -> List[EventBase]:
+        """Requests all of the room state at a given event from a remote homeserver.
+
+        Args:
+            destination: The remote homeserver to query for the state.
+            room_id: The id of the room we're interested in.
+            event_id: The id of the event we want the state at.
+
+        Returns:
+            A list of events in the state, including the event itself
+        """
+        (
+            state_event_ids,
+            auth_event_ids,
+        ) = await self._federation_client.get_room_state_ids(
+            destination, room_id, event_id=event_id
+        )
+
+        logger.debug(
+            "state_ids returned %i state events, %i auth events",
+            len(state_event_ids),
+            len(auth_event_ids),
+        )
+
+        # start by just trying to fetch the events from the store
+        desired_events = set(state_event_ids)
+        desired_events.add(event_id)
+        logger.debug("Fetching %i events from cache/store", len(desired_events))
+        fetched_events = await self._store.get_events(
+            desired_events, allow_rejected=True
+        )
+
+        missing_desired_events = desired_events - fetched_events.keys()
+        logger.debug(
+            "We are missing %i events (got %i)",
+            len(missing_desired_events),
+            len(fetched_events),
+        )
+
+        # We probably won't need most of the auth events, so let's just check which
+        # we have for now, rather than thrashing the event cache with them all
+        # unnecessarily.
+
+        # TODO: we probably won't actually need all of the auth events, since we
+        # already have a bunch of the state events. It would be nice if the
+        # federation api gave us a way of finding out which we actually need.
+
+        missing_auth_events = set(auth_event_ids) - fetched_events.keys()
+        missing_auth_events.difference_update(
+            await self._store.have_seen_events(room_id, missing_auth_events)
+        )
+        logger.debug("We are also missing %i auth events", len(missing_auth_events))
+
+        missing_events = missing_desired_events | missing_auth_events
+        logger.debug("Fetching %i events from remote", len(missing_events))
+        await self._get_events_and_persist(
+            destination=destination, room_id=room_id, event_ids=missing_events
+        )
+
+        # we need to make sure we re-load from the database to get the rejected
+        # state correct.
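The bookkeeping in this method is, once again, set arithmetic: work out which of the desired state and auth events we already hold, then fetch the remainder from the remote in one go. A worked toy example, with invented ids:

state_event_ids = {"$s1", "$s2"}
auth_event_ids = {"$a1", "$a2"}
cached = {"$s1", "$a1"}

desired = state_event_ids | {"$prev_event"}
missing_desired = desired - cached        # {"$s2", "$prev_event"}
missing_auth = auth_event_ids - cached    # {"$a2"}

# One remote fetch covers both shortfalls.
to_fetch = missing_desired | missing_auth
assert to_fetch == {"$s2", "$prev_event", "$a2"}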
+        fetched_events.update(
+            await self._store.get_events(missing_desired_events, allow_rejected=True)
+        )
+
+        # check for events which were in the wrong room.
+        #
+        # this can happen if a remote server claims that the state or
+        # auth_events at an event in room A are actually events in room B
+
+        bad_events = [
+            (event_id, event.room_id)
+            for event_id, event in fetched_events.items()
+            if event.room_id != room_id
+        ]
+
+        for bad_event_id, bad_room_id in bad_events:
+            # This is a bogus situation, but since we may only discover it a long time
+            # after it happened, we try our best to carry on, by just omitting the
+            # bad events from the returned state set.
+            logger.warning(
+                "Remote server %s claims event %s in room %s is an auth/state "
+                "event in room %s",
+                destination,
+                bad_event_id,
+                bad_room_id,
+                room_id,
+            )
+
+            del fetched_events[bad_event_id]
+
+        # if we couldn't get the prev event in question, that's a problem.
+        remote_event = fetched_events.get(event_id)
+        if not remote_event:
+            raise Exception("Unable to get missing prev_event %s" % (event_id,))
+
+        # missing state at that event is a warning, not a blocker
+        # XXX: this doesn't sound right? it means that we'll end up with incomplete
+        #   state.
+        failed_to_fetch = desired_events - fetched_events.keys()
+        if failed_to_fetch:
+            logger.warning(
+                "Failed to fetch missing state events for %s %s",
+                event_id,
+                failed_to_fetch,
+            )
+
+        remote_state = [
+            fetched_events[e_id] for e_id in state_event_ids if e_id in fetched_events
+        ]
+
+        if remote_event.is_state() and remote_event.rejected_reason is None:
+            remote_state.append(remote_event)
+
+        return remote_state
+
+    async def _process_received_pdu(
+        self,
+        origin: str,
+        event: EventBase,
+        state: Optional[Iterable[EventBase]],
+        backfilled: bool = False,
+    ) -> None:
+        """Called when we have a new non-outlier event.
+
+        This is called when we have a new event to add to the room DAG. This can be
+        due to:
+            * events received directly via a /send request
+            * events retrieved via get_missing_events after a /send request
+            * events backfilled after a client request.
+
+        It's not currently used for events received from incoming send_{join,knock,leave}
+        requests (which go via on_send_membership_event), nor for joins created by a
+        remote join dance (which go via process_remote_join).
+
+        We need to do auth checks and put it through the StateHandler.
+
+        Args:
+            origin: server sending the event
+
+            event: event to be persisted
+
+            state: Normally None, but if we are handling a gap in the graph
+                (ie, we are missing one or more prev_events), the resolved state at the
+                event
+
+            backfilled: True if this is part of a historical batch of events (inhibits
+                notification to clients, and validation of device keys.)
+        """
+        logger.debug("Processing event: %s", event)
+        assert not event.internal_metadata.outlier
+
+        try:
+            context = await self._state_handler.compute_event_context(
+                event, old_state=state
+            )
+            context = await self._check_event_auth(
+                origin,
+                event,
+                context,
+            )
+        except AuthError as e:
+            # FIXME richvdh 2021/10/07 I don't think this is reachable. Let's log it
+            # for now
+            logger.exception("Unexpected AuthError from _check_event_auth")
+            raise FederationError("ERROR", e.code, e.msg, affected=event.event_id)
+
+        if not backfilled and not context.rejected:
+            # For new (non-backfilled and non-outlier) events we check if the event
+            # passes auth based on the current state. If it doesn't then we
+            # "soft-fail" the event.
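Soft failure, referenced in the comment above, means an event is still persisted (so the DAG stays intact for federation purposes) but is never served to clients. A rough conceptual sketch of the effect; the `SketchEvent` type and its `soft_failed` field are stand-ins for `event.internal_metadata.soft_failed`:

from dataclasses import dataclass
from typing import List

@dataclass
class SketchEvent:
    event_id: str
    soft_failed: bool = False

def events_to_serve_to_clients(events: List[SketchEvent]) -> List[SketchEvent]:
    # Soft-failed events stay in the database and may still be referenced
    # as prev_events by remote servers, but clients never see them.
    return [ev for ev in events if not ev.soft_failed]

timeline = [SketchEvent("$ok"), SketchEvent("$dodgy", soft_failed=True)]
assert [ev.event_id for ev in events_to_serve_to_clients(timeline)] == ["$ok"]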
+            await self._check_for_soft_fail(event, state, origin=origin)
+
+        await self._run_push_actions_and_persist_event(event, context, backfilled)
+
+        if backfilled or context.rejected:
+            return
+
+        await self._maybe_kick_guest_users(event)
+
+        # For encrypted messages we check that we know about the sending device,
+        # if we don't then we mark the device cache for that user as stale.
+        if event.type == EventTypes.Encrypted:
+            device_id = event.content.get("device_id")
+            sender_key = event.content.get("sender_key")
+
+            cached_devices = await self._store.get_cached_devices_for_user(event.sender)
+
+            resync = False  # Whether we should resync device lists.
+
+            device = None
+            if device_id is not None:
+                device = cached_devices.get(device_id)
+                if device is None:
+                    logger.info(
+                        "Received event from remote device not in our cache: %s %s",
+                        event.sender,
+                        device_id,
+                    )
+                    resync = True
+
+            # We also check if the `sender_key` matches what we expect.
+            if sender_key is not None:
+                # Figure out what sender key we're expecting. If we know the
+                # device and recognize the algorithm then we can work out the
+                # exact key to expect. Otherwise check it matches any key we
+                # have for that device.
+
+                current_keys: Container[str] = []
+
+                if device:
+                    keys = device.get("keys", {}).get("keys", {})
+
+                    if (
+                        event.content.get("algorithm")
+                        == RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2
+                    ):
+                        # For this algorithm we expect a curve25519 key.
+                        key_name = "curve25519:%s" % (device_id,)
+                        current_keys = [keys.get(key_name)]
+                    else:
+                        # We don't understand the algorithm, so we just
+                        # check it matches a key for the device.
+                        current_keys = keys.values()
+                elif device_id:
+                    # We don't have any keys for the device ID.
+                    pass
+                else:
+                    # The event didn't include a device ID, so we just look for
+                    # keys across all devices.
+                    current_keys = [
+                        key
+                        for device in cached_devices.values()
+                        for key in device.get("keys", {}).get("keys", {}).values()
+                    ]
+
+                # We now check that the sender key matches (one of) the expected
+                # keys.
+                if sender_key not in current_keys:
+                    logger.info(
+                        "Received event from remote device with unexpected sender key: %s %s: %s",
+                        event.sender,
+                        device_id or "",
+                        sender_key,
+                    )
+                    resync = True
+
+            if resync:
+                run_as_background_process(
+                    "resync_device_due_to_pdu",
+                    self._resync_device,
+                    event.sender,
+                )
+
+        await self._handle_marker_event(origin, event)
+
+    async def _resync_device(self, sender: str) -> None:
+        """We have detected that the device list for the given user may be out
+        of sync, so we try and resync them.
+        """
+
+        try:
+            await self._store.mark_remote_user_device_cache_as_stale(sender)
+
+            # Immediately attempt a resync in the background
+            if self._config.worker.worker_app:
+                await self._user_device_resync(user_id=sender)
+            else:
+                await self._device_list_updater.user_device_resync(sender)
+        except Exception:
+            logger.exception("Failed to resync device for %s", sender)
+
+    async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None:
+        """Handles backfilling the insertion event when we receive a marker
+        event that points to one.
+
+        Args:
+            origin: Origin of the event. Will be called to get the insertion event
+            marker_event: The event to process
+        """
+
+        if marker_event.type != EventTypes.MSC2716_MARKER:
+            # Not a marker event
+            return
+
+        if marker_event.rejected_reason is not None:
+            # Rejected event
+            return
+
+        # Skip processing a marker event if the room version doesn't
+        # support it or the event is not from the room creator.
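Returning briefly to the encrypted-event check above: for Megolm the expected sender key is the device's curve25519 identity key, stored under "curve25519:<device id>" in the cached key map. A condensed sketch of that expectation; the cache shape is approximated from the code above and the helper name is ours:

from typing import Iterable, Optional

MEGOLM_V1 = "m.megolm.v1.aes-sha2"

def expected_sender_keys(
    device: Optional[dict], device_id: Optional[str], algorithm: str
) -> Iterable[str]:
    keys = (device or {}).get("keys", {}).get("keys", {})
    if device and algorithm == MEGOLM_V1:
        # For Megolm we know exactly which key name to expect.
        return [keys.get("curve25519:%s" % (device_id,))]
    # Otherwise accept any key we hold for the device.
    return list(keys.values())

device = {"keys": {"keys": {"curve25519:DEV1": "sender_key_abc"}}}
assert "sender_key_abc" in expected_sender_keys(device, "DEV1", MEGOLM_V1)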
+        room_version = await self._store.get_room_version(marker_event.room_id)
+        create_event = await self._store.get_create_event_for_room(marker_event.room_id)
+        room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+        if not room_version.msc2716_historical and (
+            not self._config.experimental.msc2716_enabled
+            or marker_event.sender != room_creator
+        ):
+            return
+
+        logger.debug("_handle_marker_event: received %s", marker_event)
+
+        insertion_event_id = marker_event.content.get(
+            EventContentFields.MSC2716_MARKER_INSERTION
+        )
+
+        if insertion_event_id is None:
+            # Nothing to retrieve then (invalid marker)
+            return
+
+        logger.debug(
+            "_handle_marker_event: backfilling insertion event %s", insertion_event_id
+        )
+
+        await self._get_events_and_persist(
+            origin,
+            marker_event.room_id,
+            [insertion_event_id],
+        )
+
+        insertion_event = await self._store.get_event(
+            insertion_event_id, allow_none=True
+        )
+        if insertion_event is None:
+            logger.warning(
+                "_handle_marker_event: server %s didn't return insertion event %s for marker %s",
+                origin,
+                insertion_event_id,
+                marker_event.event_id,
+            )
+            return
+
+        logger.debug(
+            "_handle_marker_event: successfully backfilled insertion event %s from marker event %s",
+            insertion_event,
+            marker_event,
+        )
+
+        await self._store.insert_insertion_extremity(
+            insertion_event_id, marker_event.room_id
+        )
+
+        logger.debug(
+            "_handle_marker_event: insertion extremity added for %s from marker event %s",
+            insertion_event,
+            marker_event,
+        )
+
+    async def _get_events_and_persist(
+        self, destination: str, room_id: str, event_ids: Collection[str]
+    ) -> None:
+        """Fetch the given events from a server, and persist them as outliers.
+
+        This function *does not* recursively get missing auth events of the
+        newly fetched events. Callers must include in the `event_ids` argument
+        any missing events from the auth chain.
+
+        Logs a warning if we can't find any of the given events.
+        """
+
+        room_version = await self._store.get_room_version(room_id)
+
+        events: List[EventBase] = []
+
+        async def get_event(event_id: str) -> None:
+            with nested_logging_context(event_id):
+                try:
+                    event = await self._federation_client.get_pdu(
+                        [destination],
+                        event_id,
+                        room_version,
+                        outlier=True,
+                    )
+                    if event is None:
+                        logger.warning(
+                            "Server %s didn't return event %s",
+                            destination,
+                            event_id,
+                        )
+                        return
+                    events.append(event)
+
+                except Exception as e:
+                    logger.warning(
+                        "Error fetching missing state/auth event %s: %s %s",
+                        event_id,
+                        type(e),
+                        e,
+                    )
+
+        await concurrently_execute(get_event, event_ids, 5)
+        logger.info("Fetched %i events of %i requested", len(events), len(event_ids))
+        await self._auth_and_persist_outliers(room_id, events)
+
+    async def _auth_and_persist_outliers(
+        self, room_id: str, events: Iterable[EventBase]
+    ) -> None:
+        """Persist a batch of outlier events fetched from remote servers.
+
+        We first sort the events to make sure that we process each event's auth_events
+        before the event itself, and then auth and persist them.
+
+        Notifies about the events where appropriate.
+
+        Params:
+            room_id: the room that the events are meant to be in (though this has
+                not yet been checked)
+            events: the events that have been fetched
+        """
+        event_map = {event.event_id: event for event in events}
+
+        # XXX: it might be possible to kick this process off in parallel with fetching
+        # the events.
+        while event_map:
+            # build a list of events whose auth events are not in the queue.
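The loop this comment belongs to is effectively a Kahn-style topological sort: events are persisted in batches once all of their auth dependencies have left the queue, and a non-empty queue with no ready events signals a loop. A standalone sketch over a toy dependency map (ids invented):

from typing import Dict, List, Set

def batches_by_auth_deps(auth_deps: Dict[str, Set[str]]) -> List[Set[str]]:
    """Group event ids into batches so each batch only depends on earlier ones.

    auth_deps maps event id -> the auth event ids it cites.
    """
    remaining = dict(auth_deps)
    batches: List[Set[str]] = []
    while remaining:
        roots = {ev for ev, deps in remaining.items() if not deps & set(remaining)}
        if not roots:
            break  # a dependency loop: give up, as the real code does
        batches.append(roots)
        for ev in roots:
            del remaining[ev]
    return batches

# $b and $c both cite $a in their auth_events, so $a must be persisted first.
assert batches_by_auth_deps({"$a": set(), "$b": {"$a"}, "$c": {"$a"}}) == [
    {"$a"},
    {"$b", "$c"},
]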
+            roots = tuple(
+                ev
+                for ev in event_map.values()
+                if not any(aid in event_map for aid in ev.auth_event_ids())
+            )
+
+            if not roots:
+                # if *none* of the remaining events are ready, that means
+                # we have a loop. This either means a bug in our logic, or that
+                # somebody has managed to create a loop (which requires finding a
+                # hash collision in room v2 and later).
+                logger.warning(
+                    "Loop found in auth events while fetching missing state/auth "
+                    "events: %s",
+                    shortstr(event_map.keys()),
+                )
+                return
+
+            logger.info(
+                "Persisting %i of %i remaining outliers: %s",
+                len(roots),
+                len(event_map),
+                shortstr(e.event_id for e in roots),
+            )
+
+            await self._auth_and_persist_outliers_inner(room_id, roots)
+
+            for ev in roots:
+                del event_map[ev.event_id]
+
+    async def _auth_and_persist_outliers_inner(
+        self, room_id: str, fetched_events: Collection[EventBase]
+    ) -> None:
+        """Helper for _auth_and_persist_outliers
+
+        Persists a batch of events where we have (theoretically) already persisted all
+        of their auth events.
+
+        Notifies about the events where appropriate.
+
+        Params:
+            room_id: the room that the events are meant to be in (though this has
+                not yet been checked)
+            fetched_events: the events to persist
+        """
+        # get all the auth events for all the events in this batch. By now, they should
+        # have been persisted.
+        auth_events = {
+            aid for event in fetched_events for aid in event.auth_event_ids()
+        }
+        persisted_events = await self._store.get_events(
+            auth_events,
+            allow_rejected=True,
+        )
+
+        room_version = await self._store.get_room_version_id(room_id)
+        room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
+
+        def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]:
+            with nested_logging_context(suffix=event.event_id):
+                auth = []
+                for auth_event_id in event.auth_event_ids():
+                    ae = persisted_events.get(auth_event_id)
+                    if not ae:
+                        # the fact we can't find the auth event doesn't mean it doesn't
+                        # exist, which means it is premature to reject `event`. Instead we
+                        # just ignore it for now.
+                        logger.warning(
+                            "Dropping event %s, which relies on auth_event %s, which could not be found",
+                            event,
+                            auth_event_id,
+                        )
+                        return None
+                    auth.append(ae)
+
+                context = EventContext.for_outlier()
+                try:
+                    validate_event_for_room_version(room_version_obj, event)
+                    check_auth_rules_for_event(room_version_obj, event, auth)
+                except AuthError as e:
+                    logger.warning("Rejecting %r because %s", event, e)
+                    context.rejected = RejectedReason.AUTH_ERROR
+
+                return event, context
+
+        events_to_persist = (x for x in (prep(event) for event in fetched_events) if x)
+        await self.persist_events_and_notify(room_id, tuple(events_to_persist))
+
+    async def _check_event_auth(
+        self,
+        origin: str,
+        event: EventBase,
+        context: EventContext,
+    ) -> EventContext:
+        """
+        Checks whether an event should be rejected (for failing auth checks).
+
+        Args:
+            origin: The host the event originates from.
+            event: The event itself.
+            context:
+                The event context.
+
+        Returns:
+            The updated context object.
+
+        Raises:
+            AuthError if we were unable to find copies of the event's auth events.
+            (Most other failures just cause us to set `context.rejected`.)
+        """
+        # This method should only be used for non-outliers
+        assert not event.internal_metadata.outlier
+
+        # first of all, check that the event itself is valid.
+        room_version = await self._store.get_room_version_id(event.room_id)
+        room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
+
+        try:
+            validate_event_for_room_version(room_version_obj, event)
+        except AuthError as e:
+            logger.warning("While validating received event %r: %s", event, e)
+            # TODO: use a different rejected reason here?
+            context.rejected = RejectedReason.AUTH_ERROR
+            return context
+
+        # next, check that we have all of the event's auth events.
+        #
+        # Note that this can raise AuthError, which we want to propagate to the
+        # caller rather than swallow with `context.rejected` (since we cannot be
+        # certain that there is a permanent problem with the event).
+        claimed_auth_events = await self._load_or_fetch_auth_events_for_event(
+            origin, event
+        )
+
+        # ... and check that the event passes auth at those auth events.
+        try:
+            check_auth_rules_for_event(room_version_obj, event, claimed_auth_events)
+        except AuthError as e:
+            logger.warning(
+                "While checking auth of %r against auth_events: %s", event, e
+            )
+            context.rejected = RejectedReason.AUTH_ERROR
+            return context
+
+        # now check auth against what we think the auth events *should* be.
+        prev_state_ids = await context.get_prev_state_ids()
+        auth_events_ids = self._event_auth_handler.compute_auth_events(
+            event, prev_state_ids, for_verification=True
+        )
+        auth_events_x = await self._store.get_events(auth_events_ids)
+        calculated_auth_event_map = {
+            (e.type, e.state_key): e for e in auth_events_x.values()
+        }
+
+        try:
+            updated_auth_events = await self._update_auth_events_for_auth(
+                event,
+                calculated_auth_event_map=calculated_auth_event_map,
+            )
+        except Exception:
+            # We don't really mind if the above fails, so let's not fail
+            # processing if it does. However, it really shouldn't fail so
+            # let's still log as an exception since we'll still want to fix
+            # any bugs.
+            logger.exception(
+                "Failed to double check auth events for %s with remote. "
+                "Ignoring failure and continuing processing of event.",
+                event.event_id,
+            )
+            updated_auth_events = None
+
+        if updated_auth_events:
+            context = await self._update_context_for_auth_events(
+                event, context, updated_auth_events
+            )
+            auth_events_for_auth = updated_auth_events
+        else:
+            auth_events_for_auth = calculated_auth_event_map
+
+        try:
+            check_auth_rules_for_event(
+                room_version_obj, event, auth_events_for_auth.values()
+            )
+        except AuthError as e:
+            logger.warning("Failed auth resolution for %r because %s", event, e)
+            context.rejected = RejectedReason.AUTH_ERROR
+
+        return context
+
+    async def _maybe_kick_guest_users(self, event: EventBase) -> None:
+        if event.type != EventTypes.GuestAccess:
+            return
+
+        guest_access = event.content.get(EventContentFields.GUEST_ACCESS)
+        if guest_access == GuestAccess.CAN_JOIN:
+            return
+
+        current_state_map = await self._state_handler.get_current_state(event.room_id)
+        current_state = list(current_state_map.values())
+        await self._get_room_member_handler().kick_guest_users(current_state)
+
+    async def _check_for_soft_fail(
+        self,
+        event: EventBase,
+        state: Optional[Iterable[EventBase]],
+        origin: str,
+    ) -> None:
+        """Checks if we should soft fail the event; if so, marks the event as
+        such.
+
+        Args:
+            event: The event to check for soft-failure.
+            state: The state at the event if we don't have all the event's prev events
+            origin: The host the event originates from.
+ """ + extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) + extrem_ids = set(extrem_ids_list) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + return + + room_version = await self._store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. + + state_sets_d = await self._state_store.get_state_groups( + event.room_id, extrem_ids + ) + state_sets: List[Iterable[EventBase]] = list(state_sets_d.values()) + state_sets.append(state) + current_states = await self._state_handler.resolve_events( + room_version, state_sets, event + ) + current_state_ids: StateMap[str] = { + k: e.event_id for k, e in current_states.items() + } + else: + current_state_ids = await self._state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids + ) + + logger.debug( + "Doing soft-fail check for %s: state %s", + event.event_id, + current_state_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(room_version_obj, event) + current_state_ids_list = [ + e for k, e in current_state_ids.items() if k in auth_types + ] + current_auth_events = await self._store.get_events_as_list( + current_state_ids_list + ) + + try: + check_auth_rules_for_event(room_version_obj, event, current_auth_events) + except AuthError as e: + logger.warning( + "Soft-failing %r (from %s) because %s", + event, + e, + origin, + extra={ + "room_id": event.room_id, + "mxid": event.sender, + "hs": origin, + }, + ) + soft_failed_event_counter.inc() + event.internal_metadata.soft_failed = True + + async def _update_auth_events_for_auth( + self, + event: EventBase, + calculated_auth_event_map: StateMap[EventBase], + ) -> Optional[StateMap[EventBase]]: + """Helper for _check_event_auth. See there for docs. + + Checks whether a given event has the expected auth events. If it + doesn't then we talk to the remote server to compare state to see if + we can come to a consensus (e.g. if one server missed some valid + state). + + This attempts to resolve any potential divergence of state between + servers, but is not essential and so failures should not block further + processing of the event. + + Args: + event: + + calculated_auth_event_map: + Our calculated auth_events based on the state of the room + at the event's position in the DAG. + + Returns: + updated auth event map, or None if no changes are needed. + + """ + assert not event.internal_metadata.outlier + + # check for events which are in the event's claimed auth_events, but not + # in our calculated event map. 
+        event_auth_events = set(event.auth_event_ids())
+        different_auth = event_auth_events.difference(
+            e.event_id for e in calculated_auth_event_map.values()
+        )
+
+        if not different_auth:
+            return None
+
+        logger.info(
+            "auth_events refers to events which are not in our calculated auth "
+            "chain: %s",
+            different_auth,
+        )
+
+        # XXX: currently this checks for redactions but I'm not convinced that is
+        # necessary?
+        different_events = await self._store.get_events_as_list(different_auth)
+
+        # double-check they're all in the same room - we should already have checked
+        # this but it doesn't hurt to check again.
+        for d in different_events:
+            assert (
+                d.room_id == event.room_id
+            ), f"Event {event.event_id} refers to auth_event {d.event_id} which is in a different room"
+
+        # now we state-resolve between our own idea of the auth events, and the remote's
+        # idea of them.
+
+        local_state = calculated_auth_event_map.values()
+        remote_auth_events = dict(calculated_auth_event_map)
+        remote_auth_events.update({(d.type, d.state_key): d for d in different_events})
+        remote_state = remote_auth_events.values()
+
+        room_version = await self._store.get_room_version_id(event.room_id)
+        new_state = await self._state_handler.resolve_events(
+            room_version, (local_state, remote_state), event
+        )
+        different_state = {
+            (d.type, d.state_key): d
+            for d in new_state.values()
+            if calculated_auth_event_map.get((d.type, d.state_key)) != d
+        }
+        if not different_state:
+            logger.info("State res returned no new state")
+            return None
+
+        logger.info(
+            "After state res: updating auth_events with new state %s",
+            different_state.values(),
+        )
+
+        # take a copy of calculated_auth_event_map before we modify it.
+        auth_events = dict(calculated_auth_event_map)
+        auth_events.update(different_state)
+        return auth_events
+
+    async def _load_or_fetch_auth_events_for_event(
+        self, destination: str, event: EventBase
+    ) -> Collection[EventBase]:
+        """Fetch this event's auth_events, from database or remote
+
+        Loads any of the auth_events that we already have from the database/cache. If
+        there are any that are missing, calls /event_auth to get the complete auth
+        chain for the event (and then attempts to load the auth_events again).
+
+        If any of the auth_events cannot be found, raises an AuthError. This can happen
+        for a number of reasons; eg: the events don't exist, or we were unable to talk
+        to `destination`, or we couldn't validate the signature on the event (which
+        in turn has multiple potential causes).
+
+        Args:
+            destination: where to send the /event_auth request. Typically the server
+                that sent us `event` in the first place.
+            event: the event whose auth_events we want
+
+        Returns:
+            all of the events listed in `event.auth_event_ids()`, after deduplication
+
+        Raises:
+            AuthError if we were unable to fetch the auth_events for any reason.
+ """ + event_auth_event_ids = set(event.auth_event_ids()) + event_auth_events = await self._store.get_events( + event_auth_event_ids, allow_rejected=True + ) + missing_auth_event_ids = event_auth_event_ids.difference( + event_auth_events.keys() + ) + if not missing_auth_event_ids: + return event_auth_events.values() + + logger.info( + "Event %s refers to unknown auth events %s: fetching auth chain", + event, + missing_auth_event_ids, + ) + try: + await self._get_remote_auth_chain_for_event( + destination, event.room_id, event.event_id + ) + except Exception as e: + logger.warning("Failed to get auth chain for %s: %s", event, e) + # in this case, it's very likely we still won't have all the auth + # events - but we pick that up below. + + # try to fetch the auth events we missed list time. + extra_auth_events = await self._store.get_events( + missing_auth_event_ids, allow_rejected=True + ) + missing_auth_event_ids.difference_update(extra_auth_events.keys()) + event_auth_events.update(extra_auth_events) + if not missing_auth_event_ids: + return event_auth_events.values() + + # we still don't have all the auth events. + logger.warning( + "Missing auth events for %s: %s", + event, + shortstr(missing_auth_event_ids), + ) + # the fact we can't find the auth event doesn't mean it doesn't + # exist, which means it is premature to store `event` as rejected. + # instead we raise an AuthError, which will make the caller ignore it. + raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") + + async def _get_remote_auth_chain_for_event( + self, destination: str, room_id: str, event_id: str + ) -> None: + """If we are missing some of an event's auth events, attempt to request them + + Args: + destination: where to fetch the auth tree from + room_id: the room in which we are lacking auth events + event_id: the event for which we are lacking auth events + """ + try: + remote_event_map = { + e.event_id: e + for e in await self._federation_client.get_event_auth( + destination, room_id, event_id + ) + } + except RequestSendFailed as e1: + # The other side isn't around or doesn't implement the + # endpoint, so lets just bail out. + logger.info("Failed to get event auth from remote: %s", e1) + return + + logger.info("/event_auth returned %i events", len(remote_event_map)) + + # `event` may be returned, but we should not yet process it. + remote_event_map.pop(event_id, None) + + # nor should we reprocess any events we have already seen. + seen_remotes = await self._store.have_seen_events( + room_id, remote_event_map.keys() + ) + for s in seen_remotes: + remote_event_map.pop(s, None) + + await self._auth_and_persist_outliers(room_id, remote_event_map.values()) + + async def _update_context_for_auth_events( + self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] + ) -> EventContext: + """Update the state_ids in an event context after auth event resolution, + storing the changes as a new state group. + + Args: + event: The event we're handling the context for + + context: initial event context + + auth_events: Events to update in the event context. + + Returns: + new event context + """ + # exclude the state key of the new event from the current_state in the context. 
+        if event.is_state():
+            event_key: Optional[Tuple[str, str]] = (event.type, event.state_key)
+        else:
+            event_key = None
+        state_updates = {
+            k: a.event_id for k, a in auth_events.items() if k != event_key
+        }
+
+        current_state_ids = await context.get_current_state_ids()
+        current_state_ids = dict(current_state_ids)  # type: ignore
+
+        current_state_ids.update(state_updates)
+
+        prev_state_ids = await context.get_prev_state_ids()
+        prev_state_ids = dict(prev_state_ids)
+
+        prev_state_ids.update({k: a.event_id for k, a in auth_events.items()})
+
+        # create a new state group as a delta from the existing one.
+        prev_group = context.state_group
+        state_group = await self._state_store.store_state_group(
+            event.event_id,
+            event.room_id,
+            prev_group=prev_group,
+            delta_ids=state_updates,
+            current_state_ids=current_state_ids,
+        )
+
+        return EventContext.with_state(
+            state_group=state_group,
+            state_group_before_event=context.state_group_before_event,
+            current_state_ids=current_state_ids,
+            prev_state_ids=prev_state_ids,
+            prev_group=prev_group,
+            delta_ids=state_updates,
+        )
+
+    async def _run_push_actions_and_persist_event(
+        self, event: EventBase, context: EventContext, backfilled: bool = False
+    ) -> None:
+        """Run the push actions for a received event, and persist it.
+
+        Args:
+            event: The event itself.
+            context: The event context.
+            backfilled: True if the event was backfilled.
+        """
+        # this method should not be called on outliers (those code paths call
+        # persist_events_and_notify directly.)
+        assert not event.internal_metadata.outlier
+
+        if not backfilled and not context.rejected:
+            min_depth = await self._store.get_min_depth(event.room_id)
+            if min_depth is None or min_depth > event.depth:
+                # XXX richvdh 2021/10/07: I don't really understand what this
+                # condition is doing. I think it's trying not to send pushes
+                # for events that predate our join - but that's not really what
+                # min_depth means, and anyway ancient events are a more general
+                # problem.
+                #
+                # for now I'm just going to log about it.
+                logger.info(
+                    "Skipping push actions for old event with depth %s < %s",
+                    event.depth,
+                    min_depth,
+                )
+            else:
+                await self._action_generator.handle_push_actions_for_event(
+                    event, context
+                )
+
+        try:
+            await self.persist_events_and_notify(
+                event.room_id, [(event, context)], backfilled=backfilled
+            )
+        except Exception:
+            run_in_background(
+                self._store.remove_push_actions_from_staging, event.event_id
+            )
+            raise
+
+    async def persist_events_and_notify(
+        self,
+        room_id: str,
+        event_and_contexts: Sequence[Tuple[EventBase, EventContext]],
+        backfilled: bool = False,
+    ) -> int:
+        """Persists events and tells the notifier/pushers about them, if
+        necessary.
+
+        Args:
+            room_id: The room ID of events being persisted.
+            event_and_contexts: Sequence of events with their associated
+                context that should be persisted. All events must belong to
+                the same room.
+            backfilled: Whether these events are a result of
+                backfilling or not
+
+        Returns:
+            The stream ID after which all events have been persisted.
+        """
+        if not event_and_contexts:
+            return self._store.get_current_events_token()
+
+        instance = self._config.worker.events_shard_config.get_instance(room_id)
+        if instance != self._instance_name:
+            # Limit the number of events sent over replication. We choose 200
+            # here as that is what we default to in `max_request_body_size(..)`
+            for batch in batch_iter(event_and_contexts, 200):
+                result = await self._send_events(
+                    instance_name=instance,
+                    store=self._store,
+                    room_id=room_id,
+                    event_and_contexts=batch,
+                    backfilled=backfilled,
+                )
+            return result["max_stream_id"]
+        else:
+            assert self._storage.persistence
+
+            # Note that this returns the events that were persisted, which may not be
+            # the same as were passed in if some were deduplicated due to transaction IDs.
+            events, max_stream_token = await self._storage.persistence.persist_events(
+                event_and_contexts, backfilled=backfilled
+            )
+
+            if self._ephemeral_messages_enabled:
+                for event in events:
+                    # If there's an expiry timestamp on the event, schedule its expiry.
+                    self._message_handler.maybe_schedule_expiry(event)
+
+            if not backfilled:  # Never notify for backfilled events
+                for event in events:
+                    await self._notify_persisted_event(event, max_stream_token)
+
+            return max_stream_token.stream
+
+    async def _notify_persisted_event(
+        self, event: EventBase, max_stream_token: RoomStreamToken
+    ) -> None:
+        """Checks to see if notifier/pushers should be notified about the
+        event or not.
+
+        Args:
+            event: The event to maybe notify about.
+            max_stream_token: The max_stream_id returned by persist_events
+        """
+
+        extra_users = []
+        if event.type == EventTypes.Member:
+            target_user_id = event.state_key
+
+            # We notify for memberships if it's an invite for one of our
+            # users
+            if event.internal_metadata.is_outlier():
+                if event.membership != Membership.INVITE:
+                    if not self._is_mine_id(target_user_id):
+                        return
+
+            target_user = UserID.from_string(target_user_id)
+            extra_users.append(target_user)
+        elif event.internal_metadata.is_outlier():
+            return
+
+        # the event has been persisted so it should have a stream ordering.
+        assert event.internal_metadata.stream_ordering
+
+        event_pos = PersistedEventPosition(
+            self._instance_name, event.internal_metadata.stream_ordering
+        )
+        await self._notifier.on_new_room_event(
+            event, event_pos, max_stream_token, extra_users=extra_users
+        )
+
+    def _sanity_check_event(self, ev: EventBase) -> None:
+        """
+        Do some early sanity checks of a received event
+
+        In particular, checks it doesn't have an excessive number of
+        prev_events or auth_events, which could cause a huge state resolution
+        or cascade of event fetches.
+
+        Args:
+            ev: event to be checked
+
+        Raises:
+            SynapseError if the event does not pass muster
+        """
+        if len(ev.prev_event_ids()) > 20:
+            logger.warning(
+                "Rejecting event %s which has %i prev_events",
+                ev.event_id,
+                len(ev.prev_event_ids()),
+            )
+            raise SynapseError(HTTPStatus.BAD_REQUEST, "Too many prev_events")
+
+        if len(ev.auth_event_ids()) > 10:
+            logger.warning(
+                "Rejecting event %s which has %i auth_events",
+                ev.event_id,
+                len(ev.auth_event_ids()),
+            )
+            raise SynapseError(HTTPStatus.BAD_REQUEST, "Too many auth_events")
diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index 1a6c5c64a278..9e270d461b2c 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -14,7 +14,7 @@
 # limitations under the License.
import logging -from typing import TYPE_CHECKING, Dict, Iterable, List, Set +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, Iterable, List, Set from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.types import GroupID, JsonDict, get_domain_from_id @@ -25,12 +25,14 @@ logger = logging.getLogger(__name__) -def _create_rerouter(func_name): +def _create_rerouter(func_name: str) -> Callable[..., Awaitable[JsonDict]]: """Returns an async function that looks at the group id and calls the function on federation or the local group server if the group is local """ - async def f(self, group_id, *args, **kwargs): + async def f( + self: "GroupsLocalWorkerHandler", group_id: str, *args: Any, **kwargs: Any + ) -> JsonDict: if not GroupID.is_valid(group_id): raise SynapseError(400, "%s is not a legal group ID" % (group_id,)) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 0961dec5ab5c..c83eaea359e7 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -39,8 +39,6 @@ valid_id_server_location, ) -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -49,20 +47,21 @@ id_server_scheme = "https://" -class IdentityHandler(BaseHandler): +class IdentityHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.store = hs.get_datastore() # An HTTP client for contacting trusted URLs. self.http_client = SimpleHttpClient(hs) # An HTTP client for contacting identity servers specified by clients. self.blacklisting_http_client = SimpleHttpClient( - hs, ip_blacklist=hs.config.federation_ip_range_blacklist + hs, + ip_blacklist=hs.config.server.federation_ip_range_blacklist, + ip_whitelist=hs.config.server.federation_ip_range_whitelist, ) self.federation_http_client = hs.get_federation_http_client() self.hs = hs - self._web_client_location = hs.config.invite_client_location + self._web_client_location = hs.config.email.invite_client_location # Ratelimiters for `/requestToken` endpoints. self._3pid_validation_ratelimiter_ip = Ratelimiter( @@ -419,7 +418,7 @@ async def send_threepid_validation( token_expires = ( self.hs.get_clock().time_msec() - + self.hs.config.email_validation_token_lifetime + + self.hs.config.email.email_validation_token_lifetime ) await self.store.start_or_continue_validation_session( @@ -465,15 +464,6 @@ async def requestEmailToken( if next_link: params["next_link"] = next_link - if self.hs.config.using_identity_server_from_trusted_list: - # Warn that a deprecated config option is in use - logger.warning( - 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " - "details and update your config file." - ) - try: data = await self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/email/requestToken", @@ -518,15 +508,6 @@ async def requestMsisdnToken( if next_link: params["next_link"] = next_link - if self.hs.config.using_identity_server_from_trusted_list: - # Warn that a deprecated config option is in use - logger.warning( - 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " - "details and update your config file." 
- ) - try: data = await self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", @@ -538,15 +519,11 @@ async def requestMsisdnToken( except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") - # It is already checked that public_baseurl is configured since this code - # should only be used if account_threepid_delegate_msisdn is true. - assert self.hs.config.public_baseurl - # we need to tell the client to send the token back to us, since it doesn't # otherwise know where to send it, so add submit_url response parameter # (see also MSC2078) data["submit_url"] = ( - self.hs.config.public_baseurl + self.hs.config.server.public_baseurl + "_matrix/client/unstable/add_threepid/msisdn/submit_token" ) return data @@ -572,12 +549,18 @@ async def validate_threepid_session( validation_session = None # Try to validate as email - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + # Remote emails will only be used if a valid identity server is provided. + assert ( + self.hs.config.registration.account_threepid_delegate_email is not None + ) + # Ask our delegated email identity server validation_session = await self.threepid_from_creds( - self.hs.config.account_threepid_delegate_email, threepid_creds + self.hs.config.registration.account_threepid_delegate_email, + threepid_creds, ) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + elif self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: # Get a validated session matching these details validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True @@ -587,10 +570,11 @@ async def validate_threepid_session( return validation_session # Try to validate as msisdn - if self.hs.config.account_threepid_delegate_msisdn: + if self.hs.config.registration.account_threepid_delegate_msisdn: # Ask our delegated msisdn identity server validation_session = await self.threepid_from_creds( - self.hs.config.account_threepid_delegate_msisdn, threepid_creds + self.hs.config.registration.account_threepid_delegate_msisdn, + threepid_creds, ) return validation_session @@ -824,6 +808,7 @@ async def ask_id_server_for_third_party_invite( room_avatar_url: str, room_join_rules: str, room_name: str, + room_type: Optional[str], inviter_display_name: str, inviter_avatar_url: str, id_access_token: Optional[str] = None, @@ -843,6 +828,7 @@ async def ask_id_server_for_third_party_invite( notifications. room_join_rules: The join rules of the email (e.g. "public"). room_name: The m.room.name of the room. + room_type: The type of the room from its m.room.create event (e.g "m.space"). inviter_display_name: The current display name of the inviter. inviter_avatar_url: The URL of the inviter's avatar. @@ -869,6 +855,12 @@ async def ask_id_server_for_third_party_invite( "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } + + if room_type is not None: + invite_config["room_type"] = room_type + # TODO The unstable field is deprecated and should be removed in the future. + invite_config["org.matrix.msc3288.room_type"] = room_type + # If a custom web client location is available, include it in the request. 
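One pattern worth noting from the invite-config hunk just above: while MSC3288 is in transition, the room type is written under both its stable and its unstable key, so identity servers on either side of the spec change can read it. A toy version of that dual write (helper name ours, not Synapse's):

from typing import Optional

def add_room_type(invite_config: dict, room_type: Optional[str]) -> dict:
    if room_type is not None:
        invite_config["room_type"] = room_type
        # Deprecated unstable duplicate, kept for older identity servers;
        # to be dropped once MSC3288 is stable everywhere.
        invite_config["org.matrix.msc3288.room_type"] = room_type
    return invite_config

assert add_room_type({}, "m.space") == {
    "room_type": "m.space",
    "org.matrix.msc3288.room_type": "m.space",
}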
if self._web_client_location: invite_config["org.matrix.web_client_location"] = self._web_client_location diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index e1c544a3c981..9cd21e7f2b3c 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, List, Optional, Tuple from twisted.internet import defer @@ -31,8 +31,6 @@ from synapse.util.caches.response_cache import ResponseCache from synapse.visibility import filter_events_for_client -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -40,9 +38,11 @@ logger = logging.getLogger(__name__) -class InitialSyncHandler(BaseHandler): +class InitialSyncHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.state_handler = hs.get_state_handler() self.hs = hs self.state = hs.get_state_handler() self.clock = hs.get_clock() @@ -125,7 +125,7 @@ async def _snapshot_all_rooms( now_token = self.hs.get_event_sources().get_current_token() - presence_stream = self.hs.get_event_sources().sources["presence"] + presence_stream = self.hs.get_event_sources().sources.presence presence, _ = await presence_stream.get_new_events( user, from_key=None, include_offline=False ) @@ -150,8 +150,8 @@ async def _snapshot_all_rooms( if limit is None: limit = 10 - async def handle_room(event: RoomsForUser): - d = { + async def handle_room(event: RoomsForUser) -> None: + d: JsonDict = { "room_id": event.room_id, "membership": event.membership, "visibility": ( @@ -165,7 +165,11 @@ async def handle_room(event: RoomsForUser): invite_event = await self.store.get_event(event.event_id) d["invite"] = await self._event_serializer.serialize_event( - invite_event, time_now, as_client_event + invite_event, + time_now, + # Don't bundle aggregations as this is a deprecated API. + bundle_aggregations=False, + as_client_event=as_client_event, ) rooms_ret.append(d) @@ -216,7 +220,11 @@ async def handle_room(event: RoomsForUser): d["messages"] = { "chunk": ( await self._event_serializer.serialize_events( - messages, time_now=time_now, as_client_event=as_client_event + messages, + time_now=time_now, + # Don't bundle aggregations as this is a deprecated API. + bundle_aggregations=False, + as_client_event=as_client_event, ) ), "start": await start_token.to_string(self.store), @@ -226,6 +234,8 @@ async def handle_room(event: RoomsForUser): d["state"] = await self._event_serializer.serialize_events( current_state.values(), time_now=time_now, + # Don't bundle aggregations as this is a deprecated API. + bundle_aggregations=False, as_client_event=as_client_event, ) @@ -366,14 +376,18 @@ async def _room_initial_sync_parted( "room_id": room_id, "messages": { "chunk": ( - await self._event_serializer.serialize_events(messages, time_now) + # Don't bundle aggregations as this is a deprecated API. + await self._event_serializer.serialize_events( + messages, time_now, bundle_aggregations=False + ) ), "start": await start_token.to_string(self.store), "end": await end_token.to_string(self.store), }, "state": ( + # Don't bundle aggregations as this is a deprecated API. 
await self._event_serializer.serialize_events( - room_state.values(), time_now + room_state.values(), time_now, bundle_aggregations=False ) ), "presence": [], @@ -392,8 +406,9 @@ async def _room_initial_sync_joined( # TODO: These concurrently time_now = self.clock.time_msec() + # Don't bundle aggregations as this is a deprecated API. state = await self._event_serializer.serialize_events( - current_state.values(), time_now + current_state.values(), time_now, bundle_aggregations=False ) now_token = self.hs.get_event_sources().get_current_token() @@ -411,9 +426,9 @@ async def _room_initial_sync_joined( presence_handler = self.hs.get_presence_handler() - async def get_presence(): + async def get_presence() -> List[JsonDict]: # If presence is disabled, return an empty list - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return [] states = await presence_handler.get_states( @@ -428,7 +443,7 @@ async def get_presence(): for s in states ] - async def get_receipts(): + async def get_receipts() -> List[JsonDict]: receipts = await self.store.get_linearized_receipts_for_room( room_id, to_key=now_token.receipt_key ) @@ -467,7 +482,10 @@ async def get_receipts(): "room_id": room_id, "messages": { "chunk": ( - await self._event_serializer.serialize_events(messages, time_now) + # Don't bundle aggregations as this is a deprecated API. + await self._event_serializer.serialize_events( + messages, time_now, bundle_aggregations=False + ) ), "start": await start_token.to_string(self.store), "end": await end_token.to_string(self.store), diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8a0024ce8485..87f671708c4e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -16,6 +16,7 @@ # limitations under the License. import logging import random +from http import HTTPStatus from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple from canonicaljson import encode_canonical_json @@ -27,6 +28,7 @@ from synapse.api.constants import ( EventContentFields, EventTypes, + GuestAccess, Membership, RelationTypes, UserTypes, @@ -38,13 +40,16 @@ NotFoundError, ShadowBanError, SynapseError, + UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions from synapse.api.urls import ConsentURIBuilder +from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator +from synapse.handlers.directory import DirectoryHandler from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet @@ -57,8 +62,6 @@ from synapse.util.metrics import measure_func from synapse.visibility import filter_events_for_client -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.events.third_party_rules import ThirdPartyEventRules from synapse.server import HomeServer @@ -77,13 +80,13 @@ def __init__(self, hs: "HomeServer"): self.storage = hs.get_storage() self.state_store = self.storage.state self._event_serializer = hs.get_event_client_serializer() - self._ephemeral_events_enabled = hs.config.enable_ephemeral_messages + self._ephemeral_events_enabled = hs.config.server.enable_ephemeral_messages # The scheduled call to self._expire_event. 
None if no call is currently # scheduled. self._scheduled_expiry: Optional[IDelayedCall] = None - if not hs.config.worker_app: + if not hs.config.worker.worker_app: run_as_background_process( "_schedule_next_expiry", self._schedule_next_expiry ) @@ -183,20 +186,37 @@ async def get_state_events( if not last_events: raise NotFoundError("Can't find event for token %s" % (at_token,)) + last_event = last_events[0] + + # check whether the user is in the room at that time to determine + # whether they should be treated as peeking. + state_map = await self.state_store.get_state_for_event( + last_event.event_id, + StateFilter.from_types([(EventTypes.Member, user_id)]), + ) + + joined = False + membership_event = state_map.get((EventTypes.Member, user_id)) + if membership_event: + joined = membership_event.membership == Membership.JOIN + + is_peeking = not joined visible_events = await filter_events_for_client( self.storage, user_id, last_events, filter_send_to_client=False, + is_peeking=is_peeking, ) - event = last_events[0] if visible_events: room_state_events = await self.state_store.get_state_for_events( - [event.event_id], state_filter=state_filter + [last_event.event_id], state_filter=state_filter ) - room_state: Mapping[Any, EventBase] = room_state_events[event.event_id] + room_state: Mapping[Any, EventBase] = room_state_events[ + last_event.event_id + ] else: raise AuthError( 403, @@ -227,13 +247,7 @@ async def get_state_events( room_state = room_state_events[membership_event_id] now = self.clock.time_msec() - events = await self._event_serializer.serialize_events( - room_state.values(), - now, - # We don't bother bundling aggregations in when asked for state - # events, as clients won't use them. - bundle_aggregations=False, - ) + events = await self._event_serializer.serialize_events(room_state.values(), now) return events async def get_joined_members(self, requester: Requester, room_id: str) -> dict: @@ -280,7 +294,7 @@ async def get_joined_members(self, requester: Requester, room_id: str) -> dict: for user_id, profile in users_with_profile.items() } - def maybe_schedule_expiry(self, event: EventBase): + def maybe_schedule_expiry(self, event: EventBase) -> None: """Schedule the expiry of an event if there's not already one scheduled, or if the one running is for an event that will expire after the provided timestamp. @@ -300,7 +314,7 @@ def maybe_schedule_expiry(self, event: EventBase): # a task scheduled for a timestamp that's sooner than the provided one. self._schedule_expiry_for_event(event.event_id, expiry_ts) - async def _schedule_next_expiry(self): + async def _schedule_next_expiry(self) -> None: """Retrieve the ID and the expiry timestamp of the next event to be expired, and schedule an expiry task for it. @@ -313,7 +327,7 @@ async def _schedule_next_expiry(self): event_id, expiry_ts = res self._schedule_expiry_for_event(event_id, expiry_ts) - def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int): + def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int) -> None: """Schedule an expiry task for the provided event if there's not already one scheduled at a timestamp that's sooner than the provided one. @@ -349,7 +363,7 @@ def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int): event_id, ) - async def _expire_event(self, event_id: str): + async def _expire_event(self, event_id: str) -> None: """Retrieve and expire an event that needs to be expired from the database. 
If the event doesn't exist in the database, log it and delete the expiry date @@ -394,7 +408,9 @@ def __init__(self, hs: "HomeServer"): self.server_name = hs.hostname self.notifier = hs.get_notifier() self.config = hs.config - self.require_membership_for_aliases = hs.config.require_membership_for_aliases + self.require_membership_for_aliases = ( + hs.config.server.require_membership_for_aliases + ) self._events_shard_config = self.config.worker.events_shard_config self._instance_name = hs.get_instance_name() @@ -404,13 +420,12 @@ def __init__(self, hs: "HomeServer"): Membership.JOIN, Membership.KNOCK, } - if self.hs.config.include_profile_data_on_invite: + if self.hs.config.server.include_profile_data_on_invite: self.membership_types_to_include_profile_data_in.add(Membership.INVITE) self.send_event = ReplicationSendEventRestServlet.make_client(hs) - # This is only used to get at ratelimit function, and maybe_kick_guest_users - self.base_handler = BaseHandler(hs) + self.request_ratelimiter = hs.get_request_ratelimiter() # We arbitrarily limit concurrent event creation for a room to 5. # This is to stop us from diverging history *too* much. @@ -424,7 +439,7 @@ def __init__(self, hs: "HomeServer"): ) self._block_events_without_consent_error = ( - self.config.block_events_without_consent_error + self.config.consent.block_events_without_consent_error ) # we need to construct a ConsentURIBuilder here, as it checks that the necessary @@ -440,11 +455,11 @@ def __init__(self, hs: "HomeServer"): # self._rooms_to_exclude_from_dummy_event_insertion: Dict[str, int] = {} # The number of forward extremeities before a dummy event is sent. - self._dummy_events_threshold = hs.config.dummy_events_threshold + self._dummy_events_threshold = hs.config.server.dummy_events_threshold if ( - self.config.run_background_tasks - and self.config.cleanup_extremities_with_dummy_events + self.config.worker.run_background_tasks + and self.config.server.cleanup_extremities_with_dummy_events ): self.clock.looping_call( lambda: run_as_background_process( @@ -456,7 +471,7 @@ def __init__(self, hs: "HomeServer"): self._message_handler = hs.get_message_handler() - self._ephemeral_events_enabled = hs.config.enable_ephemeral_messages + self._ephemeral_events_enabled = hs.config.server.enable_ephemeral_messages self._external_cache = hs.get_external_cache() @@ -530,16 +545,22 @@ async def create_event( await self.auth.check_auth_blocking(requester=requester) if event_dict["type"] == EventTypes.Create and event_dict["state_key"] == "": - room_version = event_dict["content"]["room_version"] + room_version_id = event_dict["content"]["room_version"] + room_version_obj = KNOWN_ROOM_VERSIONS.get(room_version_id) + if not room_version_obj: + # this can happen if support is withdrawn for a room version + raise UnsupportedRoomVersionError(room_version_id) else: try: - room_version = await self.store.get_room_version_id( + room_version_obj = await self.store.get_room_version( event_dict["room_id"] ) except NotFoundError: raise AuthError(403, "Unknown room") - builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.for_room_version( + room_version_obj, event_dict + ) self.validator.validate_builder(builder) @@ -580,29 +601,6 @@ async def create_event( builder.internal_metadata.historical = historical - # Strip down the auth_event_ids to only what we need to auth the event. 
- # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -647,7 +645,7 @@ async def create_event( self.validator.validate_new(event, self.config) - return (event, context) + return event, context async def _is_exempt_from_privacy_policy( self, builder: EventBuilder, requester: Requester @@ -673,10 +671,10 @@ async def _is_exempt_from_privacy_policy( return False async def _is_server_notices_room(self, room_id: str) -> bool: - if self.config.server_notices_mxid is None: + if self.config.servernotices.server_notices_mxid is None: return False user_ids = await self.store.get_users_in_room(room_id) - return self.config.server_notices_mxid in user_ids + return self.config.servernotices.server_notices_mxid in user_ids async def assert_accepted_privacy_policy(self, requester: Requester) -> None: """Check if a user has accepted the privacy policy @@ -712,8 +710,8 @@ async def assert_accepted_privacy_policy(self, requester: Requester) -> None: # exempt the system notices user if ( - self.config.server_notices_mxid is not None - and user_id == self.config.server_notices_mxid + self.config.servernotices.server_notices_mxid is not None + and user_id == self.config.servernotices.server_notices_mxid ): return @@ -725,7 +723,7 @@ async def assert_accepted_privacy_policy(self, requester: Requester) -> None: if u["appservice_id"] is not None: # users registered by an appservice are exempt return - if u["consent_version"] == self.config.user_consent_version: + if u["consent_version"] == self.config.consent.user_consent_version: return consent_uri = self._consent_uri_builder.build_user_consent_uri(user.localpart) @@ -909,6 +907,33 @@ async def create_new_client_event( Tuple of created event, context """ + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member events that don't match event.sender + full_state_ids_at_event = None + if auth_event_ids is not None: + # If auth events are provided, prev events must be provided as well.
+ assert prev_event_ids is not None + + # Copy the full auth state before it is stripped down + full_state_ids_at_event = auth_event_ids.copy() + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -933,18 +958,20 @@ async def create_new_client_event( depth=depth, ) - old_state = None - # Pass on the outlier property from the builder to the event # after it is created if builder.internal_metadata.outlier: - event.internal_metadata.outlier = builder.internal_metadata.outlier - - # Calculate the state for outliers that pass in their own `auth_event_ids` - if auth_event_ids: - old_state = await self.store.get_events_as_list(auth_event_ids) - - context = await self.state.compute_event_context(event, old_state=old_state) + event.internal_metadata.outlier = True + context = EventContext.for_outlier() + elif ( + event.type == EventTypes.MSC2716_INSERTION + and full_state_ids_at_event + and builder.internal_metadata.is_historical() + ): + old_state = await self.store.get_events_as_list(full_state_ids_at_event) + context = await self.state.compute_event_context(event, old_state=old_state) + else: + context = await self.state.compute_event_context(event) if requester: context.app_service = requester.app_service @@ -968,13 +995,52 @@ async def create_new_client_event( ) self.validator.validate_new(event, self.config) + await self._validate_event_relation(event) + logger.debug("Created event %s", event.event_id) + + return event, context + + async def _validate_event_relation(self, event: EventBase) -> None: + """ + Ensure the relation data on a new event is not bogus. + + Args: + event: The event being created. + + Raises: + SynapseError if the event is invalid. + """ + + relation = event.content.get("m.relates_to") + if not relation: + return + + relation_type = relation.get("rel_type") + if not relation_type: + return + + # Ensure the parent is real. + relates_to = relation.get("event_id") + if not relates_to: + return + + parent_event = await self.store.get_event(relates_to, allow_none=True) + if parent_event: + # And in the same room. + if parent_event.room_id != event.room_id: + raise SynapseError(400, "Relations must be in the same room") + + else: + # There must be some reason that the client knows the event exists, + # see if there are existing relations. If so, assume everything is fine. + if not await self.store.event_is_target_of_relation(relates_to): + # Otherwise, the client can't know about the parent event! + raise SynapseError(400, "Can't send relation to unknown event") # If this event is an annotation then we check that the sender # can't annotate the same way twice (e.g. stops users from liking an # event multiple times).
- relation = event.content.get("m.relates_to", {}) - if relation.get("rel_type") == RelationTypes.ANNOTATION: - relates_to = relation["event_id"] + if relation_type == RelationTypes.ANNOTATION: aggregation_key = relation["key"] already_exists = await self.store.has_user_annotated_event( @@ -983,9 +1049,12 @@ async def create_new_client_event( if already_exists: raise SynapseError(400, "Can't send same reaction twice") - logger.debug("Created event %s", event.event_id) - - return (event, context) + # Don't attempt to start a thread if the parent event is a relation. + elif relation_type == RelationTypes.THREAD: + if await self.store.event_includes_relation(relates_to): + raise SynapseError( + 400, "Cannot start threads from an event with a relation" + ) @measure_func("handle_new_client_event") async def handle_new_client_event( @@ -1050,9 +1119,17 @@ async def handle_new_client_event( EventTypes.Create, "", ): - room_version = event.content.get("room_version", RoomVersions.V1.identifier) + room_version_id = event.content.get( + "room_version", RoomVersions.V1.identifier + ) + room_version_obj = KNOWN_ROOM_VERSIONS.get(room_version_id) + if not room_version_obj: + raise UnsupportedRoomVersionError( + "Attempt to create a room with unsupported room version %s" + % (room_version_id,) + ) else: - room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = await self.store.get_room_version(event.room_id) if event.internal_metadata.is_out_of_band_membership(): # the only sort of out-of-band-membership events we expect to see here are @@ -1061,8 +1138,9 @@ async def handle_new_client_event( assert event.content["membership"] == Membership.LEAVE else: try: - await self._event_auth_handler.check_from_context( - room_version, event, context + validate_event_for_room_version(room_version_obj, event) + await self._event_auth_handler.check_auth_rules_from_context( + room_version_obj, event, context ) except AuthError as err: logger.warning("Denying new event %r because %s", event, err) @@ -1211,7 +1289,10 @@ async def cache_joined_hosts_for_event( self._external_cache_joined_hosts_updates[state_entry.state_group] = None async def _validate_canonical_alias( - self, directory_handler, room_alias_str: str, expected_room_id: str + self, + directory_handler: DirectoryHandler, + room_alias_str: str, + expected_room_id: str, ) -> None: """ Ensure that the given room alias points to the expected room ID. @@ -1273,6 +1354,8 @@ async def persist_and_notify_client_event( # user is actually admin or not). is_admin_redaction = False if event.type == EventTypes.Redaction: + assert event.redacts is not None + original_event = await self.store.get_event( event.redacts, redact_behaviour=EventRedactBehaviour.AS_IS, @@ -1285,11 +1368,11 @@ async def persist_and_notify_client_event( original_event and event.sender != original_event.sender ) - await self.base_handler.ratelimit( + await self.request_ratelimiter.ratelimit( requester, is_admin_redaction=is_admin_redaction ) - await self.base_handler.maybe_kick_guest_users(event, context) + await self._maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: # Validate a newly added alias or newly added alt_aliases. 
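The relation checks above reduce to a few rules on the `m.relates_to` payload: the parent event must exist (or at least already be known as a relation target), it must be in the same room, an annotation may not duplicate one the sender already made, and a thread may not be rooted at an event that is itself a relation. The following is a minimal, self-contained sketch of those rules, not Synapse's implementation: the event store is a plain dict, the duplicate-annotation rule is omitted because it needs per-user history, and the `THREAD` constant is a placeholder for whichever identifier the server recognises.

```python
# Hedged, standalone sketch of the m.relates_to checks described above.
THREAD = "m.thread"  # placeholder: the diff uses RelationTypes.THREAD

class RelationError(Exception):
    pass

def check_relation(event: dict, events_by_id: dict) -> None:
    """Reject bogus relations, mirroring the rules in the hunk above."""
    relation = event["content"].get("m.relates_to")
    if not relation or not relation.get("rel_type") or not relation.get("event_id"):
        return  # nothing to validate

    parent = events_by_id.get(relation["event_id"])
    if parent is None:
        # The real handler also accepts unknown parents that are already
        # relation targets; this sketch simply rejects them.
        raise RelationError("Can't send relation to unknown event")
    if parent["room_id"] != event["room_id"]:
        raise RelationError("Relations must be in the same room")
    if relation["rel_type"] == THREAD and "m.relates_to" in parent["content"]:
        raise RelationError("Cannot start threads from an event with a relation")
```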
@@ -1368,6 +1451,8 @@ async def persist_and_notify_client_event( ) if event.type == EventTypes.Redaction: + assert event.redacts is not None + original_event = await self.store.get_event( event.redacts, redact_behaviour=EventRedactBehaviour.AS_IS, @@ -1376,6 +1461,9 @@ async def persist_and_notify_client_event( allow_none=True, ) + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + # we can make some additional checks now if we have the original event. if original_event: if original_event.type == EventTypes.Create: @@ -1387,6 +1475,28 @@ async def persist_and_notify_client_event( if original_event.type == EventTypes.ServerACL: raise AuthError(403, "Redacting server ACL events is not permitted") + # Add a little safety stop-gap to prevent people from trying to + # redact MSC2716 related events when they're in a room version + # which does not support it yet. We allow people to use MSC2716 + # events in existing room versions but only from the room + # creator since it does not require any changes to the auth + # rules nor, in effect, to the redaction algorithm. In the + # supported room version, we add the `historical` power level to + # auth the MSC2716 related events and adjust the redaction + # algorithm to keep the `historical` field around (redacting an + # event should only strip fields which don't affect the + # structural protocol level). + is_msc2716_event = ( + original_event.type == EventTypes.MSC2716_INSERTION + or original_event.type == EventTypes.MSC2716_BATCH + or original_event.type == EventTypes.MSC2716_MARKER + ) + if not room_version_obj.msc2716_historical and is_msc2716_event: + raise AuthError( + 403, + "Redacting MSC2716 events is not supported in this room version", + ) + prev_state_ids = await context.get_prev_state_ids() auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True @@ -1394,9 +1504,6 @@ async def persist_and_notify_client_event( auth_events_map = await self.store.get_events(auth_events_ids) auth_events = {(e.type, e.state_key): e for e in auth_events_map.values()} - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - if event_auth.check_redaction( room_version_obj, event, auth_events=auth_events ): @@ -1417,6 +1524,41 @@ async def persist_and_notify_client_event( if prev_state_ids: raise AuthError(403, "Changing the room create event is forbidden") + if event.type == EventTypes.MSC2716_INSERTION: + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + + create_event = await self.store.get_create_event_for_room(event.room_id) + room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) + + # Only check an insertion event if the room version + # supports it or the event is from the room creator.
+ if room_version_obj.msc2716_historical or ( + self.config.experimental.msc2716_enabled + and event.sender == room_creator + ): + next_batch_id = event.content.get( + EventContentFields.MSC2716_NEXT_BATCH_ID + ) + conflicting_insertion_event_id = None + if next_batch_id: + conflicting_insertion_event_id = ( + await self.store.get_insertion_event_id_by_batch_id( + event.room_id, next_batch_id + ) + ) + if conflicting_insertion_event_id is not None: + # The current insertion event that we're processing is invalid + # because an insertion event already exists in the room with the + # same next_batch_id. We can't allow multiple because the batch + # pointing will get weird, e.g. we can't determine which insertion + # event the batch event is pointing to. + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Another insertion event already exists with the same next_batch_id", + errcode=Codes.INVALID_PARAM, + ) + # Mark any `m.historical` messages as backfilled so they don't appear # in `/sync` and have the proper decrementing `stream_ordering` as we import backfilled = False @@ -1437,13 +1579,16 @@ async def persist_and_notify_client_event( # If there's an expiry timestamp on the event, schedule its expiry. self._message_handler.maybe_schedule_expiry(event) - def _notify(): + async def _notify() -> None: try: - self.notifier.on_new_room_event( + await self.notifier.on_new_room_event( event, event_pos, max_stream_token, extra_users=extra_users ) except Exception: - logger.exception("Error notifying about new room event") + logger.exception( + "Error notifying about new room event %s", + event.event_id, + ) run_in_background(_notify) @@ -1454,6 +1599,28 @@ def _notify(): return event + async def _maybe_kick_guest_users( + self, event: EventBase, context: EventContext + ) -> None: + if event.type != EventTypes.GuestAccess: + return + + guest_access = event.content.get(EventContentFields.GUEST_ACCESS) + if guest_access == GuestAccess.CAN_JOIN: + return + + current_state_ids = await context.get_current_state_ids() + + # since this is a client-generated event, it cannot be an outlier and we must + # therefore have the state ids. 
+ assert current_state_ids is not None + current_state_dict = await self.store.get_events( + list(current_state_ids.values()) + ) + current_state = list(current_state_dict.values()) + logger.info("maybe_kick_guest_users %r", current_state) + await self.hs.get_room_member_handler().kick_guest_users(current_state) + async def _bump_active_time(self, user: UserID) -> None: try: presence = self.hs.get_presence_handler() @@ -1461,7 +1628,7 @@ async def _bump_active_time(self, user: UserID) -> None: except Exception: logger.exception("Error bumping presence active time") - async def _send_dummy_events_to_fill_extremities(self): + async def _send_dummy_events_to_fill_extremities(self) -> None: """Background task to send dummy events into rooms that have a large number of extremities """ @@ -1538,7 +1705,7 @@ async def _send_dummy_event_for_room(self, room_id: str) -> bool: ) return False - def _expire_rooms_to_exclude_from_dummy_event_insertion(self): + def _expire_rooms_to_exclude_from_dummy_event_insertion(self) -> None: expire_before = self.clock.time_msec() - _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY to_expire = set() for room_id, time in self._rooms_to_exclude_from_dummy_event_insertion.items(): diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py index eca8f1604001..deb353975143 100644 --- a/synapse/handlers/oidc.py +++ b/synapse/handlers/oidc.py @@ -14,7 +14,7 @@ # limitations under the License. import inspect import logging -from typing import TYPE_CHECKING, Dict, Generic, List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, TypeVar, Union from urllib.parse import urlencode, urlparse import attr @@ -23,7 +23,7 @@ from authlib.jose import JsonWebToken, jwt from authlib.oauth2.auth import ClientAuth from authlib.oauth2.rfc6749.parameters import prepare_grant_uri -from authlib.oidc.core import CodeIDToken, ImplicitIDToken, UserInfo +from authlib.oidc.core import CodeIDToken, UserInfo from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url from jinja2 import Environment, Template from pymacaroons.exceptions import ( @@ -117,7 +117,8 @@ async def load_metadata(self) -> None: for idp_id, p in self._providers.items(): try: await p.load_metadata() - await p.load_jwks() + if not p._uses_userinfo: + await p.load_jwks() except Exception as e: raise Exception( "Error while initialising OIDC provider %r" % (idp_id,) @@ -249,11 +250,11 @@ async def handle_oidc_callback(self, request: SynapseRequest) -> None: class OidcError(Exception): """Used to catch errors when calling the token_endpoint""" - def __init__(self, error, error_description=None): + def __init__(self, error: str, error_description: Optional[str] = None): self.error = error self.error_description = error_description - def __str__(self): + def __str__(self) -> str: if self.error_description: return f"{self.error}: {self.error_description}" return self.error @@ -277,7 +278,7 @@ def __init__( self._token_generator = token_generator self._config = provider - self._callback_url: str = hs.config.oidc_callback_url + self._callback_url: str = hs.config.oidc.oidc_callback_url # Calculate the prefix for OIDC callback paths based on the public_baseurl. # We'll insert this into the Path= parameter of any session cookies we set. 
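A pattern that recurs throughout this diff (here for `oidc_callback_url`, elsewhere for `use_presence`, `worker_app`, `server_notices_mxid`, and so on) is the move from flat `hs.config.<option>` attributes to per-section namespaces such as `hs.config.server` and `hs.config.oidc`. A toy sketch of the target layout, using simplified stand-in classes rather than Synapse's real config machinery:

```python
# Illustrative sketch only: simplified stand-ins for Synapse's config classes.
from dataclasses import dataclass

@dataclass
class ServerConfig:
    server_name: str
    use_presence: bool

@dataclass
class OidcConfig:
    oidc_callback_url: str

@dataclass
class RootConfig:
    # Options live under a section, so each call site names the owning
    # section: `config.server.use_presence` replaces `config.use_presence`.
    server: ServerConfig
    oidc: OidcConfig

config = RootConfig(
    server=ServerConfig(server_name="example.com", use_presence=True),
    oidc=OidcConfig(oidc_callback_url="https://example.com/_synapse/client/oidc/callback"),
)
assert config.server.use_presence
```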
@@ -324,7 +325,7 @@ def __init__( self._allow_existing_users = provider.allow_existing_users self._http_client = hs.get_proxied_http_client() - self._server_name: str = hs.config.server_name + self._server_name: str = hs.config.server.server_name # identifier for the external_ids table self.idp_id = provider.idp_id @@ -338,9 +339,6 @@ def __init__( # optional brand identifier for this auth provider self.idp_brand = provider.idp_brand - # Optional brand identifier for the unstable API (see MSC2858). - self.unstable_idp_brand = provider.unstable_idp_brand - self._sso_handler = hs.get_sso_handler() self._sso_handler.register_identity_provider(self) @@ -501,10 +499,6 @@ async def load_jwks(self, force: bool = False) -> JWKS: return await self._jwks.get() async def _load_jwks(self) -> JWKS: - if self._uses_userinfo: - # We're not using jwt signing, return an empty jwk set - return {"keys": []} - metadata = await self.load_metadata() # Load the JWKS using the `jwks_uri` metadata. @@ -666,7 +660,7 @@ async def _fetch_userinfo(self, token: Token) -> UserInfo: return UserInfo(resp) - async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: + async def _parse_id_token(self, token: Token, nonce: str) -> CodeIDToken: """Return an instance of UserInfo from token's ``id_token``. Args: @@ -676,7 +670,7 @@ async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: request. This value should match the one inside the token. Returns: - An object representing the user. + The decoded claims in the ID token. """ metadata = await self.load_metadata() claims_params = { @@ -687,9 +681,6 @@ async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: # If we got an `access_token`, there should be an `at_hash` claim # in the `id_token` that we can check against. claims_params["access_token"] = token["access_token"] - claims_cls = CodeIDToken - else: - claims_cls = ImplicitIDToken alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"]) jwt = JsonWebToken(alg_values) @@ -706,7 +697,7 @@ async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: claims = jwt.decode( id_token, key=jwk_set, - claims_cls=claims_cls, + claims_cls=CodeIDToken, claims_options=claim_options, claims_params=claims_params, ) @@ -716,7 +707,7 @@ async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: claims = jwt.decode( id_token, key=jwk_set, - claims_cls=claims_cls, + claims_cls=CodeIDToken, claims_options=claim_options, claims_params=claims_params, ) @@ -724,7 +715,8 @@ async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: logger.debug("Decoded id_token JWT %r; validating", claims) claims.validate(leeway=120) # allows 2 min of clock skew - return UserInfo(claims) + + return claims async def handle_redirect_request( self, @@ -840,8 +832,22 @@ async def handle_oidc_callback( logger.debug("Successfully obtained OAuth2 token data: %r", token) - # Now that we have a token, get the userinfo, either by decoding the - # `id_token` or by fetching the `userinfo_endpoint`. + # If there is an id_token, it should be validated regardless of whether + # the userinfo endpoint is used.
+ if token.get("id_token") is not None: + try: + id_token = await self._parse_id_token(token, nonce=session_data.nonce) + sid = id_token.get("sid") + except Exception as e: + logger.exception("Invalid id_token") + self._sso_handler.render_error(request, "invalid_token", str(e)) + return + else: + id_token = None + sid = None + + # Now that we have a token, get the userinfo either from the `id_token` + # claims or by fetching the `userinfo_endpoint`. if self._uses_userinfo: try: userinfo = await self._fetch_userinfo(token) @@ -849,13 +855,14 @@ async def handle_oidc_callback( logger.exception("Could not fetch userinfo") self._sso_handler.render_error(request, "fetch_error", str(e)) return + elif id_token is not None: + userinfo = UserInfo(id_token) else: - try: - userinfo = await self._parse_id_token(token, nonce=session_data.nonce) - except Exception as e: - logger.exception("Invalid id_token") - self._sso_handler.render_error(request, "invalid_token", str(e)) - return + logger.error("Missing id_token in token response") + self._sso_handler.render_error( + request, "invalid_token", "Missing id_token in token response" + ) + return # first check if we're doing a UIA if session_data.ui_auth_session_id: @@ -887,7 +894,7 @@ async def handle_oidc_callback( # Call the mapper to register/login the user try: await self._complete_oidc_login( - userinfo, token, request, session_data.client_redirect_url + userinfo, token, request, session_data.client_redirect_url, sid ) except MappingException as e: logger.exception("Could not map user") @@ -899,6 +906,7 @@ async def _complete_oidc_login( token: Token, request: SynapseRequest, client_redirect_url: str, + sid: Optional[str], ) -> None: """Given a UserInfo response, complete the login flow @@ -1011,6 +1019,7 @@ async def grandfather_existing_users() -> Optional[str]: oidc_response_to_user_attributes, grandfather_existing_users, extra_attributes, + auth_provider_session_id=sid, ) def _remote_id_from_userinfo(self, userinfo: UserInfo) -> str: @@ -1060,13 +1069,13 @@ def __init__( self._cached_secret = b"" self._cached_secret_replacement_time = 0 - def __str__(self): + def __str__(self) -> str: # if client_auth_method is client_secret_basic, then ClientAuth.prepare calls # encode_client_secret_basic, which calls "{}".format(secret), which ends up # here. return self._get_secret().decode("ascii") - def __bytes__(self): + def __bytes__(self) -> bytes: # if client_auth_method is client_secret_post, then ClientAuth.prepare calls # encode_client_secret_post, which ends up here. return self._get_secret() @@ -1200,21 +1209,21 @@ def verify_oidc_session_token( ) -@attr.s(frozen=True, slots=True) +@attr.s(frozen=True, slots=True, auto_attribs=True) class OidcSessionData: """The attributes which are stored in a OIDC session cookie""" # the Identity Provider being used - idp_id = attr.ib(type=str) + idp_id: str # The `nonce` parameter passed to the OIDC provider. - nonce = attr.ib(type=str) + nonce: str # The URL the client gave when it initiated the flow. 
("" if this is a UI Auth) - client_redirect_url = attr.ib(type=str) + client_redirect_url: str # The session ID of the ongoing UI Auth ("" if this is a login) - ui_auth_session_id = attr.ib(type=str) + ui_auth_session_id: str class UserAttributeDict(TypedDict): @@ -1293,20 +1302,20 @@ async def get_extra_attributes(self, userinfo: UserInfo, token: Token) -> JsonDi # Used to clear out "None" values in templates -def jinja_finalize(thing): +def jinja_finalize(thing: Any) -> Any: return thing if thing is not None else "" env = Environment(finalize=jinja_finalize) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class JinjaOidcMappingConfig: - subject_claim = attr.ib(type=str) - localpart_template = attr.ib(type=Optional[Template]) - display_name_template = attr.ib(type=Optional[Template]) - email_template = attr.ib(type=Optional[Template]) - extra_attributes = attr.ib(type=Dict[str, Template]) + subject_claim: str + localpart_template: Optional[Template] + display_name_template: Optional[Template] + email_template: Optional[Template] + extra_attributes: Dict[str, Template] class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]): diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 1dbafd253d37..4f424380533b 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -13,18 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, Optional, Set +from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Set + +import attr from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership from synapse.api.errors import SynapseError from synapse.api.filtering import Filter -from synapse.logging.context import run_in_background +from synapse.handlers.room import ShutdownRoomResponse from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import Requester +from synapse.types import JsonDict, Requester from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client @@ -36,15 +38,12 @@ logger = logging.getLogger(__name__) +@attr.s(slots=True, auto_attribs=True) class PurgeStatus: """Object tracking the status of a purge request This class contains information on the progress of a purge request, for return by get_purge_status. - - Attributes: - status (int): Tracks whether this request has completed. One of - STATUS_{ACTIVE,COMPLETE,FAILED} """ STATUS_ACTIVE = 0 @@ -57,11 +56,62 @@ class PurgeStatus: STATUS_FAILED: "failed", } - def __init__(self): - self.status = PurgeStatus.STATUS_ACTIVE + # Save the error message if an error occurs + error: str = "" + + # Tracks whether this request has completed. One of STATUS_{ACTIVE,COMPLETE,FAILED}. + status: int = STATUS_ACTIVE + + def asdict(self) -> JsonDict: + ret = {"status": PurgeStatus.STATUS_TEXT[self.status]} + if self.error: + ret["error"] = self.error + return ret + + +@attr.s(slots=True, auto_attribs=True) +class DeleteStatus: + """Object tracking the status of a delete room request + + This class contains information on the progress of a delete room request, for + return by get_delete_status. 
+ """ + + STATUS_PURGING = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + STATUS_SHUTTING_DOWN = 3 + + STATUS_TEXT = { + STATUS_PURGING: "purging", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + STATUS_SHUTTING_DOWN: "shutting_down", + } + + # Tracks whether this request has completed. + # One of STATUS_{PURGING,COMPLETE,FAILED,SHUTTING_DOWN}. + status: int = STATUS_PURGING - def asdict(self): - return {"status": PurgeStatus.STATUS_TEXT[self.status]} + # Save the error message if an error occurs + error: str = "" + + # Saves the result of an action to give it back to REST API + shutdown_room: ShutdownRoomResponse = { + "kicked_users": [], + "failed_to_kick_users": [], + "local_aliases": [], + "new_room_id": None, + } + + def asdict(self) -> JsonDict: + ret = { + "status": DeleteStatus.STATUS_TEXT[self.status], + "shutdown_room": self.shutdown_room, + } + if self.error: + ret["error"] = self.error + return ret class PaginationHandler: @@ -71,6 +121,9 @@ class PaginationHandler: paginating during a purge. """ + # when to remove a completed deletion/purge from the results map + CLEAR_PURGE_AFTER_MS = 1000 * 3600 * 24 # 24 hours + def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() @@ -79,35 +132,51 @@ def __init__(self, hs: "HomeServer"): self.state_store = self.storage.state self.clock = hs.get_clock() self._server_name = hs.hostname + self._room_shutdown_handler = hs.get_room_shutdown_handler() self.pagination_lock = ReadWriteLock() + # IDs of rooms in which there currently an active purge *or delete* operation. self._purges_in_progress_by_room: Set[str] = set() # map from purge id to PurgeStatus self._purges_by_id: Dict[str, PurgeStatus] = {} + # map from purge id to DeleteStatus + self._delete_by_id: Dict[str, DeleteStatus] = {} + # map from room id to delete ids + # Dict[`room_id`, List[`delete_id`]] + self._delete_by_room: Dict[str, List[str]] = {} self._event_serializer = hs.get_event_client_serializer() - self._retention_default_max_lifetime = hs.config.retention_default_max_lifetime + self._retention_default_max_lifetime = ( + hs.config.retention.retention_default_max_lifetime + ) - self._retention_allowed_lifetime_min = hs.config.retention_allowed_lifetime_min - self._retention_allowed_lifetime_max = hs.config.retention_allowed_lifetime_max + self._retention_allowed_lifetime_min = ( + hs.config.retention.retention_allowed_lifetime_min + ) + self._retention_allowed_lifetime_max = ( + hs.config.retention.retention_allowed_lifetime_max + ) - if hs.config.run_background_tasks and hs.config.retention_enabled: + if ( + hs.config.worker.run_background_tasks + and hs.config.retention.retention_enabled + ): # Run the purge jobs described in the configuration file. - for job in hs.config.retention_purge_jobs: + for job in hs.config.retention.retention_purge_jobs: logger.info("Setting up purge job with config: %s", job) self.clock.looping_call( run_as_background_process, - job["interval"], + job.interval, "purge_history_for_rooms_in_range", self.purge_history_for_rooms_in_range, - job["shortest_max_lifetime"], - job["longest_max_lifetime"], + job.shortest_max_lifetime, + job.longest_max_lifetime, ) async def purge_history_for_rooms_in_range( self, min_ms: Optional[int], max_ms: Optional[int] - ): + ) -> None: """Purge outdated events from rooms within the given retention range. 
If a default retention policy is defined in the server's configuration and its @@ -257,8 +326,13 @@ def start_purge_history( logger.info("[purge] starting purge_id %s", purge_id) self._purges_by_id[purge_id] = PurgeStatus() - run_in_background( - self._purge_history, purge_id, room_id, token, delete_local_events + run_as_background_process( + "purge_history", + self._purge_history, + purge_id, + room_id, + token, + delete_local_events, ) return purge_id @@ -268,7 +342,7 @@ async def _purge_history( """Carry out a history purge on a room. Args: - purge_id: The id for this purge + purge_id: The ID for this purge. room_id: The room to purge from token: topological token to delete events before delete_local_events: True to delete local events as well as remote ones @@ -287,14 +361,17 @@ async def _purge_history( "[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject()) # type: ignore ) self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED + self._purges_by_id[purge_id].error = f.getErrorMessage() finally: self._purges_in_progress_by_room.discard(room_id) # remove the purge from the list 24 hours after it completes - def clear_purge(): + def clear_purge() -> None: del self._purges_by_id[purge_id] - self.hs.get_reactor().callLater(24 * 3600, clear_purge) + self.hs.get_reactor().callLater( + PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_purge + ) def get_purge_status(self, purge_id: str) -> Optional[PurgeStatus]: """Get the current status of an active purge @@ -304,17 +381,31 @@ def get_purge_status(self, purge_id: str) -> Optional[PurgeStatus]: """ return self._purges_by_id.get(purge_id) + def get_delete_status(self, delete_id: str) -> Optional[DeleteStatus]: + """Get the current status of an active delete operation + + Args: + delete_id: delete_id returned by start_shutdown_and_purge_room + """ + return self._delete_by_id.get(delete_id) + + def get_delete_ids_by_room(self, room_id: str) -> Optional[Collection[str]]: + """Get all active delete ids for a room + + Args: + room_id: room_id that is deleted + """ + return self._delete_by_room.get(room_id) + async def purge_room(self, room_id: str, force: bool = False) -> None: """Purge the given room from the database. + This function is part of the delete room v1 API. Args: room_id: room to be purged force: set true to skip checking for joined users. """ with await self.pagination_lock.write(room_id): - # check we know about the room - await self.store.get_room_version_id(room_id) - # first check that we have no users in this room if not force: joined = await self.store.is_host_joined(room_id, self._server_name) @@ -416,7 +507,7 @@ async def get_messages( if events: if event_filter: - events = event_filter.filter(events) + events = await event_filter.filter(events) events = await filter_events_for_client( self.storage, user_id, events, is_peeking=(member_event_id is None) @@ -430,7 +521,7 @@ async def get_messages( } state = None - if event_filter and event_filter.lazy_load_members() and len(events) > 0: + if event_filter and event_filter.lazy_load_members and len(events) > 0: # TODO: remove redundant members # FIXME: we also care about invite targets etc.
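The shutdown-and-purge machinery added below drives each request through a small state machine (`shutting_down` → `purging` → `complete` or `failed`) and drops the record 24 hours later; dividing `CLEAR_PURGE_AFTER_MS` by 1000 converts it to the seconds that `reactor.callLater` expects. A rough sketch of that lifecycle with the reactor and storage stubbed out (asyncio stands in for Twisted here, and the cleanup timer never actually fires in this toy because the loop exits first):

```python
# Rough lifecycle sketch only; asyncio stands in for Twisted's reactor.
import asyncio

CLEAR_PURGE_AFTER_MS = 1000 * 3600 * 24  # 24 hours, as in the diff

class DeleteStatus:
    def __init__(self) -> None:
        self.status = "shutting_down"
        self.error = ""

statuses: dict = {}

async def shutdown_and_purge(delete_id: str) -> None:
    status = statuses.setdefault(delete_id, DeleteStatus())
    try:
        await asyncio.sleep(0)  # stand-in for shutdown_room(...)
        status.status = "purging"
        await asyncio.sleep(0)  # stand-in for purge_events.purge_room(...)
        status.status = "complete"
    except Exception as exc:
        status.status = "failed"
        status.error = str(exc)
    finally:
        # Mirrors reactor.callLater(CLEAR_PURGE_AFTER_MS / 1000, clear_delete);
        # with asyncio.run below, the loop closes before this ever fires.
        asyncio.get_running_loop().call_later(
            CLEAR_PURGE_AFTER_MS / 1000, statuses.pop, delete_id, None
        )

asyncio.run(shutdown_and_purge("abc123"))
print(statuses["abc123"].status)  # -> "complete"
```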
@@ -464,3 +555,192 @@ async def get_messages( ) return chunk + + async def _shutdown_and_purge_room( + self, + delete_id: str, + room_id: str, + requester_user_id: str, + new_room_user_id: Optional[str] = None, + new_room_name: Optional[str] = None, + message: Optional[str] = None, + block: bool = False, + purge: bool = True, + force_purge: bool = False, + ) -> None: + """ + Shuts down and purges a room. + + See `RoomShutdownHandler.shutdown_room` for details of how the new room is created + + Args: + delete_id: The ID for this delete. + room_id: The ID of the room to shut down. + requester_user_id: + User who requested the action. Will be recorded as putting the room on the + blocking list. + new_room_user_id: + If set, a new room will be created with this user ID + as the creator and admin, and all users in the old room will be + moved into that room. If not set, no new room will be created + and the users will just be removed from the old room. + new_room_name: + A string representing the name of the room that new users will + be invited to. Defaults to `Content Violation Notification` + message: + A string containing the first message that will be sent as + `new_room_user_id` in the new room. Ideally this will clearly + convey why the original room was shut down. + Defaults to `Sharing illegal content on this server is not + permitted and rooms in violation will be blocked.` + block: + If set to `true`, this room will be added to a blocking list, + preventing future attempts to join the room. Defaults to `false`. + purge: + If set to `true`, purge the given room from the database. + force_purge: + If set to `true`, the room will be purged from the database + even if some users could not be removed from it. + + Saves a `RoomShutdownHandler.ShutdownRoomResponse` in `DeleteStatus`. + """ + + self._purges_in_progress_by_room.add(room_id) + try: + with await self.pagination_lock.write(room_id): + self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN + self._delete_by_id[ + delete_id + ].shutdown_room = await self._room_shutdown_handler.shutdown_room( + room_id=room_id, + requester_user_id=requester_user_id, + new_room_user_id=new_room_user_id, + new_room_name=new_room_name, + message=message, + block=block, + ) + self._delete_by_id[delete_id].status = DeleteStatus.STATUS_PURGING + + if purge: + logger.info("starting purge room_id %s", room_id) + + # first check that we have no users in this room + if not force_purge: + joined = await self.store.is_host_joined( + room_id, self._server_name + ) + if joined: + raise SynapseError( + 400, "Users are still joined to this room" + ) + + await self.storage.purge_events.purge_room(room_id) + + logger.info("complete") + self._delete_by_id[delete_id].status = DeleteStatus.STATUS_COMPLETE + except Exception: + f = Failure() + logger.error( + "failed", + exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore + ) + self._delete_by_id[delete_id].status = DeleteStatus.STATUS_FAILED + self._delete_by_id[delete_id].error = f.getErrorMessage() + finally: + self._purges_in_progress_by_room.discard(room_id) + + # remove the delete from the list 24 hours after it completes + def clear_delete() -> None: + del self._delete_by_id[delete_id] + self._delete_by_room[room_id].remove(delete_id) + if not self._delete_by_room[room_id]: + del self._delete_by_room[room_id] + + self.hs.get_reactor().callLater( + PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_delete + ) + + def start_shutdown_and_purge_room( + self, + room_id: str,
requester_user_id: str, + new_room_user_id: Optional[str] = None, + new_room_name: Optional[str] = None, + message: Optional[str] = None, + block: bool = False, + purge: bool = True, + force_purge: bool = False, + ) -> str: + """Start off the shutdown and purge of a room. + + Args: + room_id: The ID of the room to shut down. + requester_user_id: + User who requested the action and put the room on the + blocking list. + new_room_user_id: + If set, a new room will be created with this user ID + as the creator and admin, and all users in the old room will be + moved into that room. If not set, no new room will be created + and the users will just be removed from the old room. + new_room_name: + A string representing the name of the room that new users will + be invited to. Defaults to `Content Violation Notification` + message: + A string containing the first message that will be sent as + `new_room_user_id` in the new room. Ideally this will clearly + convey why the original room was shut down. + Defaults to `Sharing illegal content on this server is not + permitted and rooms in violation will be blocked.` + block: + If set to `true`, this room will be added to a blocking list, + preventing future attempts to join the room. Defaults to `false`. + purge: + If set to `true`, purge the given room from the database. + force_purge: + If set to `true`, the room will be purged from the database + even if some users could not be removed from it. + + Returns: + The unique ID for this delete transaction. + """ + if room_id in self._purges_in_progress_by_room: + raise SynapseError( + 400, "History purge already in progress for %s" % (room_id,) + ) + + # This check duplicates the one in `RoomShutdownHandler.shutdown_room`, + # but doing it here as well means the requester gets a direct HTTP error + # response instead of having to poll the purge status + if new_room_user_id is not None: + if not self.hs.is_mine_id(new_room_user_id): + raise SynapseError( + 400, "User must be our own: %s" % (new_room_user_id,) + ) + + delete_id = random_string(16) + + # we log the delete_id here so that it can be tied back to the + # request id in the log lines. + logger.info( + "starting shutdown room_id %s with delete_id %s", + room_id, + delete_id, + ) + + self._delete_by_id[delete_id] = DeleteStatus() + self._delete_by_room.setdefault(room_id, []).append(delete_id) + run_as_background_process( + "shutdown_and_purge_room", + self._shutdown_and_purge_room, + delete_id, + room_id, + requester_user_id, + new_room_user_id, + new_room_name, + message, + block, + purge, + force_purge, + ) + return delete_id diff --git a/synapse/handlers/password_policy.py b/synapse/handlers/password_policy.py index cd21efdcc642..eadd7ced0957 100644 --- a/synapse/handlers/password_policy.py +++ b/synapse/handlers/password_policy.py @@ -27,8 +27,8 @@ class PasswordPolicyHandler: def __init__(self, hs: "HomeServer"): - self.policy = hs.config.password_policy - self.enabled = hs.config.password_policy_enabled + self.policy = hs.config.auth.password_policy + self.enabled = hs.config.auth.password_policy_enabled # Regexps for the spec'd policy parameters.
self.regexp_digit = re.compile("[0-9]") diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 016c5df2ca75..454d06c9733d 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -26,17 +26,22 @@ import logging from bisect import bisect from contextlib import contextmanager +from types import TracebackType from typing import ( TYPE_CHECKING, + Any, + Awaitable, Callable, Collection, Dict, FrozenSet, + Generator, Iterable, List, Optional, Set, Tuple, + Type, Union, ) @@ -47,6 +52,7 @@ from synapse.api.constants import EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError from synapse.api.presence import UserPresenceState +from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background from synapse.logging.utils import log_function @@ -60,6 +66,7 @@ from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore +from synapse.streams import EventSource from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached @@ -239,7 +246,7 @@ async def set_state( """ @abc.abstractmethod - async def bump_presence_active_time(self, user: UserID): + async def bump_presence_active_time(self, user: UserID) -> None: """We've seen the user do something that indicates they're interacting with the app. """ @@ -273,7 +280,7 @@ async def update_external_syncs_clear(self, process_id: str) -> None: async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: """Process streams received over replication.""" await self._federation_queue.process_replication_rows( stream_name, instance_name, token, rows @@ -285,7 +292,7 @@ def get_federation_queue(self) -> "PresenceFederationQueue": async def maybe_send_presence_to_interested_destinations( self, states: List[UserPresenceState] - ): + ) -> None: """If this instance is a federation sender, send the states to all destinations that are interested. Filters out any states for remote users. @@ -308,7 +315,7 @@ async def maybe_send_presence_to_interested_destinations( for destination, host_states in hosts_to_states.items(): self._federation.send_presence_to_destinations(host_states, [destination]) - async def send_full_presence_to_users(self, user_ids: Collection[str]): + async def send_full_presence_to_users(self, user_ids: Collection[str]) -> None: """ Adds to the list of users who should receive a full snapshot of presence upon their next sync. Note that this only works for local users. @@ -353,11 +360,21 @@ async def send_full_presence_to_users(self, user_ids: Collection[str]): # otherwise would not do). await self.set_state(UserID.from_string(user_id), state, force_notify=True) + async def is_visible(self, observed_user: UserID, observer_user: UserID) -> bool: + raise NotImplementedError( + "Attempting to check presence on a non-presence worker." 
+ ) + class _NullContextManager(ContextManager[None]): """A context manager which does nothing.""" - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: pass @@ -368,7 +385,7 @@ def __init__(self, hs: "HomeServer"): self._presence_writer_instance = hs.config.worker.writers.presence[0] - self._presence_enabled = hs.config.use_presence + self._presence_enabled = hs.config.server.use_presence # Route presence EDUs to the right worker hs.get_federation_registry().register_instances_for_edu( @@ -404,7 +421,7 @@ def __init__(self, hs: "HomeServer"): self._on_shutdown, ) - def _on_shutdown(self) -> None: + async def _on_shutdown(self) -> None: if self._presence_enabled: self.hs.get_tcp_replication().send_command( ClearUserSyncsCommand(self.instance_id) @@ -462,7 +479,7 @@ async def user_syncing( if self._user_to_num_current_syncs[user_id] == 1: self.mark_as_coming_online(user_id) - def _end(): + def _end() -> None: # We check that the user_id is in user_to_num_current_syncs because # user_to_num_current_syncs may have been cleared if we are # shutting down. @@ -474,7 +491,7 @@ def _end(): self.mark_as_going_offline(user_id) @contextlib.contextmanager - def _user_syncing(): + def _user_syncing() -> Generator[None, None, None]: try: yield finally: @@ -497,7 +514,7 @@ async def notify_from_replication( async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: await super().process_replication_rows(stream_name, instance_name, token, rows) if stream_name != PresenceStream.NAME: @@ -578,7 +595,7 @@ async def set_state( user_id = target_user.to_string() # If presence is disabled, no-op - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return # Proxy request to instance that writes presence @@ -595,7 +612,7 @@ async def bump_presence_active_time(self, user: UserID) -> None: with the app. """ # If presence is disabled, no-op - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return # Proxy request to instance that writes presence @@ -610,9 +627,9 @@ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.hs = hs self.server_name = hs.hostname - self.wheel_timer = WheelTimer() + self.wheel_timer: WheelTimer[str] = WheelTimer() self.notifier = hs.get_notifier() - self._presence_enabled = hs.config.use_presence + self._presence_enabled = hs.config.server.use_presence federation_registry = hs.get_federation_registry() @@ -683,7 +700,7 @@ def __init__(self, hs: "HomeServer"): # Start a LoopingCall in 30s that fires every 5s. # The initial delay is to allow disconnected clients a chance to # reconnect before we treat them as offline. - def run_timeout_handler(): + def run_timeout_handler() -> Awaitable[None]: return run_as_background_process( "handle_presence_timeouts", self._handle_timeouts ) @@ -692,7 +709,7 @@ def run_timeout_handler(): 30, self.clock.looping_call, run_timeout_handler, 5000 ) - def run_persister(): + def run_persister() -> Awaitable[None]: return run_as_background_process( "persist_presence_changes", self._persist_unpersisted_changes ) @@ -910,7 +927,7 @@ async def bump_presence_active_time(self, user: UserID) -> None: with the app. 
""" # If presence is disabled, no-op - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return user_id = user.to_string() @@ -919,7 +936,7 @@ async def bump_presence_active_time(self, user: UserID) -> None: prev_state = await self.current_state_for_user(user_id) - new_fields = {"last_active_ts": self.clock.time_msec()} + new_fields: Dict[str, Any] = {"last_active_ts": self.clock.time_msec()} if prev_state.state == PresenceState.UNAVAILABLE: new_fields["state"] = PresenceState.ONLINE @@ -936,14 +953,14 @@ async def user_syncing( when users disconnect/reconnect. Args: - user_id (str) - affect_presence (bool): If false this function will be a no-op. + user_id + affect_presence: If false this function will be a no-op. Useful for streams that are not associated with an actual client that is being used by a user. """ # Override if it should affect the user's presence, if presence is # disabled. - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: affect_presence = False if affect_presence: @@ -972,7 +989,7 @@ async def user_syncing( ] ) - async def _end(): + async def _end() -> None: try: self.user_to_num_current_syncs[user_id] -= 1 @@ -988,7 +1005,7 @@ async def _end(): logger.exception("Error updating presence after sync") @contextmanager - def _user_syncing(): + def _user_syncing() -> Generator[None, None, None]: try: yield finally: @@ -1184,8 +1201,7 @@ async def set_state( new_fields = {"state": presence} if not ignore_status_msg: - msg = status_msg if presence != PresenceState.OFFLINE else None - new_fields["status_msg"] = msg + new_fields["status_msg"] = status_msg if presence == PresenceState.ONLINE or ( presence == PresenceState.BUSY and self._busy_presence_enabled @@ -1259,7 +1275,7 @@ def notify_new_event(self) -> None: if self._event_processing: return - async def _process_presence(): + async def _process_presence() -> None: assert not self._event_processing self._event_processing = True @@ -1467,18 +1483,44 @@ def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> def format_user_presence_state( state: UserPresenceState, now: int, include_user_id: bool = True ) -> JsonDict: - """Convert UserPresenceState to a format that can be sent down to clients + """Convert UserPresenceState to a JSON format that can be sent down to clients and to other servers. - The "user_id" is optional so that this function can be used to format presence - updates for client /sync responses and for federation /send requests. + Args: + state: The user presence state to format. + now: The current timestamp since the epoch in ms. + include_user_id: Whether to include `user_id` in the returned dictionary. + As this function can be used both to format presence updates for client /sync + responses and for federation /send requests, only the latter needs the include + the `user_id` field. + + Returns: + A JSON dictionary with the following keys: + * presence: The presence state as a str. + * user_id: Optional. Included if `include_user_id` is truthy. The canonical + Matrix ID of the user. + * last_active_ago: Optional. Included if `last_active_ts` is set on `state`. + The timestamp that the user was last active. + * status_msg: Optional. Included if `status_msg` is set on `state`. The user's + status. + * currently_active: Optional. Included only if `state.state` is "online". 
+ + Example: + + { + "presence": "online", + "user_id": "@alice:example.com", + "last_active_ago": 16783813918, + "status_msg": "Hello world!", + "currently_active": True + } """ - content = {"presence": state.state} + content: JsonDict = {"presence": state.state} if include_user_id: content["user_id"] = state.user_id if state.last_active_ts: content["last_active_ago"] = now - state.last_active_ts - if state.status_msg and state.state != PresenceState.OFFLINE: + if state.status_msg: content["status_msg"] = state.status_msg if state.state == PresenceState.ONLINE: content["currently_active"] = state.currently_active @@ -1486,7 +1528,7 @@ def format_user_presence_state( return content -class PresenceEventSource: +class PresenceEventSource(EventSource[int, UserPresenceState]): def __init__(self, hs: "HomeServer"): # We can't call get_presence_handler here because there's a cycle: # @@ -1505,10 +1547,12 @@ async def get_new_events( self, user: UserID, from_key: Optional[int], - room_ids: Optional[List[str]] = None, - include_offline: bool = True, + limit: Optional[int] = None, + room_ids: Optional[Collection[str]] = None, + is_guest: bool = False, explicit_room_id: Optional[str] = None, - **kwargs, + include_offline: bool = True, + service: Optional[ApplicationService] = None, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. @@ -1840,9 +1884,7 @@ def handle_timeout( # don't set them as offline. sync_or_active = max(state.last_user_sync_ts, state.last_active_ts) if now - sync_or_active > SYNC_ONLINE_TIMEOUT: - state = state.copy_and_replace( - state=PresenceState.OFFLINE, status_msg=None - ) + state = state.copy_and_replace(state=PresenceState.OFFLINE) changed = True else: # We expect to be poked occasionally by the other side. @@ -1850,7 +1892,7 @@ def handle_timeout( # no one gets stuck online forever. if now - state.last_federation_update_ts > FEDERATION_TIMEOUT: # The other side seems to have disappeared. - state = state.copy_and_replace(state=PresenceState.OFFLINE, status_msg=None) + state = state.copy_and_replace(state=PresenceState.OFFLINE) changed = True return state if changed else None @@ -2071,7 +2113,7 @@ def __init__(self, hs: "HomeServer", presence_handler: BasePresenceHandler): if self._queue_presence_updates: self._clock.looping_call(self._clear_queue, self._CLEAR_ITEMS_EVERY_MS) - def _clear_queue(self): + def _clear_queue(self) -> None: """Clear out older entries from the queue.""" clear_before = self._clock.time_msec() - self._KEEP_ITEMS_IN_QUEUE_FOR_MS @@ -2202,7 +2244,7 @@ async def get_replication_rows( async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: if stream_name != PresenceFederationStream.NAME: return diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 20a033d0ba5f..6b5a6ded8b29 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -32,8 +32,6 @@ get_domain_from_id, ) -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -43,7 +41,7 @@ MAX_AVATAR_URL_LEN = 1000 -class ProfileHandler(BaseHandler): +class ProfileHandler: """Handles fetching and updating user profile information. 
ProfileHandler can be instantiated directly on workers and will @@ -54,7 +52,9 @@ class ProfileHandler(BaseHandler): PROFILE_UPDATE_EVERY_MS = 24 * 60 * 60 * 1000 def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.hs = hs self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -62,8 +62,9 @@ def __init__(self, hs: "HomeServer"): ) self.user_directory_handler = hs.get_user_directory_handler() + self.request_ratelimiter = hs.get_request_ratelimiter() - if hs.config.run_background_tasks: + if hs.config.worker.run_background_tasks: self.clock.looping_call( self._update_remote_profile_cache, self.PROFILE_UPDATE_MS ) @@ -178,7 +179,7 @@ async def set_displayname( if not by_admin and target_user != requester.user: raise AuthError(400, "Cannot set another user's displayname") - if not by_admin and not self.hs.config.enable_set_displayname: + if not by_admin and not self.hs.config.registration.enable_set_displayname: profile = await self.store.get_profileinfo(target_user.localpart) if profile.display_name: raise SynapseError( @@ -214,11 +215,10 @@ async def set_displayname( target_user.localpart, displayname_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) @@ -254,7 +254,7 @@ async def set_avatar_url( requester: Requester, new_avatar_url: str, by_admin: bool = False, - ): + ) -> None: """Set a new avatar URL for a user. Args: @@ -269,7 +269,7 @@ async def set_avatar_url( if not by_admin and target_user != requester.user: raise AuthError(400, "Cannot set another user's avatar_url") - if not by_admin and not self.hs.config.enable_set_avatar_url: + if not by_admin and not self.hs.config.registration.enable_set_avatar_url: profile = await self.store.get_profileinfo(target_user.localpart) if profile.avatar_url: raise SynapseError( @@ -300,18 +300,17 @@ async def set_avatar_url( target_user.localpart, avatar_url_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) async def on_profile_query(self, args: JsonDict) -> JsonDict: """Handles federation profile query requests.""" - if not self.hs.config.allow_profile_lookup_over_federation: + if not self.hs.config.federation.allow_profile_lookup_over_federation: raise SynapseError( 403, "Profile lookup over federation is disabled on this homeserver", @@ -348,7 +347,7 @@ async def _update_join_states( if not self.hs.is_mine(target_user): return - await self.ratelimit(requester) + await self.request_ratelimiter.ratelimit(requester) # Do not actually update the room state for shadow-banned users. if requester.shadow_banned: @@ -399,7 +398,7 @@ async def check_profile_query_allowed( # when building a membership event. 
In this case, we must allow the # lookup. if ( - not self.hs.config.limit_profile_requests_to_users_who_share_rooms + not self.hs.config.server.limit_profile_requests_to_users_who_share_rooms or not requester ): return @@ -425,7 +424,7 @@ async def check_profile_query_allowed( raise @wrap_as_background_process("Update remote profile") - async def _update_remote_profile_cache(self): + async def _update_remote_profile_cache(self) -> None: """Called periodically to check profiles of remote users we haven't checked in a while. """ @@ -457,7 +456,11 @@ async def _update_remote_profile_cache(self): continue new_name = profile.get("displayname") + if not isinstance(new_name, str): + new_name = None new_avatar = profile.get("avatar_url") + if not isinstance(new_avatar, str): + new_avatar = None # We always hit update to update the last_check timestamp await self.store.update_remote_profile_cache(user_id, new_name, new_avatar) diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py index c679a8303ed4..58593e570e1d 100644 --- a/synapse/handlers/read_marker.py +++ b/synapse/handlers/read_marker.py @@ -17,18 +17,15 @@ from synapse.util.async_helpers import Linearizer -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) -class ReadMarkerHandler(BaseHandler): +class ReadMarkerHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - self.server_name = hs.config.server_name + self.server_name = hs.config.server.server_name self.store = hs.get_datastore() self.account_data_handler = hs.get_account_data_handler() self.read_marker_linearizer = Linearizer(name="read_marker") diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index b9085bbccb39..4911a1153519 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
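A recurring pattern in this patch: handlers stop inheriting from the removed BaseHandler mixin and instead pull each dependency they need straight off the HomeServer object in __init__. A minimal sketch of the resulting shape (the handler name is illustrative, not part of the codebase):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from synapse.server import HomeServer

class ExampleHandler:
    def __init__(self, hs: "HomeServer"):
        # Declare exactly the dependencies this handler uses, rather than
        # inheriting a grab-bag of attributes from BaseHandler.
        self.store = hs.get_datastore()
        self.clock = hs.get_clock()
        self.server_name = hs.config.server.server_name

Each handler's dependency list is now explicit at the top of the class, which is what lets classes like ProfileHandler be instantiated directly on workers.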
import logging -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.appservice import ApplicationService -from synapse.handlers._base import BaseHandler +from synapse.streams import EventSource from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id if TYPE_CHECKING: @@ -25,11 +25,10 @@ logger = logging.getLogger(__name__) -class ReceiptsHandler(BaseHandler): +class ReceiptsHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - - self.server_name = hs.config.server_name + self.notifier = hs.get_notifier() + self.server_name = hs.config.server.server_name self.store = hs.get_datastore() self.event_auth_handler = hs.get_event_auth_handler() @@ -70,7 +69,8 @@ async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None ) if not is_in_room: logger.info( - "Ignoring receipt from %s as we're not in the room", + "Ignoring receipt for room %r from server %s as we're not in the room", + room_id, origin, ) continue @@ -161,7 +161,7 @@ async def received_client_receipt( await self.federation_sender.send_read_receipt(receipt) -class ReceiptEventSource: +class ReceiptEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.config = hs.config @@ -187,7 +187,14 @@ def filter_out_hidden(events: List[JsonDict], user_id: str) -> List[JsonDict]: new_users = {} for rr_user_id, user_rr in m_read.items(): - hidden = user_rr.get("hidden", None) + try: + hidden = user_rr.get("hidden") + except AttributeError: + # Due to https://github.com/matrix-org/synapse/issues/10376 + # there are cases where user_rr is a string; in those cases + # we just ignore the read receipt + continue + if hidden is not True or rr_user_id == user_id: new_users[rr_user_id] = user_rr.copy() # If hidden has a value replace hidden with the correct prefixed key @@ -208,7 +215,13 @@ def filter_out_hidden(events: List[JsonDict], user_id: str) -> List[JsonDict]: return visible_events async def get_new_events( - self, from_key: int, room_ids: List[str], user: UserID, **kwargs + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: from_key = int(from_key) to_key = self.get_current_key() @@ -223,17 +236,23 @@ async def get_new_events( if self.config.experimental.msc2285_enabled: events = ReceiptEventSource.filter_out_hidden(events, user.to_string()) - return (events, to_key) + return events, to_key async def get_new_events_as( self, from_key: int, service: ApplicationService ) -> Tuple[List[JsonDict], int]: - """Returns a set of new receipt events that an appservice + """Returns a set of new read receipt events that an appservice may be interested in. Args: from_key: the stream position from which events should be fetched service: The appservice which may be interested + + Returns: + A two-tuple containing the following: + * A list of json dictionaries derived from read receipts that the + appservice may be interested in. + * The current read receipt stream token.
""" from_key = int(from_key) to_key = self.get_current_key() @@ -255,7 +274,7 @@ async def get_new_events_as( events.append(event) - return (events, to_key) + return events, to_key def get_current_key(self, direction: str = "f") -> int: return self.store.get_max_receipt_stream_id() diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 8cf614136ebb..f08a516a7588 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -1,4 +1,5 @@ # Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,7 +22,13 @@ from typing_extensions import TypedDict from synapse import types -from synapse.api.constants import MAX_USERID_LENGTH, EventTypes, JoinRules, LoginType +from synapse.api.constants import ( + MAX_USERID_LENGTH, + EventContentFields, + EventTypes, + JoinRules, + LoginType, +) from synapse.api.errors import AuthError, Codes, ConsentNotGivenError, SynapseError from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved @@ -35,8 +42,6 @@ from synapse.storage.state import StateFilter from synapse.types import RoomAlias, UserID, create_requester -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -56,6 +61,22 @@ ) +def init_counters_for_auth_provider(auth_provider_id: str) -> None: + """Ensure the prometheus counters for the given auth provider are initialised + + This fixes a problem where the counters are not reported for a given auth provider + until the user first logs in/registers. + """ + for is_guest in (True, False): + login_counter.labels(guest=is_guest, auth_provider=auth_provider_id) + for shadow_banned in (True, False): + registration_counter.labels( + guest=is_guest, + shadow_banned=shadow_banned, + auth_provider=auth_provider_id, + ) + + class LoginDict(TypedDict): device_id: str access_token: str @@ -63,9 +84,10 @@ class LoginDict(TypedDict): refresh_token: Optional[str] -class RegistrationHandler(BaseHandler): +class RegistrationHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() + self.clock = hs.get_clock() self.hs = hs self.auth = hs.get_auth() self._auth_handler = hs.get_auth_handler() @@ -75,12 +97,13 @@ def __init__(self, hs: "HomeServer"): self.ratelimiter = hs.get_registration_ratelimiter() self.macaroon_gen = hs.get_macaroon_generator() self._account_validity_handler = hs.get_account_validity_handler() - self._server_notices_mxid = hs.config.server_notices_mxid + self._user_consent_version = self.hs.config.consent.user_consent_version + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self._server_name = hs.hostname self.spam_checker = hs.get_spam_checker() - if hs.config.worker_app: + if hs.config.worker.worker_app: self._register_client = ReplicationRegisterServlet.make_client(hs) self._register_device_client = RegisterDeviceReplicationServlet.make_client( hs @@ -93,15 +116,23 @@ def __init__(self, hs: "HomeServer"): self._register_device_client = self.register_device_inner self.pusher_pool = hs.get_pusherpool() - self.session_lifetime = hs.config.session_lifetime - self.access_token_lifetime = hs.config.access_token_lifetime + self.session_lifetime = hs.config.registration.session_lifetime + self.nonrefreshable_access_token_lifetime = ( + hs.config.registration.nonrefreshable_access_token_lifetime 
+ ) + self.refreshable_access_token_lifetime = ( + hs.config.registration.refreshable_access_token_lifetime + ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime + + init_counters_for_auth_provider("") async def check_username( self, localpart: str, guest_access_token: Optional[str] = None, assigned_user_id: Optional[str] = None, - ): + ) -> None: if types.contains_invalid_mxid_characters(localpart): raise SynapseError( 400, @@ -271,11 +302,10 @@ async def register_user( shadow_banned=shadow_banned, ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(localpart) + await self.user_directory_handler.handle_local_profile_change( + user_id, profile + ) else: # autogen a sequential user ID @@ -316,8 +346,13 @@ async def register_user( auth_provider=(auth_provider_id or ""), ).inc() - if not self.hs.config.user_consent_at_registration: - if not self.hs.config.auto_join_rooms_for_guests and make_guest: + # If the user does not need to consent at registration, auto-join any + # configured rooms. + if not self.hs.config.consent.user_consent_at_registration: + if ( + not self.hs.config.registration.auto_join_rooms_for_guests + and make_guest + ): logger.info( "Skipping auto-join for %s because auto-join for guests is disabled", user_id, @@ -363,7 +398,7 @@ async def _create_and_join_rooms(self, user_id: str) -> None: "preset": self.hs.config.registration.autocreate_auto_join_room_preset, } - # If the configuration providers a user ID to create rooms with, use + # If the configuration provides a user ID to create rooms with, use # that instead of the first user registered. requires_join = False if self.hs.config.registration.auto_join_user_id: @@ -387,7 +422,7 @@ async def _create_and_join_rooms(self, user_id: str) -> None: # Choose whether to federate the new room. if not self.hs.config.registration.autocreate_auto_join_rooms_federated: - stub_config["creation_content"] = {"m.federate": False} + stub_config["creation_content"] = {EventContentFields.FEDERATE: False} for r in self.hs.config.registration.auto_join_rooms: logger.info("Auto-joining %s to %s", user_id, r) @@ -486,7 +521,7 @@ async def _join_rooms(self, user_id: str) -> None: # we don't have a local user in the room to craft up an invite with. requires_invite = await self.store.is_host_joined( room_id, - self.server_name, + self._server_name, ) if requires_invite: @@ -672,7 +707,7 @@ async def register_with_store( address: the IP address used to perform the registration. shadow_banned: Whether to shadow-ban the user """ - if self.hs.config.worker_app: + if self.hs.config.worker.worker_app: await self._register_client( user_id=user_id, password_hash=password_hash, @@ -711,6 +746,7 @@ async def register_device( is_appservice_ghost: bool = False, auth_provider_id: Optional[str] = None, should_issue_refresh_token: bool = False, + auth_provider_session_id: Optional[str] = None, ) -> Tuple[str, str, Optional[int], Optional[str]]: """Register a device for a user and generate an access token. @@ -721,9 +757,9 @@ async def register_device( device_id: The device ID to check, or None to generate a new one. initial_display_name: An optional display name for the device. is_guest: Whether this is a guest account - auth_provider_id: The SSO IdP the user used, if any (just used for the - prometheus metrics). 
+ auth_provider_id: The SSO IdP the user used, if any. should_issue_refresh_token: Whether it should also issue a refresh token + auth_provider_session_id: The session ID received during login from the SSO IdP. Returns: Tuple of device ID, access token, access token expiration time and refresh token """ @@ -734,6 +770,8 @@ async def register_device( is_guest=is_guest, is_appservice_ghost=is_appservice_ghost, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) login_counter.labels( @@ -756,45 +794,95 @@ async def register_device_inner( is_guest: bool = False, is_appservice_ghost: bool = False, should_issue_refresh_token: bool = False, + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, ) -> LoginDict: """Helper for register_device Does the bits that need doing on the main process. Not for use outside this class and RegisterDeviceReplicationServlet. """ - assert not self.hs.config.worker_app - valid_until_ms = None + assert not self.hs.config.worker.worker_app + now_ms = self.clock.time_msec() + access_token_expiry = None if self.session_lifetime is not None: if is_guest: raise Exception( "session_lifetime is not currently implemented for guest access" ) - valid_until_ms = self.clock.time_msec() + self.session_lifetime + access_token_expiry = now_ms + self.session_lifetime + + if self.nonrefreshable_access_token_lifetime is not None: + if access_token_expiry is not None: + # Don't allow the non-refreshable access token to outlive the + # session. + access_token_expiry = min( + now_ms + self.nonrefreshable_access_token_lifetime, + access_token_expiry, + ) + else: + access_token_expiry = now_ms + self.nonrefreshable_access_token_lifetime refresh_token = None refresh_token_id = None registered_device_id = await self.device_handler.check_device_registered( - user_id, device_id, initial_display_name + user_id, + device_id, + initial_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if is_guest: - assert valid_until_ms is None + assert access_token_expiry is None access_token = self.macaroon_gen.generate_guest_access_token(user_id) else: if should_issue_refresh_token: + # A refreshable access token lifetime must be configured + # since we're told to issue a refresh token (the caller checks + # that this value is set before setting this flag). + assert self.refreshable_access_token_lifetime is not None + + # Set the expiry time of the refreshable access token + access_token_expiry = now_ms + self.refreshable_access_token_lifetime + + # Set the refresh token expiry time (if configured) + refresh_token_expiry = None + if self.refresh_token_lifetime is not None: + refresh_token_expiry = now_ms + self.refresh_token_lifetime + + # Set an ultimate session expiry time (if configured) + ultimate_session_expiry_ts = None + if self.session_lifetime is not None: + ultimate_session_expiry_ts = now_ms + self.session_lifetime + + # Also ensure that the issued tokens don't outlive the + # session. + # (It would be weird to configure a homeserver with a shorter + # session lifetime than token lifetime, but may as well handle + # it.) 
+ access_token_expiry = min( + access_token_expiry, ultimate_session_expiry_ts + ) + if refresh_token_expiry is not None: + refresh_token_expiry = min( + refresh_token_expiry, ultimate_session_expiry_ts + ) + ( refresh_token, refresh_token_id, - ) = await self._auth_handler.get_refresh_token_for_user_id( + ) = await self._auth_handler.create_refresh_token_for_user_id( user_id, device_id=registered_device_id, + expiry_ts=refresh_token_expiry, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) - valid_until_ms = self.clock.time_msec() + self.access_token_lifetime - access_token = await self._auth_handler.get_access_token_for_user_id( + access_token = await self._auth_handler.create_access_token_for_user_id( user_id, device_id=registered_device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_expiry, is_appservice_ghost=is_appservice_ghost, refresh_token_id=refresh_token_id, ) @@ -802,7 +890,7 @@ class and RegisterDeviceReplicationServlet. return { "device_id": registered_device_id, "access_token": access_token, - "valid_until_ms": valid_until_ms, + "valid_until_ms": access_token_expiry, "refresh_token": refresh_token, } @@ -819,7 +907,7 @@ async def post_registration_actions( """ # TODO: 3pid registration can actually happen on the workers. Consider # refactoring it. - if self.hs.config.worker_app: + if self.hs.config.worker.worker_app: await self._post_registration_client( user_id=user_id, auth_result=auth_result, access_token=access_token ) @@ -830,7 +918,7 @@ async def post_registration_actions( # Necessary due to auth checks prior to the threepid being # written to the db if is_threepid_reserved( - self.hs.config.mau_limits_reserved_threepids, threepid + self.hs.config.server.mau_limits_reserved_threepids, threepid ): await self.store.upsert_monthly_active_user(user_id) @@ -841,7 +929,9 @@ async def post_registration_actions( await self._register_msisdn_threepid(user_id, threepid) if auth_result and LoginType.TERMS in auth_result: - await self._on_user_consented(user_id, self.hs.config.user_consent_version) + # The terms type should only exist if consent is enabled. + assert self._user_consent_version is not None + await self._on_user_consented(user_id, self._user_consent_version) async def _on_user_consented(self, user_id: str, consent_version: str) -> None: """A user consented to the terms on registration @@ -887,8 +977,8 @@ async def _register_email_threepid( # getting mail spam where they weren't before if email # notifs are set up on a homeserver) if ( - self.hs.config.email_enable_notifs - and self.hs.config.email_notif_for_new_users + self.hs.config.email.email_enable_notifs + and self.hs.config.email.email_notif_for_new_users and token ): # Pull the ID of the access token back out of the db diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 77dab206db24..402ac830c56e 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1,6 +1,4 @@ -# Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2018-2019 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2016-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,27 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. 
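The expiry arithmetic above boils down to taking the minimum of each candidate expiry and the ultimate session expiry. A standalone sketch of that capping logic, with hypothetical helper and parameter names (not taken from the codebase):

from typing import Optional

def capped_expiry(
    now_ms: int,
    token_lifetime_ms: int,
    session_lifetime_ms: Optional[int],
) -> int:
    # A token issued now expires after its own lifetime, but must never
    # outlive the session, even on a homeserver (oddly) configured with a
    # shorter session lifetime than token lifetime.
    expiry = now_ms + token_lifetime_ms
    if session_lifetime_ms is not None:
        expiry = min(expiry, now_ms + session_lifetime_ms)
    return expiry

The same min() capping is applied independently to the non-refreshable access token, the refreshable access token, and the refresh token in the code above.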
-"""Contains functions for performing events on rooms.""" - +"""Contains functions for performing actions on rooms.""" import itertools import logging import math import random import string from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Collection, + Dict, + List, + Optional, + Tuple, +) + +from typing_extensions import TypedDict from synapse.api.constants import ( + EventContentFields, EventTypes, + GuestAccess, HistoryVisibility, JoinRules, Membership, RoomCreationPreset, RoomEncryptionAlgorithms, + RoomTypes, ) from synapse.api.errors import ( AuthError, Codes, + HttpResponseException, LimitExceededError, NotFoundError, StoreError, @@ -42,10 +54,14 @@ ) from synapse.api.filtering import Filter from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion +from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.utils import copy_power_levels_contents +from synapse.federation.federation_client import InvalidResponseError +from synapse.handlers.federation import get_domains_from_state from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter +from synapse.streams import EventSource from synapse.types import ( JsonDict, MutableStateMap, @@ -64,8 +80,6 @@ from synapse.util.stringutils import parse_and_validate_server_name from synapse.visibility import filter_events_for_client -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -76,15 +90,18 @@ FIVE_MINUTES_IN_MS = 5 * 60 * 1000 -class RoomCreationHandler(BaseHandler): +class RoomCreationHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.clock = hs.get_clock() + self.hs = hs self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self._event_auth_handler = hs.get_event_auth_handler() self.config = hs.config + self.request_ratelimiter = hs.get_request_ratelimiter() # Room state based off defined presets self._presets_dict: Dict[str, Dict[str, Any]] = { @@ -115,7 +132,7 @@ def __init__(self, hs: "HomeServer"): for preset_name, preset_config in self._presets_dict.items(): encrypted = ( preset_name - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) preset_config["encrypted"] = encrypted @@ -130,7 +147,7 @@ def __init__(self, hs: "HomeServer"): self._upgrade_response_cache: ResponseCache[Tuple[str, str]] = ResponseCache( hs.get_clock(), "room_upgrade", timeout_ms=FIVE_MINUTES_IN_MS ) - self._server_notices_mxid = hs.config.server_notices_mxid + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self.third_party_event_rules = hs.get_third_party_event_rules() @@ -150,7 +167,7 @@ async def upgrade_room( Raises: ShadowBanError if the requester is shadow-banned. 
""" - await self.ratelimit(requester) + await self.request_ratelimiter.ratelimit(requester) user_id = requester.user.to_string() @@ -183,7 +200,7 @@ async def upgrade_room( async def _upgrade_room( self, requester: Requester, old_room_id: str, new_version: RoomVersion - ): + ) -> str: """ Args: requester: the user requesting the upgrade @@ -226,8 +243,9 @@ async def _upgrade_room( }, }, ) - old_room_version = await self.store.get_room_version_id(old_room_id) - await self._event_auth_handler.check_from_context( + old_room_version = await self.store.get_room_version(old_room_id) + validate_event_for_room_version(old_room_version, tombstone_event) + await self._event_auth_handler.check_auth_rules_from_context( old_room_version, tombstone_event, tombstone_context ) @@ -388,14 +406,14 @@ async def clone_existing_room( old_room_create_event = await self.store.get_create_event_for_room(old_room_id) # Check if the create event specified a non-federatable room - if not old_room_create_event.content.get("m.federate", True): + if not old_room_create_event.content.get(EventContentFields.FEDERATE, True): # If so, mark the new room as non-federatable as well - creation_content["m.federate"] = False + creation_content[EventContentFields.FEDERATE] = False initial_state = {} # Replicate relevant room events - types_to_copy = ( + types_to_copy: List[Tuple[str, Optional[str]]] = [ (EventTypes.JoinRules, ""), (EventTypes.Name, ""), (EventTypes.Topic, ""), @@ -406,7 +424,16 @@ async def clone_existing_room( (EventTypes.ServerACL, ""), (EventTypes.RelatedGroups, ""), (EventTypes.PowerLevels, ""), - ) + ] + + # If the old room was a space, copy over the room type and the rooms in + # the space. + if ( + old_room_create_event.content.get(EventContentFields.ROOM_TYPE) + == RoomTypes.SPACE + ): + creation_content[EventContentFields.ROOM_TYPE] = RoomTypes.SPACE + types_to_copy.append((EventTypes.SpaceChild, None)) old_room_state_ids = await self.store.get_filtered_current_state_ids( old_room_id, StateFilter.from_types(types_to_copy) @@ -417,6 +444,11 @@ async def clone_existing_room( for k, old_event_id in old_room_state_ids.items(): old_event = old_room_state_events.get(old_event_id) if old_event: + # If the event is an space child event with empty content, it was + # removed from the space and should be ignored. + if k[0] == EventTypes.SpaceChild and not old_event.content: + continue + initial_state[k] = old_event.content # deep-copy the power-levels event before we start modifying it @@ -437,17 +469,35 @@ async def clone_existing_room( # the room has been created # Calculate the minimum power level needed to clone the room event_power_levels = power_levels.get("events", {}) + if not isinstance(event_power_levels, dict): + event_power_levels = {} state_default = power_levels.get("state_default", 50) + try: + state_default_int = int(state_default) # type: ignore[arg-type] + except (TypeError, ValueError): + state_default_int = 50 ban = power_levels.get("ban", 50) - needed_power_level = max(state_default, ban, max(event_power_levels.values())) + try: + ban = int(ban) # type: ignore[arg-type] + except (TypeError, ValueError): + ban = 50 + needed_power_level = max( + state_default_int, ban, max(event_power_levels.values()) + ) # Get the user's current power level, this matches the logic in get_user_power_level, # but without the entire state map. 
user_power_levels = power_levels.setdefault("users", {}) + if not isinstance(user_power_levels, dict): + user_power_levels = {} users_default = power_levels.get("users_default", 0) current_power_level = user_power_levels.get(user_id, users_default) + try: + current_power_level_int = int(current_power_level) # type: ignore[arg-type] + except (TypeError, ValueError): + current_power_level_int = 0 # Raise the requester's power level in the new room if necessary - if current_power_level < needed_power_level: + if current_power_level_int < needed_power_level: user_power_levels[user_id] = needed_power_level await self._send_events_for_new_room( @@ -479,7 +529,7 @@ async def clone_existing_room( ): await self.room_member_handler.update_membership( requester, - UserID.from_string(old_event["state_key"]), + UserID.from_string(old_event.state_key), new_room_id, "ban", ratelimit=False, @@ -495,7 +545,7 @@ async def _move_aliases_to_new_room( old_room_id: str, new_room_id: str, old_room_state: StateMap[str], - ): + ) -> None: # check to see if we have a canonical alias. canonical_alias_event = None canonical_alias_event_id = old_room_state.get((EventTypes.CanonicalAlias, "")) @@ -633,16 +683,24 @@ async def create_room( requester, config, is_requester_admin=is_requester_admin ) - if not is_requester_admin and not await self.spam_checker.user_may_create_room( - user_id + invite_3pid_list = config.get("invite_3pid", []) + invite_list = config.get("invite", []) + + if not is_requester_admin and not ( + await self.spam_checker.user_may_create_room(user_id) + and await self.spam_checker.user_may_create_room_with_invites( + user_id, + invite_list, + invite_3pid_list, + ) ): raise SynapseError(403, "You are not permitted to create rooms") if ratelimit: - await self.ratelimit(requester) + await self.request_ratelimiter.ratelimit(requester) room_version_id = config.get( - "room_version", self.config.default_room_version.identifier + "room_version", self.config.server.default_room_version.identifier ) if not isinstance(room_version_id, str): @@ -668,8 +726,6 @@ async def create_room( if mapping: raise SynapseError(400, "Room alias already taken", Codes.ROOM_IN_USE) - invite_3pid_list = config.get("invite_3pid", []) - invite_list = config.get("invite", []) for i in invite_list: try: uid = UserID.from_string(i) @@ -730,6 +786,18 @@ async def create_room( if not allowed_by_third_party_rules: raise SynapseError(403, "Room visibility value not allowed.") + if is_public: + room_aliases = [] + if room_alias: + room_aliases.append(room_alias.to_string()) + if not self.config.roomdirectory.is_publishing_room_allowed( + user_id, room_id, room_aliases + ): + # Let's just return a generic message, as there may be all sorts of + # reasons why we said no. TODO: Allow configurable error messages + # per alias creation rule? + raise SynapseError(403, "Not allowed to publish room") + directory_handler = self.hs.get_directory_handler() if room_alias: await directory_handler.create_association( @@ -740,13 +808,6 @@ async def create_room( check_membership=False, ) - if is_public: - if not self.config.is_publishing_room_allowed(user_id, room_id, room_alias): - # Lets just return a generic message, as there may be all sorts of - # reasons why we said no. TODO: Allow configurable error messages - # per alias creation rule? 
- raise SynapseError(403, "Not allowed to publish room") - preset_config = config.get( "preset", RoomCreationPreset.PRIVATE_CHAT @@ -835,6 +896,7 @@ async def create_room( "invite", ratelimit=False, content=content, + new_room=True, ) for invite_3pid in invite_3pid_list: @@ -897,7 +959,7 @@ async def _send_events_for_new_room( event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} - def create(etype: str, content: JsonDict, **kwargs) -> JsonDict: + def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} e.update(event_keys) @@ -905,7 +967,7 @@ def create(etype: str, content: JsonDict, **kwargs) -> JsonDict: return e - async def send(etype: str, content: JsonDict, **kwargs) -> int: + async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) # Allow these events to be sent even if the user is shadow-banned to @@ -921,7 +983,12 @@ async def send(etype: str, content: JsonDict, **kwargs) -> int: ) return last_stream_id - config = self._presets_dict[preset_config] + try: + config = self._presets_dict[preset_config] + except KeyError: + raise SynapseError( + 400, f"'{preset_config}' is not a valid preset", errcode=Codes.BAD_JSON + ) creation_content.update({"creator": creator_id}) await send(etype=EventTypes.Create, content=creation_content) @@ -934,6 +1001,7 @@ async def send(etype: str, content: JsonDict, **kwargs) -> int: "join", ratelimit=ratelimit, content=creator_join_profile, + new_room=True, ) # We treat the power levels override specially as this needs to be one @@ -1000,7 +1068,8 @@ async def send(etype: str, content: JsonDict, **kwargs) -> int: if config["guest_can_join"]: if (EventTypes.GuestAccess, "") not in initial_state: last_sent_stream_id = await send( - etype=EventTypes.GuestAccess, content={"guest_access": "can_join"} + etype=EventTypes.GuestAccess, + content={EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN}, ) for (etype, state_key), content in initial_state.items(): @@ -1022,7 +1091,7 @@ async def _generate_room_id( creator_id: str, is_public: bool, room_version: RoomVersion, - ): + ) -> str: # autogen room IDs and try to create it. We may clash, so just # try a few times till one goes through, giving up eventually. 
attempts = 0 @@ -1086,7 +1155,7 @@ async def get_event_context( users = await self.store.get_users_in_room(room_id) is_peeking = user.to_string() not in users - async def filter_evts(events): + async def filter_evts(events: List[EventBase]) -> List[EventBase]: if use_admin_priviledge: return events return await filter_events_for_client( @@ -1108,8 +1177,10 @@ async def filter_evts(events): ) if event_filter: - results["events_before"] = event_filter.filter(results["events_before"]) - results["events_after"] = event_filter.filter(results["events_after"]) + results["events_before"] = await event_filter.filter( + results["events_before"] + ) + results["events_after"] = await event_filter.filter(results["events_after"]) results["events_before"] = await filter_evts(results["events_before"]) results["events_after"] = await filter_evts(results["events_after"]) @@ -1123,7 +1194,7 @@ async def filter_evts(events): else: last_event_id = event_id - if event_filter and event_filter.lazy_load_members(): + if event_filter and event_filter.lazy_load_members: state_filter = StateFilter.from_lazy_load_member_list( ev.sender for ev in itertools.chain( @@ -1145,7 +1216,7 @@ async def filter_evts(events): state_events = list(state[last_event_id].values()) if event_filter: - state_events = event_filter.filter(state_events) + state_events = await event_filter.filter(state_events) results["state"] = await filter_evts(state_events) @@ -1164,7 +1235,148 @@ async def filter_evts(events): return results -class RoomEventSource: +class TimestampLookupHandler: + def __init__(self, hs: "HomeServer"): + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.state_handler = hs.get_state_handler() + self.federation_client = hs.get_federation_client() + + async def get_event_for_timestamp( + self, + requester: Requester, + room_id: str, + timestamp: int, + direction: str, + ) -> Tuple[str, int]: + """Find the closest event to the given timestamp in the given direction. + If we can't find an event locally or the event we have locally is next to a gap, + it will ask other federated homeservers for an event. + + Args: + requester: The user making the request according to the access token + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + A tuple containing the `event_id` closest to the given timestamp in + the given direction and the `origin_server_ts`. + + Raises: + SynapseError if unable to find any event locally in the given direction + """ + + local_event_id = await self.store.get_event_id_for_timestamp( + room_id, timestamp, direction + ) + logger.debug( + "get_event_for_timestamp: locally, we found event_id=%s closest to timestamp=%s", + local_event_id, + timestamp, + ) + + # Check for gaps in the history where events could be hiding in between + # the timestamp given and the event we were able to find locally + is_event_next_to_backward_gap = False + is_event_next_to_forward_gap = False + if local_event_id: + local_event = await self.store.get_event( + local_event_id, allow_none=False, allow_rejected=False + ) + + if direction == "f": + # We only need to check for a backward gap if we're looking forwards + # to ensure there is nothing in between. 
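Once remote candidates arrive, the handler keeps whichever event is closer to the requested timestamp. A small sketch of that tie-break (hypothetical helper; timestamps are ms since the epoch):

def pick_closer(local_ts: int, remote_ts: int, timestamp: int) -> str:
    # The remote event only wins if it is strictly closer, so on a tie we
    # keep the event we already have locally.
    if abs(remote_ts - timestamp) < abs(local_ts - timestamp):
        return "remote"
    return "local"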
+ is_event_next_to_backward_gap = ( + await self.store.is_event_next_to_backward_gap(local_event) + ) + elif direction == "b": + # We only need to check for a forward gap if we're looking backwards + # to ensure there is nothing in between + is_event_next_to_forward_gap = ( + await self.store.is_event_next_to_forward_gap(local_event) + ) + + # If we found a gap, we should probably ask another homeserver first + # about more history in between + if ( + not local_event_id + or is_event_next_to_backward_gap + or is_event_next_to_forward_gap + ): + logger.debug( + "get_event_for_timestamp: locally, we found event_id=%s closest to timestamp=%s which is next to a gap in event history so we're asking other homeservers first", + local_event_id, + timestamp, + ) + + # Find other homeservers from the given state in the room + curr_state = await self.state_handler.get_current_state(room_id) + curr_domains = get_domains_from_state(curr_state) + likely_domains = [ + domain for domain, depth in curr_domains if domain != self.server_name + ] + + # Loop through each homeserver candidate until we get a successful response + for domain in likely_domains: + try: + remote_response = await self.federation_client.timestamp_to_event( + domain, room_id, timestamp, direction + ) + logger.debug( + "get_event_for_timestamp: response from domain(%s)=%s", + domain, + remote_response, + ) + + # TODO: Do we want to persist this as an extremity? + # TODO: I think ideally, we would try to backfill from + # this event and run this whole + # `get_event_for_timestamp` function again to make sure + # they didn't give us an event from their gappy history. + remote_event_id = remote_response.event_id + origin_server_ts = remote_response.origin_server_ts + + # Only return the remote event if it's closer than the local event + if not local_event or ( + abs(origin_server_ts - timestamp) + < abs(local_event.origin_server_ts - timestamp) + ): + return remote_event_id, origin_server_ts + except (HttpResponseException, InvalidResponseError) as ex: + # Let's not put a high priority on some other homeserver + # failing to respond or giving a random response + logger.debug( + "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + domain, + type(ex).__name__, + ex, + ex.args, + ) + except Exception as ex: + # But we do want to see some exceptions in our code + logger.warning( + "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + domain, + type(ex).__name__, + ex, + ex.args, + ) + + if not local_event_id: + raise SynapseError( + 404, + "Unable to find event from %s in direction %s" % (timestamp, direction), + errcode=Codes.NOT_FOUND, + ) + + return local_event_id, local_event.origin_server_ts + + +class RoomEventSource(EventSource[RoomStreamToken, EventBase]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -1172,8 +1384,8 @@ async def get_new_events( self, user: UserID, from_key: RoomStreamToken, - limit: int, - room_ids: List[str], + limit: Optional[int], + room_ids: Collection[str], is_guest: bool, explicit_room_id: Optional[str] = None, ) -> Tuple[List[EventBase], RoomStreamToken]: @@ -1216,7 +1428,7 @@ async def get_new_events( else: end_key = to_key - return (events, end_key) + return events, end_key def get_current_key(self) -> RoomStreamToken: return self.store.get_room_max_token() @@ -1225,8 +1437,25 @@ def get_current_key_for_room(self, room_id: str) -> Awaitable[str]: return self.store.get_room_events_max_id(room_id) -class
RoomShutdownHandler: +class ShutdownRoomResponse(TypedDict): + """ + Attributes: + kicked_users: An array of users (`user_id`) that were kicked. + failed_to_kick_users: + An array of users (`user_id`) that were not kicked. + local_aliases: + An array of strings representing the local aliases that were + migrated from the old room to the new. + new_room_id: A string representing the room ID of the new room. + """ + + kicked_users: List[str] + failed_to_kick_users: List[str] + local_aliases: List[str] + new_room_id: Optional[str] + +class RoomShutdownHandler: DEFAULT_MESSAGE = ( "Sharing illegal content on this server is not permitted and rooms in" " violation will be blocked." @@ -1239,7 +1468,6 @@ def __init__(self, hs: "HomeServer"): self._room_creation_handler = hs.get_room_creation_handler() self._replication = hs.get_replication_data_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.state = hs.get_state_handler() self.store = hs.get_datastore() async def shutdown_room( @@ -1250,7 +1478,7 @@ async def shutdown_room( new_room_name: Optional[str] = None, message: Optional[str] = None, block: bool = False, - ) -> dict: + ) -> ShutdownRoomResponse: """ Shuts down a room. Moves all local users and room aliases automatically to a new room if `new_room_user_id` is set. Otherwise local users only @@ -1284,8 +1512,13 @@ async def shutdown_room( Defaults to `Sharing illegal content on this server is not permitted and rooms in violation will be blocked.` block: - If set to `true`, this room will be added to a blocking list, - preventing future attempts to join the room. Defaults to `false`. + If set to `True`, users will be prevented from joining the old + room. This option can also be used to pre-emptively block a room, + even if it's unknown to this homeserver. In this case, the room + will be blocked, and no further action will be taken. If `False`, + attempting to delete an unknown room is invalid. + + Defaults to `False`. Returns: a dict containing the following keys: kicked_users: An array of users (`user_id`) that were kicked. @@ -1294,7 +1527,9 @@ failed_to_kick_users: An array of users (`user_id`) that were not kicked. local_aliases: An array of strings representing the local aliases that were migrated from the old room to the new. - new_room_id: A string representing the room ID of the new room. + new_room_id: + A string representing the room ID of the new room, or None if + no such room was created. """ if not new_room_name: @@ -1305,14 +1540,21 @@ async def shutdown_room( if not RoomID.is_valid(room_id): raise SynapseError(400, "%s is not a legal room ID" % (room_id,)) - if not await self.store.get_room(room_id): - raise NotFoundError("Unknown room id %s" % (room_id,)) - - # This will work even if the room is already blocked, but that is - # desirable in case the first attempt at blocking the room failed below. + # Action the block first (even if the room doesn't exist yet) if block: + # This will work even if the room is already blocked, but that is + # desirable in case the first attempt at blocking the room failed below. await self.store.block_room(room_id, requester_user_id) + if not await self.store.get_room(room_id): + # if we don't know about the room, there is nothing left to do.
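A hypothetical usage sketch of the new block-first semantics (the handler variable and all IDs are examples): pre-emptively blocking a room this homeserver has never joined now succeeds and returns the empty response, instead of raising NotFoundError.

async def preemptive_block_example(handler) -> None:
    # `handler` is assumed to be the RoomShutdownHandler instance.
    result = await handler.shutdown_room(
        room_id="!never-joined:remote.example",
        requester_user_id="@admin:example.com",
        block=True,
    )
    # Nothing to kick, and no replacement room was created.
    assert result["kicked_users"] == [] and result["new_room_id"] is None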
+ return { + "kicked_users": [], + "failed_to_kick_users": [], + "local_aliases": [], + "new_room_id": None, + } + if new_room_user_id is not None: if not self.hs.is_mine_id(new_room_user_id): raise SynapseError( diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py new file mode 100644 index 000000000000..f880aa93d29a --- /dev/null +++ b/synapse/handlers/room_batch.py @@ -0,0 +1,445 @@ +import logging +from typing import TYPE_CHECKING, List, Tuple + +from synapse.api.constants import EventContentFields, EventTypes +from synapse.appservice import ApplicationService +from synapse.http.servlet import assert_params_in_dict +from synapse.types import JsonDict, Requester, UserID, create_requester +from synapse.util.stringutils import random_string + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +def generate_fake_event_id() -> str: + return "$fake_" + random_string(43) + + +class RoomBatchHandler: + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.store = hs.get_datastore() + self.state_store = hs.get_storage().state + self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() + self.auth = hs.get_auth() + + async def inherit_depth_from_prev_ids(self, prev_event_ids: List[str]) -> int: + """Finds the depth which would sort it after the most-recent + prev_event_id but before the successors of those events. If no + successors are found, we assume it's a historical extremity that is + part of the current batch and use the same depth as the prev_event_ids. + + Args: + prev_event_ids: List of prev event IDs + + Returns: + Inherited depth + """ + ( + most_recent_prev_event_id, + most_recent_prev_event_depth, + ) = await self.store.get_max_depth_of(prev_event_ids) + + # We want to insert the historical event after the `prev_event` but before the successor event + # + # We inherit depth from the successor event instead of the `prev_event` + # because events returned from `/messages` are first sorted by `topological_ordering` + # which is just the `depth` and then tie-break with `stream_ordering`. + # + # We mark these inserted historical events as "backfilled" which gives them a + # negative `stream_ordering`. If we use the same depth as the `prev_event`, + # then our historical event will tie-break and be sorted before the `prev_event` + # when it should come after. + # + # We want to use the successor event depth so they appear after `prev_event` because + # it has a larger `depth` but before the successor event because the `stream_ordering` + # is negative before the successor event. + successor_event_ids = await self.store.get_successor_events( + [most_recent_prev_event_id] + ) + + # If we can't find any successor events, then it's a forward extremity of + # historical messages and we can just inherit from the previous historical + # event which we can already assume has the correct depth where we want + # to insert into. + if not successor_event_ids: + depth = most_recent_prev_event_depth + else: + ( + _, + oldest_successor_depth, + ) = await self.store.get_min_depth_of(successor_event_ids) + + depth = oldest_successor_depth + + return depth + + def create_insertion_event_dict( + self, sender: str, room_id: str, origin_server_ts: int + ) -> JsonDict: + """Creates an event dict for an "insertion" event with the proper fields + and a random batch ID.
+ + Args: + sender: The event author MXID + room_id: The room ID that the event belongs to + origin_server_ts: Timestamp when the event was sent + + Returns: + The new event dictionary to insert. + """ + + next_batch_id = random_string(8) + insertion_event = { + "type": EventTypes.MSC2716_INSERTION, + "sender": sender, + "room_id": room_id, + "content": { + EventContentFields.MSC2716_NEXT_BATCH_ID: next_batch_id, + EventContentFields.MSC2716_HISTORICAL: True, + }, + "origin_server_ts": origin_server_ts, + } + + return insertion_event + + async def create_requester_for_user_id_from_app_service( + self, user_id: str, app_service: ApplicationService + ) -> Requester: + """Creates a new requester for the given user_id + and validates that the app service is allowed to control + the given user. + + Args: + user_id: The author MXID that the app service is controlling + app_service: The app service that controls the user + + Returns: + Requester object + """ + + await self.auth.validate_appservice_can_control_user_id(app_service, user_id) + + return create_requester(user_id, app_service=app_service) + + async def get_most_recent_auth_event_ids_from_event_id_list( + self, event_ids: List[str] + ) -> List[str]: + """Find the most recent auth event ids (derived from state events) that + allowed that message to be sent. We will use this as a base + to auth our historical messages against. + + Args: + event_ids: List of event ID's to look at + + Returns: + List of event ID's + """ + + ( + most_recent_prev_event_id, + _, + ) = await self.store.get_max_depth_of(event_ids) + # mapping from (type, state_key) -> state_event_id + prev_state_map = await self.state_store.get_state_ids_for_event( + most_recent_prev_event_id + ) + # List of state event ID's + prev_state_ids = list(prev_state_map.values()) + auth_event_ids = prev_state_ids + + return auth_event_ids + + async def persist_state_events_at_start( + self, + state_events_at_start: List[JsonDict], + room_id: str, + initial_auth_event_ids: List[str], + app_service_requester: Requester, + ) -> List[str]: + """Takes all `state_events_at_start` event dictionaries and creates/persists + them as floating state events which don't resolve into the current room state. + They are floating because they reference a fake prev_event which doesn't connect + to the normal DAG at all. + + Args: + state_events_at_start: + room_id: Room where you want the events persisted in. + initial_auth_event_ids: These will be the auth_events for the first + state event created. Each event created afterwards will be + added to the list of auth events for the next state event + created. + app_service_requester: The requester of an application service. 
+ + Returns: + List of state event ID's we just persisted + """ + assert app_service_requester.app_service + + state_event_ids_at_start = [] + auth_event_ids = initial_auth_event_ids.copy() + + # Make the state events float off on their own so we don't have a + # bunch of `@mxid joined the room` noise between each batch + prev_event_id_for_state_chain = generate_fake_event_id() + + for state_event in state_events_at_start: + assert_params_in_dict( + state_event, ["type", "origin_server_ts", "content", "sender"] + ) + + logger.debug( + "RoomBatchSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", + state_event, + auth_event_ids, + ) + + event_dict = { + "type": state_event["type"], + "origin_server_ts": state_event["origin_server_ts"], + "content": state_event["content"], + "room_id": room_id, + "sender": state_event["sender"], + "state_key": state_event["state_key"], + } + + # Mark all events as historical + event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True + + # TODO: This is pretty much the same as some other code to handle inserting state in this file + if event_dict["type"] == EventTypes.Member: + membership = event_dict["content"].get("membership", None) + event_id, _ = await self.room_member_handler.update_membership( + await self.create_requester_for_user_id_from_app_service( + state_event["sender"], app_service_requester.app_service + ), + target=UserID.from_string(event_dict["state_key"]), + room_id=room_id, + action=membership, + content=event_dict["content"], + outlier=True, + historical=True, + prev_event_ids=[prev_event_id_for_state_chain], + # Make sure to use a copy of this list because we modify it + # later in the loop here. Otherwise it will be the same + # reference and also update in the event when we append later. + auth_event_ids=auth_event_ids.copy(), + ) + else: + # TODO: Add some complement tests that add state that is not member joins + # and will use this code path. Maybe we only want to support join state events + # and can get rid of this `else`? + ( + event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( + await self.create_requester_for_user_id_from_app_service( + state_event["sender"], app_service_requester.app_service + ), + event_dict, + outlier=True, + historical=True, + prev_event_ids=[prev_event_id_for_state_chain], + # Make sure to use a copy of this list because we modify it + # later in the loop here. Otherwise it will be the same + # reference and also update in the event when we append later. + auth_event_ids=auth_event_ids.copy(), + ) + event_id = event.event_id + + state_event_ids_at_start.append(event_id) + auth_event_ids.append(event_id) + # Connect all the state in a floating chain + prev_event_id_for_state_chain = event_id + + return state_event_ids_at_start + + async def persist_historical_events( + self, + events_to_create: List[JsonDict], + room_id: str, + initial_prev_event_ids: List[str], + inherited_depth: int, + auth_event_ids: List[str], + app_service_requester: Requester, + ) -> List[str]: + """Creates and persists all provided events sequentially. Handles the + complexity of creating events in chronological order so they can + reference each other by prev_event but still persists in + reverse-chronological order so they have the correct + (topological_ordering, stream_ordering) and sort correctly from + /messages. + + Args: + events_to_create: List of historical events to create in JSON + dictionary format. + room_id: Room where you want the events persisted in.
+ initial_prev_event_ids: These will be the prev_events for the first + event created. Each event created afterwards will point to the + previous event created. + inherited_depth: The depth to create the events at (you will + probably get this by calling inherit_depth_from_prev_ids(...)). + auth_event_ids: Define which events allow you to create the given + event in the room. + app_service_requester: The requester of an application service. + + Returns: + List of persisted event IDs + """ + assert app_service_requester.app_service + + prev_event_ids = initial_prev_event_ids.copy() + + event_ids = [] + events_to_persist = [] + for ev in events_to_create: + assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) + + assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % ( + ev["sender"], + ) + + event_dict = { + "type": ev["type"], + "origin_server_ts": ev["origin_server_ts"], + "content": ev["content"], + "room_id": room_id, + "sender": ev["sender"],  # requester.user.to_string(), + "prev_events": prev_event_ids.copy(), + } + + # Mark all events as historical + event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True + + event, context = await self.event_creation_handler.create_event( + await self.create_requester_for_user_id_from_app_service( + ev["sender"], app_service_requester.app_service + ), + event_dict, + prev_event_ids=event_dict.get("prev_events"), + auth_event_ids=auth_event_ids, + historical=True, + depth=inherited_depth, + ) + + assert context._state_group + + # Normally this is done when persisting the event but we have to + # pre-emptively do it here because we create all the events first, + # then persist them in another pass below. And we want to share + # state_groups across the whole batch so this lookup needs to work + # for the next event in the batch in this loop. + await self.store.store_state_group_id_for_event_id( + event_id=event.event_id, + state_group_id=context._state_group, + ) + + logger.debug( + "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", + event, + prev_event_ids, + auth_event_ids, + ) + + events_to_persist.append((event, context)) + event_id = event.event_id + + event_ids.append(event_id) + prev_event_ids = [event_id] + + # Persist events in reverse-chronological order so they have the + # correct stream_ordering as they are backfilled (which decrements). + # Events are sorted by (topological_ordering, stream_ordering) + # where topological_ordering is just depth. + for (event, context) in reversed(events_to_persist): + await self.event_creation_handler.handle_new_client_event( + await self.create_requester_for_user_id_from_app_service( + event.sender, app_service_requester.app_service + ), + event=event, + context=context, + ) + + return event_ids + + async def handle_batch_of_events( + self, + events_to_create: List[JsonDict], + room_id: str, + batch_id_to_connect_to: str, + initial_prev_event_ids: List[str], + inherited_depth: int, + auth_event_ids: List[str], + app_service_requester: Requester, + ) -> Tuple[List[str], str]: + """ + Handles creating and persisting all of the historical events as well + as insertion and batch meta events to make the batch navigable in the DAG. + + Args: + events_to_create: List of historical events to create in JSON + dictionary format. + room_id: Room where you want the events created in. + batch_id_to_connect_to: The batch_id from the insertion event you + want this batch to connect to.
+            initial_prev_event_ids: These will be the prev_events for the first
+                event created. Each event created afterwards will point to the
+                previous event created.
+            inherited_depth: The depth to create the events at (you will
+                probably get this by calling inherit_depth_from_prev_ids(...)).
+            auth_event_ids: Define which events allow you to create the given
+                event in the room.
+            app_service_requester: The requester of an application service.
+
+        Returns:
+            Tuple containing a list of created event IDs and the next_batch_id
+        """
+
+        # Connect this current batch to the insertion event from the previous batch
+        last_event_in_batch = events_to_create[-1]
+        batch_event = {
+            "type": EventTypes.MSC2716_BATCH,
+            "sender": app_service_requester.user.to_string(),
+            "room_id": room_id,
+            "content": {
+                EventContentFields.MSC2716_BATCH_ID: batch_id_to_connect_to,
+                EventContentFields.MSC2716_HISTORICAL: True,
+            },
+            # Since the batch event is put at the end of the batch,
+            # where the newest-in-time event is, copy the origin_server_ts from
+            # the last event we're inserting
+            "origin_server_ts": last_event_in_batch["origin_server_ts"],
+        }
+        # Add the batch event to the end of the batch (newest-in-time)
+        events_to_create.append(batch_event)
+
+        # Add an "insertion" event to the start of each batch (next to the oldest-in-time
+        # event in the batch) so the next batch can be connected to this one.
+        insertion_event = self.create_insertion_event_dict(
+            sender=app_service_requester.user.to_string(),
+            room_id=room_id,
+            # Since the insertion event is put at the start of the batch,
+            # where the oldest-in-time event is, copy the origin_server_ts from
+            # the first event we're inserting
+            origin_server_ts=events_to_create[0]["origin_server_ts"],
+        )
+        next_batch_id = insertion_event["content"][
+            EventContentFields.MSC2716_NEXT_BATCH_ID
+        ]
+        # Prepend the insertion event to the start of the batch (oldest-in-time)
+        events_to_create = [insertion_event] + events_to_create
+
+        # Create and persist all of the historical events
+        event_ids = await self.persist_historical_events(
+            events_to_create=events_to_create,
+            room_id=room_id,
+            initial_prev_event_ids=initial_prev_event_ids,
+            inherited_depth=inherited_depth,
+            auth_event_ids=auth_event_ids,
+            app_service_requester=app_service_requester,
+        )
+
+        return event_ids, next_batch_id
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index fae2c098e32e..ba7a14d651c5 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -14,12 +14,18 @@
 import logging
 from collections import namedtuple
-from typing import TYPE_CHECKING, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Optional, Tuple

 import msgpack
 from unpaddedbase64 import decode_base64, encode_base64

-from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
+from synapse.api.constants import (
+    EventContentFields,
+    EventTypes,
+    GuestAccess,
+    HistoryVisibility,
+    JoinRules,
+)
 from synapse.api.errors import (
     Codes,
     HttpResponseException,
@@ -27,11 +33,9 @@
     SynapseError,
 )
 from synapse.types import JsonDict, ThirdPartyInstanceID
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import _CacheContext, cached
 from synapse.util.caches.response_cache import ResponseCache

-from ._base import BaseHandler
-
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -43,10 +47,11 @@
 EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)

-class RoomListHandler(BaseHandler):
+class RoomListHandler:
     def 
__init__(self, hs: "HomeServer"): - super().__init__(hs) - self.enable_room_list_search = hs.config.enable_room_list_search + self.store = hs.get_datastore() + self.hs = hs + self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search self.response_cache: ResponseCache[ Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]] ] = ResponseCache(hs.get_clock(), "room_list") @@ -163,7 +168,7 @@ async def _get_public_room_list( ignore_non_federatable=from_federation, ) - def build_room_entry(room): + def build_room_entry(room: JsonDict) -> JsonDict: entry = { "room_id": room["room_id"], "name": room["name"], @@ -243,10 +248,10 @@ async def generate_room_entry( self, room_id: str, num_joined_users: int, - cache_context, + cache_context: _CacheContext, with_alias: bool = True, allow_private: bool = False, - ) -> Optional[dict]: + ) -> Optional[JsonDict]: """Returns the entry for a room Args: @@ -307,7 +312,9 @@ async def generate_room_entry( # Return whether this room is open to federation users or not create_event = current_state[EventTypes.Create, ""] - result["m.federate"] = create_event.content.get("m.federate", True) + result["m.federate"] = create_event.content.get( + EventContentFields.FEDERATE, True + ) name_event = current_state.get((EventTypes.Name, "")) if name_event: @@ -336,8 +343,8 @@ async def generate_room_entry( guest_event = current_state.get((EventTypes.GuestAccess, "")) guest = None if guest_event: - guest = guest_event.content.get("guest_access", None) - result["guest_can_join"] = guest == "can_join" + guest = guest_event.content.get(EventContentFields.GUEST_ACCESS) + result["guest_can_join"] = guest == GuestAccess.CAN_JOIN avatar_event = current_state.get(("m.room.avatar", "")) if avatar_event: @@ -356,6 +363,12 @@ async def get_remote_public_room_list( include_all_networks: bool = False, third_party_instance_id: Optional[str] = None, ) -> JsonDict: + """Get the public room list from remote server + + Raises: + SynapseError + """ + if not self.enable_room_list_search: return {"chunk": [], "total_room_count_estimate": 0} @@ -395,13 +408,16 @@ async def get_remote_public_room_list( limit = None since_token = None - res = await self._get_remote_list_cached( - server_name, - limit=limit, - since_token=since_token, - include_all_networks=include_all_networks, - third_party_instance_id=third_party_instance_id, - ) + try: + res = await self._get_remote_list_cached( + server_name, + limit=limit, + since_token=since_token, + include_all_networks=include_all_networks, + third_party_instance_id=third_party_instance_id, + ) + except (RequestSendFailed, HttpResponseException): + raise SynapseError(502, "Failed to fetch room list") if search_filter: res = { @@ -423,20 +439,21 @@ async def _get_remote_list_cached( include_all_networks: bool = False, third_party_instance_id: Optional[str] = None, ) -> JsonDict: + """Wrapper around FederationClient.get_public_rooms that caches the + result. 
+ """ + repl_layer = self.hs.get_federation_client() if search_filter: # We can't cache when asking for search - try: - return await repl_layer.get_public_rooms( - server_name, - limit=limit, - since_token=since_token, - search_filter=search_filter, - include_all_networks=include_all_networks, - third_party_instance_id=third_party_instance_id, - ) - except (RequestSendFailed, HttpResponseException): - raise SynapseError(502, "Failed to fetch room list") + return await repl_layer.get_public_rooms( + server_name, + limit=limit, + since_token=since_token, + search_filter=search_filter, + include_all_networks=include_all_networks, + third_party_instance_id=third_party_instance_id, + ) key = ( server_name, @@ -489,7 +506,7 @@ def to_token(self) -> str: ) ) - def copy_and_replace(self, **kwds) -> "RoomListNextBatch": + def copy_and_replace(self, **kwds: Any) -> "RoomListNextBatch": return self._replace(**kwds) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 65ad3efa6a60..a6dbff637f54 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -19,7 +19,13 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple from synapse import types -from synapse.api.constants import AccountDataTypes, EventTypes, Membership +from synapse.api.constants import ( + AccountDataTypes, + EventContentFields, + EventTypes, + GuestAccess, + Membership, +) from synapse.api.errors import ( AuthError, Codes, @@ -31,6 +37,7 @@ from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase from synapse.events.snapshot import EventContext +from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.types import ( JsonDict, Requester, @@ -38,13 +45,12 @@ RoomID, StateMap, UserID, + create_requester, get_domain_from_id, ) from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_left_room -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer @@ -64,6 +70,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config + self._server_name = hs.hostname self.federation_handler = hs.get_federation_handler() self.directory_handler = hs.get_directory_handler() @@ -74,14 +81,14 @@ def __init__(self, hs: "HomeServer"): self.account_data_handler = hs.get_account_data_handler() self.event_auth_handler = hs.get_event_auth_handler() - self.member_linearizer = Linearizer(name="member") + self.member_linearizer: Linearizer = Linearizer(name="member") self.clock = hs.get_clock() self.spam_checker = hs.get_spam_checker() self.third_party_event_rules = hs.get_third_party_event_rules() - self._server_notices_mxid = self.config.server_notices_mxid - self._enable_lookup = hs.config.enable_3pid_lookup - self.allow_per_room_profiles = self.config.allow_per_room_profiles + self._server_notices_mxid = self.config.servernotices.server_notices_mxid + self._enable_lookup = hs.config.registration.enable_3pid_lookup + self.allow_per_room_profiles = self.config.server.allow_per_room_profiles self._join_rate_limiter_local = Ratelimiter( store=self.store, @@ -109,10 +116,7 @@ def __init__(self, hs: "HomeServer"): burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count, ) - # This is only used to get at ratelimit function, and - # maybe_kick_guest_users. It's fine there are multiple of these as - # it doesn't store state. 
- self.base_handler = BaseHandler(hs) + self.request_ratelimiter = hs.get_request_ratelimiter() @abc.abstractmethod async def _remote_join( @@ -217,7 +221,7 @@ async def ratelimit_multiple_invites( room_id: Optional[str], n_invites: int, update: bool = True, - ): + ) -> None: """Ratelimit more than one invite sent by the given requester in the given room. Args: @@ -241,7 +245,7 @@ async def ratelimit_invite( requester: Optional[Requester], room_id: Optional[str], invitee_user_id: str, - ): + ) -> None: """Ratelimit invites by room and by target user. If room ID is missing then we just rate limit by target user. @@ -264,6 +268,7 @@ async def _local_membership_update( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, + historical: bool = False, ) -> Tuple[str, int]: """ Internal membership update function to get an existing event or create @@ -289,6 +294,9 @@ async def _local_membership_update( outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. Returns: Tuple of event ID and stream ordering position @@ -333,6 +341,7 @@ async def _local_membership_update( auth_event_ids=auth_event_ids, require_consent=require_consent, outlier=outlier, + historical=historical, ) prev_state_ids = await context.get_prev_state_ids() @@ -378,7 +387,7 @@ async def _local_membership_update( return result_event.event_id, result_event.internal_metadata.stream_ordering async def copy_room_tags_and_direct_to_room( - self, old_room_id, new_room_id, user_id + self, old_room_id: str, new_room_id: str, user_id: str ) -> None: """Copies the tags and direct room state from one room to another. @@ -426,8 +435,10 @@ async def update_membership( third_party_signed: Optional[dict] = None, ratelimit: bool = True, content: Optional[dict] = None, + new_room: bool = False, require_consent: bool = True, outlier: bool = False, + historical: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -443,10 +454,15 @@ async def update_membership( third_party_signed: Information from a 3PID invite. ratelimit: Whether to rate limit the request. content: The content of the created event. + new_room: Whether the membership update is happening in the context of a room + creation. require_consent: Whether consent is required. outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. 
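To make the new historical and outlier plumbing concrete, a hypothetical caller is sketched below; insert_historical_join and its arguments are illustrative, and only the update_membership keyword arguments come from the change above:

    from typing import List, Tuple

    from synapse.types import UserID

    async def insert_historical_join(
        room_member_handler,
        requester,
        room_id: str,
        user_id: str,
        floating_prev_event_id: str,
        auth_event_ids: List[str],
    ) -> Tuple[str, int]:
        """Insert a join event that floats in the DAG as part of a batch."""
        return await room_member_handler.update_membership(
            requester,
            target=UserID.from_string(user_id),
            room_id=room_id,
            action="join",
            outlier=True,       # float in the DAG rather than extend it
            historical=True,    # skip a few checks and mark as backfilled
            prev_event_ids=[floating_prev_event_id],
            auth_event_ids=auth_event_ids.copy(),  # snapshot the growing chain
        )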
@@ -477,8 +493,10 @@ async def update_membership( third_party_signed=third_party_signed, ratelimit=ratelimit, content=content, + new_room=new_room, require_consent=require_consent, outlier=outlier, + historical=historical, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, ) @@ -496,8 +514,10 @@ async def update_membership_locked( third_party_signed: Optional[dict] = None, ratelimit: bool = True, content: Optional[dict] = None, + new_room: bool = False, require_consent: bool = True, outlier: bool = False, + historical: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -515,10 +535,15 @@ async def update_membership_locked( third_party_signed: ratelimit: content: + new_room: Whether the membership update is happening in the context of a room + creation. require_consent: outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. @@ -551,6 +576,28 @@ async def update_membership_locked( content.pop("displayname", None) content.pop("avatar_url", None) + if len(content.get("displayname") or "") > MAX_DISPLAYNAME_LEN: + raise SynapseError( + 400, + f"Displayname is too long (max {MAX_DISPLAYNAME_LEN})", + errcode=Codes.BAD_JSON, + ) + + if len(content.get("avatar_url") or "") > MAX_AVATAR_URL_LEN: + raise SynapseError( + 400, + f"Avatar URL is too long (max {MAX_AVATAR_URL_LEN})", + errcode=Codes.BAD_JSON, + ) + + # The event content should *not* include the authorising user as + # it won't be properly signed. Strip it out since it might come + # back from a client updating a display name / avatar. + # + # This only applies to restricted rooms, but there should be no reason + # for a client to include it. Unconditionally remove it. + content.pop(EventContentFields.AUTHORISING_USER, None) + effective_membership_state = action if action in ["kick", "unban"]: effective_membership_state = "leave" @@ -595,7 +642,7 @@ async def update_membership_locked( is_requester_admin = await self.auth.is_server_admin(requester.user) if not is_requester_admin: - if self.config.block_non_admin_invites: + if self.config.server.block_non_admin_invites: logger.info( "Blocking invite: user is not admin and non-admin " "invites disabled" @@ -624,6 +671,7 @@ async def update_membership_locked( content=content, require_consent=require_consent, outlier=outlier, + historical=historical, ) latest_event_ids = await self.store.get_prev_events_for_room(room_id) @@ -646,7 +694,7 @@ async def update_membership_locked( " (membership=%s)" % old_membership, errcode=Codes.BAD_STATE, ) - if old_membership == "ban" and action != "unban": + if old_membership == "ban" and action not in ["ban", "unban", "leave"]: raise SynapseError( 403, "Cannot %s user who was banned" % (action,), @@ -696,6 +744,30 @@ async def update_membership_locked( # so don't really fit into the general auth process. raise AuthError(403, "Guest access not allowed") + # Figure out whether the user is a server admin to determine whether they + # should be able to bypass the spam checker. 
+ if ( + self._server_notices_mxid is not None + and requester.user.to_string() == self._server_notices_mxid + ): + # allow the server notices mxid to join rooms + bypass_spam_checker = True + + else: + bypass_spam_checker = await self.auth.is_server_admin(requester.user) + + inviter = await self._get_inviter(target.to_string(), room_id) + if ( + not bypass_spam_checker + # We assume that if the spam checker allowed the user to create + # a room then they're allowed to join it. + and not new_room + and not await self.spam_checker.user_may_join_room( + target.to_string(), room_id, is_invited=inviter is not None + ) + ): + raise SynapseError(403, "Not allowed to join this room") + # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( target.to_string(), room_id, remote_room_hosts, content, is_host_in_room @@ -917,7 +989,7 @@ async def _should_perform_remote_join( # be included in the event content in order to efficiently validate # the event. content[ - "join_authorised_via_users_server" + EventContentFields.AUTHORISING_USER ] = await self.event_auth_handler.get_user_which_could_invite( room_id, current_state_ids, @@ -1008,7 +1080,7 @@ async def send_membership_event( event: EventBase, context: EventContext, ratelimit: bool = True, - ): + ) -> None: """ Change the membership status of a user in a room. @@ -1075,10 +1147,62 @@ async def _can_guest_join(self, current_state_ids: StateMap[str]) -> bool: return bool( guest_access and guest_access.content - and "guest_access" in guest_access.content - and guest_access.content["guest_access"] == "can_join" + and guest_access.content.get(EventContentFields.GUEST_ACCESS) + == GuestAccess.CAN_JOIN ) + async def kick_guest_users(self, current_state: Iterable[EventBase]) -> None: + """Kick any local guest users from the room. + + This is called when the room state changes from guests allowed to not-allowed. + + Params: + current_state: the current state of the room. We will iterate this to look + for guest users to kick. + """ + for member_event in current_state: + try: + if member_event.type != EventTypes.Member: + continue + + if not self.hs.is_mine_id(member_event.state_key): + continue + + if member_event.content["membership"] not in { + Membership.JOIN, + Membership.INVITE, + }: + continue + + if ( + "kind" not in member_event.content + or member_event.content["kind"] != "guest" + ): + continue + + # We make the user choose to leave, rather than have the + # event-sender kick them. This is partially because we don't + # need to worry about power levels, and partially because guest + # users are a concept which doesn't hugely work over federation, + # and having homeservers have their own users leave keeps more + # of that decision-making and control local to the guest-having + # homeserver. + target_user = UserID.from_string(member_event.state_key) + requester = create_requester( + target_user, is_guest=True, authenticated_entity=self._server_name + ) + handler = self.hs.get_room_member_handler() + await handler.update_membership( + requester, + target_user, + member_event.room_id, + "leave", + ratelimit=False, + require_consent=False, + ) + except Exception as e: + logger.exception("Error kicking guest user: %s" % (e,)) + async def lookup_room_alias( self, room_alias: RoomAlias ) -> Tuple[RoomID, List[str]]: @@ -1148,7 +1272,7 @@ async def do_3pid_invite( Raises: ShadowBanError if the requester has been shadow-banned. 
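Both the join path above and the 3PID invite path below consult spam checker callbacks. A toy checker sketching the two callback signatures used here; this models the decision logic only, not Synapse's module registration API:

    from typing import Set

    class BlocklistSpamChecker:
        """Minimal spam checker with the two callbacks consulted above."""

        def __init__(self, blocked_rooms: Set[str], blocked_domains: Set[str]) -> None:
            self._blocked_rooms = blocked_rooms
            self._blocked_domains = blocked_domains

        async def user_may_join_room(
            self, user_id: str, room_id: str, is_invited: bool
        ) -> bool:
            # Invited users get a pass; everyone else is checked against
            # the room blocklist.
            return is_invited or room_id not in self._blocked_rooms

        async def user_may_send_3pid_invite(
            self, inviter_userid: str, medium: str, address: str, room_id: str
        ) -> bool:
            # For example, refuse email invites to throwaway domains.
            if medium != "email":
                return True
            domain = address.rsplit("@", 1)[-1].lower()
            return domain not in self._blocked_domains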
""" - if self.config.block_non_admin_invites: + if self.config.server.block_non_admin_invites: is_requester_admin = await self.auth.is_server_admin(requester.user) if not is_requester_admin: raise SynapseError( @@ -1162,7 +1286,7 @@ async def do_3pid_invite( # We need to rate limit *before* we send out any 3PID invites, so we # can't just rely on the standard ratelimiting of events. - await self.base_handler.ratelimit(requester) + await self.request_ratelimiter.ratelimit(requester) can_invite = await self.third_party_event_rules.check_threepid_can_be_invited( medium, address, room_id @@ -1186,10 +1310,22 @@ async def do_3pid_invite( if invitee: # Note that update_membership with an action of "invite" can raise # a ShadowBanError, but this was done above already. + # We don't check the invite against the spamchecker(s) here (through + # user_may_invite) because we'll do it further down the line anyway (in + # update_membership_locked). _, stream_id = await self.update_membership( requester, UserID.from_string(invitee), room_id, "invite", txn_id=txn_id ) else: + # Check if the spamchecker(s) allow this invite to go through. + if not await self.spam_checker.user_may_send_3pid_invite( + inviter_userid=requester.user.to_string(), + medium=medium, + address=address, + room_id=room_id, + ): + raise SynapseError(403, "Cannot send threepid invite") + stream_id = await self._make_and_store_3pid_invite( requester, id_server, @@ -1237,6 +1373,11 @@ async def _make_and_store_3pid_invite( if room_name_event: room_name = room_name_event.content.get("name", "") + room_type = None + room_create_event = room_state.get((EventTypes.Create, "")) + if room_create_event: + room_type = room_create_event.content.get(EventContentFields.ROOM_TYPE) + room_join_rules = "" join_rules_event = room_state.get((EventTypes.JoinRules, "")) if join_rules_event: @@ -1263,6 +1404,7 @@ async def _make_and_store_3pid_invite( room_avatar_url=room_avatar_url, room_join_rules=room_join_rules, room_name=room_name, + room_type=room_type, inviter_display_name=inviter_display_name, inviter_avatar_url=inviter_avatar_url, id_access_token=id_access_token, @@ -1326,7 +1468,6 @@ def __init__(self, hs: "HomeServer"): self.distributor = hs.get_distributor() self.distributor.declare("user_left_room") - self._server_name = hs.hostname async def _is_remote_room_too_complex( self, room_id: str, remote_room_hosts: List[str] @@ -1341,7 +1482,7 @@ async def _is_remote_room_too_complex( Returns: bool of whether the complexity is too great, or None if unable to be fetched """ - max_complexity = self.hs.config.limit_remote_rooms.complexity + max_complexity = self.hs.config.server.limit_remote_rooms.complexity complexity = await self.federation_handler.get_room_complexity( remote_room_hosts, room_id ) @@ -1357,7 +1498,7 @@ async def _is_local_room_too_complex(self, room_id: str) -> bool: Args: room_id: The room ID to check for complexity. 
""" - max_complexity = self.hs.config.limit_remote_rooms.complexity + max_complexity = self.hs.config.server.limit_remote_rooms.complexity complexity = await self.store.get_room_complexity(room_id) return complexity["v1"] > max_complexity @@ -1381,8 +1522,11 @@ async def _remote_join( if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") - check_complexity = self.hs.config.limit_remote_rooms.enabled - if check_complexity and self.hs.config.limit_remote_rooms.admins_can_join: + check_complexity = self.hs.config.server.limit_remote_rooms.enabled + if ( + check_complexity + and self.hs.config.server.limit_remote_rooms.admins_can_join + ): check_complexity = not await self.auth.is_server_admin(user) if check_complexity: @@ -1393,7 +1537,7 @@ async def _remote_join( if too_complex is True: raise SynapseError( code=400, - msg=self.hs.config.limit_remote_rooms.complexity_error, + msg=self.hs.config.server.limit_remote_rooms.complexity_error, errcode=Codes.RESOURCE_LIMIT_EXCEEDED, ) @@ -1428,7 +1572,7 @@ async def _remote_join( ) raise SynapseError( code=400, - msg=self.hs.config.limit_remote_rooms.complexity_error, + msg=self.hs.config.server.limit_remote_rooms.complexity_error, errcode=Codes.RESOURCE_LIMIT_EXCEEDED, ) @@ -1540,7 +1684,9 @@ async def _generate_local_out_of_band_leave( # # the prev_events consist solely of the previous membership event. prev_event_ids = [previous_membership_event.event_id] - auth_event_ids = previous_membership_event.auth_event_ids() + prev_event_ids + auth_event_ids = ( + list(previous_membership_event.auth_event_ids()) + prev_event_ids + ) event, context = await self.event_creation_handler.create_event( requester, diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py new file mode 100644 index 000000000000..b2cfe537dfb1 --- /dev/null +++ b/synapse/handlers/room_summary.py @@ -0,0 +1,1195 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import logging +import re +from collections import deque +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Set, Tuple + +import attr + +from synapse.api.constants import ( + EventContentFields, + EventTypes, + HistoryVisibility, + JoinRules, + Membership, + RoomTypes, +) +from synapse.api.errors import ( + AuthError, + Codes, + NotFoundError, + StoreError, + SynapseError, + UnsupportedRoomVersionError, +) +from synapse.api.ratelimiting import Ratelimiter +from synapse.events import EventBase +from synapse.types import JsonDict, Requester +from synapse.util.caches.response_cache import ResponseCache + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + +# number of rooms to return. We'll stop once we hit this limit. +MAX_ROOMS = 50 + +# max number of events to return per room. 
+MAX_ROOMS_PER_SPACE = 50 + +# max number of federation servers to hit per room +MAX_SERVERS_PER_SPACE = 3 + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _PaginationKey: + """The key used to find unique pagination session.""" + + # The first three entries match the request parameters (and cannot change + # during a pagination session). + room_id: str + suggested_only: bool + max_depth: Optional[int] + # The randomly generated token. + token: str + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _PaginationSession: + """The information that is stored for pagination.""" + + # The time the pagination session was created, in milliseconds. + creation_time_ms: int + # The queue of rooms which are still to process. + room_queue: List["_RoomQueueEntry"] + # A set of rooms which have been processed. + processed_rooms: Set[str] + + +class RoomSummaryHandler: + # A unique key used for pagination sessions for the room hierarchy endpoint. + _PAGINATION_SESSION_TYPE = "room_hierarchy_pagination" + + # The time a pagination session remains valid for. + _PAGINATION_SESSION_VALIDITY_PERIOD_MS = 5 * 60 * 1000 + + def __init__(self, hs: "HomeServer"): + self._event_auth_handler = hs.get_event_auth_handler() + self._store = hs.get_datastore() + self._event_serializer = hs.get_event_client_serializer() + self._server_name = hs.hostname + self._federation_client = hs.get_federation_client() + self._ratelimiter = Ratelimiter( + store=self._store, clock=hs.get_clock(), rate_hz=5, burst_count=10 + ) + + # If a user tries to fetch the same page multiple times in quick succession, + # only process the first attempt and return its result to subsequent requests. + self._pagination_response_cache: ResponseCache[ + Tuple[str, str, bool, Optional[int], Optional[int], Optional[str]] + ] = ResponseCache( + hs.get_clock(), + "get_room_hierarchy", + ) + + async def get_space_summary( + self, + requester: str, + room_id: str, + suggested_only: bool = False, + max_rooms_per_space: Optional[int] = None, + ) -> JsonDict: + """ + Implementation of the space summary C-S API + + Args: + requester: user id of the user making this request + + room_id: room id to start the summary at + + suggested_only: whether we should only return children with the "suggested" + flag set. + + max_rooms_per_space: an optional limit on the number of child rooms we will + return. This does not apply to the root room (ie, room_id), and + is overridden by MAX_ROOMS_PER_SPACE. + + Returns: + summary dict to return + """ + # First of all, check that the room is accessible. + if not await self._is_local_room_accessible(room_id, requester): + raise AuthError( + 403, + "User %s not in room %s, and room previews are disabled" + % (requester, room_id), + ) + + # the queue of rooms to process + room_queue = deque((_RoomQueueEntry(room_id, ()),)) + + # rooms we have already processed + processed_rooms: Set[str] = set() + + # events we have already processed. 
We don't necessarily have their event ids,
+        # so instead we key on (room id, state key)
+        processed_events: Set[Tuple[str, str]] = set()
+
+        rooms_result: List[JsonDict] = []
+        events_result: List[JsonDict] = []
+
+        while room_queue and len(rooms_result) < MAX_ROOMS:
+            queue_entry = room_queue.popleft()
+            room_id = queue_entry.room_id
+            if room_id in processed_rooms:
+                # already done this room
+                continue
+
+            logger.debug("Processing room %s", room_id)
+
+            is_in_room = await self._store.is_host_joined(room_id, self._server_name)
+
+            # The client-specified max_rooms_per_space limit doesn't apply to the
+            # room_id specified in the request, so we ignore it if this is the
+            # first room we are processing.
+            max_children = max_rooms_per_space if processed_rooms else None
+
+            if is_in_room:
+                room_entry = await self._summarize_local_room(
+                    requester, None, room_id, suggested_only, max_children
+                )
+
+                events: Sequence[JsonDict] = []
+                if room_entry:
+                    rooms_result.append(room_entry.room)
+                    events = room_entry.children_state_events
+
+                logger.debug(
+                    "Query of local room %s returned events %s",
+                    room_id,
+                    ["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events],
+                )
+            else:
+                fed_rooms = await self._summarize_remote_room(
+                    queue_entry,
+                    suggested_only,
+                    max_children,
+                    exclude_rooms=processed_rooms,
+                )
+
+                # The results over federation might include rooms that we,
+                # as the requesting server, are allowed to see, but the requesting
+                # user is not permitted to see.
+                #
+                # Filter the returned results to only what is accessible to the user.
+                events = []
+                for room_entry in fed_rooms:
+                    room = room_entry.room
+                    fed_room_id = room_entry.room_id
+
+                    # The user can see the room, include it!
+                    if await self._is_remote_room_accessible(
+                        requester, fed_room_id, room
+                    ):
+                        # Before returning to the client, remove the allowed_room_ids
+                        # and allowed_spaces keys.
+                        room.pop("allowed_room_ids", None)
+                        room.pop("allowed_spaces", None)
+
+                        rooms_result.append(room)
+                        events.extend(room_entry.children_state_events)
+
+                    # All rooms returned don't need visiting again (even if the user
+                    # didn't have access to them).
+                    processed_rooms.add(fed_room_id)
+
+                logger.debug(
+                    "Query of %s returned rooms %s, events %s",
+                    room_id,
+                    [room_entry.room.get("room_id") for room_entry in fed_rooms],
+                    ["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events],
+                )
+
+            # the room we queried may or may not have been returned, but don't process
+            # it again, anyway.
+            processed_rooms.add(room_id)
+
+            # XXX: is it ok that we blindly iterate through any events returned by
+            # a remote server, whether or not they actually link to any rooms in our
+            # tree?
+            for ev in events:
+                # remote servers might return events we have already processed
+                # (eg, Dendrite returns inward pointers as well as outward ones), so
+                # we need to filter them out, to avoid returning duplicate links to the
+                # client.
+                ev_key = (ev["room_id"], ev["state_key"])
+                if ev_key in processed_events:
+                    continue
+                events_result.append(ev)
+
+                # add the child to the queue. we have already validated
+                # that the vias are a list of server names.
+ room_queue.append( + _RoomQueueEntry(ev["state_key"], ev["content"]["via"]) + ) + processed_events.add(ev_key) + + return {"rooms": rooms_result, "events": events_result} + + async def get_room_hierarchy( + self, + requester: Requester, + requested_room_id: str, + suggested_only: bool = False, + max_depth: Optional[int] = None, + limit: Optional[int] = None, + from_token: Optional[str] = None, + ) -> JsonDict: + """ + Implementation of the room hierarchy C-S API. + + Args: + requester: The user ID of the user making this request. + requested_room_id: The room ID to start the hierarchy at (the "root" room). + suggested_only: Whether we should only return children with the "suggested" + flag set. + max_depth: The maximum depth in the tree to explore, must be a + non-negative integer. + + 0 would correspond to just the root room, 1 would include just + the root room's children, etc. + limit: An optional limit on the number of rooms to return per + page. Must be a positive integer. + from_token: An optional pagination token. + + Returns: + The JSON hierarchy dictionary. + """ + await self._ratelimiter.ratelimit(requester) + + # If a user tries to fetch the same page multiple times in quick succession, + # only process the first attempt and return its result to subsequent requests. + # + # This is due to the pagination process mutating internal state, attempting + # to process multiple requests for the same page will result in errors. + return await self._pagination_response_cache.wrap( + ( + requester.user.to_string(), + requested_room_id, + suggested_only, + max_depth, + limit, + from_token, + ), + self._get_room_hierarchy, + requester.user.to_string(), + requested_room_id, + suggested_only, + max_depth, + limit, + from_token, + ) + + async def _get_room_hierarchy( + self, + requester: str, + requested_room_id: str, + suggested_only: bool = False, + max_depth: Optional[int] = None, + limit: Optional[int] = None, + from_token: Optional[str] = None, + ) -> JsonDict: + """See docstring for SpaceSummaryHandler.get_room_hierarchy.""" + + # First of all, check that the room is accessible. + if not await self._is_local_room_accessible(requested_room_id, requester): + raise AuthError( + 403, + "User %s not in room %s, and room previews are disabled" + % (requester, requested_room_id), + ) + + # If this is continuing a previous session, pull the persisted data. + if from_token: + try: + pagination_session = await self._store.get_session( + session_type=self._PAGINATION_SESSION_TYPE, + session_id=from_token, + ) + except StoreError: + raise SynapseError(400, "Unknown pagination token", Codes.INVALID_PARAM) + + # If the requester, room ID, suggested-only, or max depth were modified + # the session is invalid. + if ( + requester != pagination_session["requester"] + or requested_room_id != pagination_session["room_id"] + or suggested_only != pagination_session["suggested_only"] + or max_depth != pagination_session["max_depth"] + ): + raise SynapseError(400, "Unknown pagination token", Codes.INVALID_PARAM) + + # Load the previous state. + room_queue = [ + _RoomQueueEntry(*fields) for fields in pagination_session["room_queue"] + ] + processed_rooms = set(pagination_session["processed_rooms"]) + else: + # The queue of rooms to process, the next room is last on the stack. + room_queue = [_RoomQueueEntry(requested_room_id, ())] + + # Rooms we have already processed. + processed_rooms = set() + + rooms_result: List[JsonDict] = [] + + # Cap the limit to a server-side maximum. 
+ if limit is None: + limit = MAX_ROOMS + else: + limit = min(limit, MAX_ROOMS) + + # Iterate through the queue until we reach the limit or run out of + # rooms to include. + while room_queue and len(rooms_result) < limit: + queue_entry = room_queue.pop() + room_id = queue_entry.room_id + current_depth = queue_entry.depth + if room_id in processed_rooms: + # already done this room + continue + + logger.debug("Processing room %s", room_id) + + # A map of summaries for children rooms that might be returned over + # federation. The rationale for caching these and *maybe* using them + # is to prefer any information local to the homeserver before trusting + # data received over federation. + children_room_entries: Dict[str, JsonDict] = {} + # A set of room IDs which are children that did not have information + # returned over federation and are known to be inaccessible to the + # current server. We should not reach out over federation to try to + # summarise these rooms. + inaccessible_children: Set[str] = set() + + # If the room is known locally, summarise it! + is_in_room = await self._store.is_host_joined(room_id, self._server_name) + if is_in_room: + room_entry = await self._summarize_local_room( + requester, + None, + room_id, + suggested_only, + # TODO Handle max children. + max_children=None, + ) + + # Otherwise, attempt to use information for federation. + else: + # A previous call might have included information for this room. + # It can be used if either: + # + # 1. The room is not a space. + # 2. The maximum depth has been achieved (since no children + # information is needed). + if queue_entry.remote_room and ( + queue_entry.remote_room.get("room_type") != RoomTypes.SPACE + or (max_depth is not None and current_depth >= max_depth) + ): + room_entry = _RoomEntry( + queue_entry.room_id, queue_entry.remote_room + ) + + # If the above isn't true, attempt to fetch the room + # information over federation. + else: + ( + room_entry, + children_room_entries, + inaccessible_children, + ) = await self._summarize_remote_room_hierarchy( + queue_entry, + suggested_only, + ) + + # Ensure this room is accessible to the requester (and not just + # the homeserver). + if room_entry and not await self._is_remote_room_accessible( + requester, queue_entry.room_id, room_entry.room + ): + room_entry = None + + # This room has been processed and should be ignored if it appears + # elsewhere in the hierarchy. + processed_rooms.add(room_id) + + # There may or may not be a room entry based on whether it is + # inaccessible to the requesting user. + if room_entry: + # Add the room (including the stripped m.space.child events). + rooms_result.append(room_entry.as_json()) + + # If this room is not at the max-depth, check if there are any + # children to process. + if max_depth is None or current_depth < max_depth: + # The children get added in reverse order so that the next + # room to process, according to the ordering, is the last + # item in the list. + room_queue.extend( + _RoomQueueEntry( + ev["state_key"], + ev["content"]["via"], + current_depth + 1, + children_room_entries.get(ev["state_key"]), + ) + for ev in reversed(room_entry.children_state_events) + if ev["type"] == EventTypes.SpaceChild + and ev["state_key"] not in inaccessible_children + ) + + result: JsonDict = {"rooms": rooms_result} + + # If there's additional data, generate a pagination token (and persist state). 
+ if room_queue: + result["next_batch"] = await self._store.create_session( + session_type=self._PAGINATION_SESSION_TYPE, + value={ + # Information which must be identical across pagination. + "requester": requester, + "room_id": requested_room_id, + "suggested_only": suggested_only, + "max_depth": max_depth, + # The stored state. + "room_queue": [ + attr.astuple(room_entry) for room_entry in room_queue + ], + "processed_rooms": list(processed_rooms), + }, + expiry_ms=self._PAGINATION_SESSION_VALIDITY_PERIOD_MS, + ) + + return result + + async def federation_space_summary( + self, + origin: str, + room_id: str, + suggested_only: bool, + max_rooms_per_space: Optional[int], + exclude_rooms: Iterable[str], + ) -> JsonDict: + """ + Implementation of the space summary Federation API + + Args: + origin: The server requesting the spaces summary. + + room_id: room id to start the summary at + + suggested_only: whether we should only return children with the "suggested" + flag set. + + max_rooms_per_space: an optional limit on the number of child rooms we will + return. Unlike the C-S API, this applies to the root room (room_id). + It is clipped to MAX_ROOMS_PER_SPACE. + + exclude_rooms: a list of rooms to skip over (presumably because the + calling server has already seen them). + + Returns: + summary dict to return + """ + # the queue of rooms to process + room_queue = deque((room_id,)) + + # the set of rooms that we should not walk further. Initialise it with the + # excluded-rooms list; we will add other rooms as we process them so that + # we do not loop. + processed_rooms: Set[str] = set(exclude_rooms) + + rooms_result: List[JsonDict] = [] + events_result: List[JsonDict] = [] + + while room_queue and len(rooms_result) < MAX_ROOMS: + room_id = room_queue.popleft() + if room_id in processed_rooms: + # already done this room + continue + + room_entry = await self._summarize_local_room( + None, origin, room_id, suggested_only, max_rooms_per_space + ) + + processed_rooms.add(room_id) + + if room_entry: + rooms_result.append(room_entry.room) + events_result.extend(room_entry.children_state_events) + + # add any children to the queue + room_queue.extend( + edge_event["state_key"] + for edge_event in room_entry.children_state_events + ) + + return {"rooms": rooms_result, "events": events_result} + + async def get_federation_hierarchy( + self, + origin: str, + requested_room_id: str, + suggested_only: bool, + ) -> JsonDict: + """ + Implementation of the room hierarchy Federation API. + + This is similar to get_room_hierarchy, but does not recurse into the space. + It also considers whether anyone on the server may be able to access the + room, as opposed to whether a specific user can. + + Args: + origin: The server requesting the spaces summary. + requested_room_id: The room ID to start the hierarchy at (the "root" room). + suggested_only: whether we should only return children with the "suggested" + flag set. + + Returns: + The JSON hierarchy dictionary. + """ + root_room_entry = await self._summarize_local_room( + None, origin, requested_room_id, suggested_only, max_children=None + ) + if root_room_entry is None: + # Room is inaccessible to the requesting server. + raise SynapseError(404, "Unknown room: %s" % (requested_room_id,)) + + children_rooms_result: List[JsonDict] = [] + inaccessible_children: List[str] = [] + + # Iterate through each child and potentially add it, but not its children, + # to the response. 
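The pagination token handling above persists the traversal state and then insists the identifying parameters match when the token is used again. A sketch with an in-memory store; Synapse keeps these sessions in the database, so the module-level dictionary and function names here are assumptions:

    import secrets
    import time
    from typing import Any, Dict, Optional

    _SESSIONS: Dict[str, Dict[str, Any]] = {}  # token -> saved traversal state
    _VALIDITY_MS = 5 * 60 * 1000

    def save_page(requester: str, room_id: str, suggested_only: bool,
                  max_depth: Optional[int], room_queue: list, processed: set) -> str:
        """Persist the walk state and return an opaque next_batch token."""
        token = secrets.token_urlsafe(16)
        _SESSIONS[token] = {
            "created_ms": time.monotonic() * 1000,
            # These four must be identical across pages of one session.
            "requester": requester,
            "room_id": room_id,
            "suggested_only": suggested_only,
            "max_depth": max_depth,
            "room_queue": room_queue,
            "processed_rooms": list(processed),
        }
        return token

    def resume_page(token: str, requester: str, room_id: str,
                    suggested_only: bool, max_depth: Optional[int]) -> Dict[str, Any]:
        session = _SESSIONS.get(token)
        if session is None or time.monotonic() * 1000 - session["created_ms"] > _VALIDITY_MS:
            raise ValueError("Unknown pagination token")
        if (requester, room_id, suggested_only, max_depth) != (
            session["requester"], session["room_id"],
            session["suggested_only"], session["max_depth"],
        ):
            # Changing any request parameter invalidates the session.
            raise ValueError("Unknown pagination token")
        return session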
+        for child_room in root_room_entry.children_state_events:
+            room_id = child_room.get("state_key")
+            assert isinstance(room_id, str)
+            # If the room is unknown, skip it.
+            if not await self._store.is_host_joined(room_id, self._server_name):
+                continue
+
+            room_entry = await self._summarize_local_room(
+                None, origin, room_id, suggested_only, max_children=0
+            )
+            # If the room is accessible, include it in the results.
+            #
+            # Note that only the room summary (without information on children)
+            # is included in the summary.
+            if room_entry:
+                children_rooms_result.append(room_entry.room)
+            # Otherwise, note that the requesting server shouldn't bother
+            # trying to summarize this room - they do not have access to it.
+            else:
+                inaccessible_children.append(room_id)
+
+        return {
+            # Include the requested room (including the stripped children events).
+            "room": root_room_entry.as_json(),
+            "children": children_rooms_result,
+            "inaccessible_children": inaccessible_children,
+        }
+
+    async def _summarize_local_room(
+        self,
+        requester: Optional[str],
+        origin: Optional[str],
+        room_id: str,
+        suggested_only: bool,
+        max_children: Optional[int],
+    ) -> Optional["_RoomEntry"]:
+        """
+        Generate a room entry and a list of event entries for a given room.
+
+        Args:
+            requester:
+                The user requesting the summary, if it is a local request. None
+                if this is a federation request.
+            origin:
+                The server requesting the summary, if it is a federation request.
+                None if this is a local request.
+            room_id: The room ID to summarize.
+            suggested_only: True if only suggested children should be returned.
+                Otherwise, all children are returned.
+            max_children:
+                The maximum number of children rooms to include. This is capped
+                to a server-set limit.
+
+        Returns:
+            A room entry if the room should be returned. None, otherwise.
+        """
+        if not await self._is_local_room_accessible(room_id, requester, origin):
+            return None
+
+        room_entry = await self._build_room_entry(room_id, for_federation=bool(origin))
+
+        # If the room is not a space or the children don't matter, return just
+        # the room information.
+        if room_entry.get("room_type") != RoomTypes.SPACE or max_children == 0:
+            return _RoomEntry(room_id, room_entry)
+
+        # Otherwise, look for child rooms/spaces.
+        child_events = await self._get_child_events(room_id)
+
+        if suggested_only:
+            # we only care about suggested children
+            child_events = filter(_is_suggested_child_event, child_events)
+
+        if max_children is None or max_children > MAX_ROOMS_PER_SPACE:
+            max_children = MAX_ROOMS_PER_SPACE
+
+        stripped_events: List[JsonDict] = [
+            {
+                "type": e.type,
+                "state_key": e.state_key,
+                "content": e.content,
+                "room_id": e.room_id,
+                "sender": e.sender,
+                "origin_server_ts": e.origin_server_ts,
+            }
+            for e in itertools.islice(child_events, max_children)
+        ]
+        return _RoomEntry(room_id, room_entry, stripped_events)
+
+    async def _summarize_remote_room(
+        self,
+        room: "_RoomQueueEntry",
+        suggested_only: bool,
+        max_children: Optional[int],
+        exclude_rooms: Iterable[str],
+    ) -> Iterable["_RoomEntry"]:
+        """
+        Request room entries and a list of event entries for a given room by querying a remote server.
+
+        Args:
+            room: The room to summarize.
+            suggested_only: True if only suggested children should be returned.
+                Otherwise, all children are returned.
+            max_children:
+                The maximum number of children rooms to include. This is capped
+                to a server-set limit.
+            exclude_rooms:
+                Room IDs which do not need to be summarized.
+
+        Returns:
+            An iterable of room entries.
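The stripped-state construction in _summarize_local_room above keeps six keys of each child event and caps how many are returned. A standalone equivalent over plain dictionaries rather than EventBase objects:

    import itertools
    from typing import Dict, Iterable, List

    _STRIPPED_KEYS = ("type", "state_key", "content", "room_id", "sender", "origin_server_ts")

    def strip_child_events(child_events: Iterable[Dict], max_children: int) -> List[Dict]:
        """Reduce child events to the stripped-state shape the summary
        APIs return, including at most max_children of them."""
        return [
            {key: ev[key] for key in _STRIPPED_KEYS if key in ev}
            for ev in itertools.islice(child_events, max_children)
        ]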
+ """ + room_id = room.room_id + logger.info("Requesting summary for %s via %s", room_id, room.via) + + # we need to make the exclusion list json-serialisable + exclude_rooms = list(exclude_rooms) + + via = itertools.islice(room.via, MAX_SERVERS_PER_SPACE) + try: + res = await self._federation_client.get_space_summary( + via, + room_id, + suggested_only=suggested_only, + max_rooms_per_space=max_children, + exclude_rooms=exclude_rooms, + ) + except Exception as e: + logger.warning( + "Unable to get summary of %s via federation: %s", + room_id, + e, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) + return () + + # Group the events by their room. + children_by_room: Dict[str, List[JsonDict]] = {} + for ev in res.events: + if ev.event_type == EventTypes.SpaceChild: + children_by_room.setdefault(ev.room_id, []).append(ev.data) + + # Generate the final results. + results = [] + for fed_room in res.rooms: + fed_room_id = fed_room.get("room_id") + if not fed_room_id or not isinstance(fed_room_id, str): + continue + + results.append( + _RoomEntry( + fed_room_id, + fed_room, + children_by_room.get(fed_room_id, []), + ) + ) + + return results + + async def _summarize_remote_room_hierarchy( + self, room: "_RoomQueueEntry", suggested_only: bool + ) -> Tuple[Optional["_RoomEntry"], Dict[str, JsonDict], Set[str]]: + """ + Request room entries and a list of event entries for a given room by querying a remote server. + + Args: + room: The room to summarize. + suggested_only: True if only suggested children should be returned. + Otherwise, all children are returned. + + Returns: + A tuple of: + The room entry. + Partial room data return over federation. + A set of inaccessible children room IDs. + """ + room_id = room.room_id + logger.info("Requesting summary for %s via %s", room_id, room.via) + + via = itertools.islice(room.via, MAX_SERVERS_PER_SPACE) + try: + ( + room_response, + children, + inaccessible_children, + ) = await self._federation_client.get_room_hierarchy( + via, + room_id, + suggested_only=suggested_only, + ) + except Exception as e: + logger.warning( + "Unable to get hierarchy of %s via federation: %s", + room_id, + e, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) + return None, {}, set() + + # Map the children to their room ID. + children_by_room_id = { + c["room_id"]: c + for c in children + if "room_id" in c and isinstance(c["room_id"], str) + } + + return ( + _RoomEntry(room_id, room_response, room_response.pop("children_state", ())), + children_by_room_id, + set(inaccessible_children), + ) + + async def _is_local_room_accessible( + self, room_id: str, requester: Optional[str], origin: Optional[str] = None + ) -> bool: + """ + Calculate whether the room should be shown to the requester. + + It should return true if: + + * The requester is joined or can join the room (per MSC3173). + * The origin server has any user that is joined or can join the room. + * The history visibility is set to world readable. + + Args: + room_id: The room ID to check accessibility of. + requester: + The user making the request, if it is a local request. + None if this is a federation request. + origin: + The server making the request, if it is a federation request. + None if this is a local request. + + Returns: + True if the room is accessible to the requesting user or server. + """ + state_ids = await self._store.get_current_state_ids(room_id) + + # If there's no state for the room, it isn't known. + if not state_ids: + # The user might have a pending invite for the room. 
+ if requester and await self._store.get_invite_for_local_user_in_room( + requester, room_id + ): + return True + + logger.info("room %s is unknown, omitting from summary", room_id) + return False + + try: + room_version = await self._store.get_room_version(room_id) + except UnsupportedRoomVersionError: + # If a room with an unsupported room version is encountered, ignore + # it to avoid breaking the entire summary response. + return False + + # Include the room if it has join rules of public or knock. + join_rules_event_id = state_ids.get((EventTypes.JoinRules, "")) + if join_rules_event_id: + join_rules_event = await self._store.get_event(join_rules_event_id) + join_rule = join_rules_event.content.get("join_rule") + if join_rule == JoinRules.PUBLIC or ( + room_version.msc2403_knocking and join_rule == JoinRules.KNOCK + ): + return True + + # Include the room if it is peekable. + hist_vis_event_id = state_ids.get((EventTypes.RoomHistoryVisibility, "")) + if hist_vis_event_id: + hist_vis_ev = await self._store.get_event(hist_vis_event_id) + hist_vis = hist_vis_ev.content.get("history_visibility") + if hist_vis == HistoryVisibility.WORLD_READABLE: + return True + + # Otherwise we need to check information specific to the user or server. + + # If we have an authenticated requesting user, check if they are a member + # of the room (or can join the room). + if requester: + member_event_id = state_ids.get((EventTypes.Member, requester), None) + + # If they're in the room they can see info on it. + if member_event_id: + member_event = await self._store.get_event(member_event_id) + if member_event.membership in (Membership.JOIN, Membership.INVITE): + return True + + # Otherwise, check if they should be allowed access via membership in a space. + if await self._event_auth_handler.has_restricted_join_rules( + state_ids, room_version + ): + allowed_rooms = ( + await self._event_auth_handler.get_rooms_that_allow_join(state_ids) + ) + if await self._event_auth_handler.is_user_in_rooms( + allowed_rooms, requester + ): + return True + + # If this is a request over federation, check if the host is in the room or + # has a user who could join the room. + elif origin: + if await self._event_auth_handler.check_host_in_room( + room_id, origin + ) or await self._store.is_host_invited(room_id, origin): + return True + + # Alternately, if the host has a user in any of the spaces specified + # for access, then the host can see this room (and should do filtering + # if the requester cannot see it). + if await self._event_auth_handler.has_restricted_join_rules( + state_ids, room_version + ): + allowed_rooms = ( + await self._event_auth_handler.get_rooms_that_allow_join(state_ids) + ) + for space_id in allowed_rooms: + if await self._event_auth_handler.check_host_in_room( + space_id, origin + ): + return True + + logger.info( + "room %s is unpeekable and requester %s is not a member / not allowed to join, omitting from summary", + room_id, + requester or origin, + ) + return False + + async def _is_remote_room_accessible( + self, requester: str, room_id: str, room: JsonDict + ) -> bool: + """ + Calculate whether the room received over federation should be shown to the requester. + + It should return true if: + + * The requester is joined or can join the room (per MSC3173). + * The history visibility is set to world readable. + + Note that the local server is not in the requested room (which is why the + remote call was made in the first place), but the user could have access + due to an invite, etc. 
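The remote-room visibility rule described above reduces to a few checks. A compact, dict-based approximation; the real method additionally falls back to the local accessibility check, as noted in the final comment:

    from typing import Iterable

    def remote_room_is_visible(room: dict, users_rooms: Iterable[str]) -> bool:
        """Approximate the federation-summary visibility rule: publicly
        joinable or world-readable rooms are visible to anyone; otherwise
        membership in one of the allowed spaces is required."""
        if room.get("join_rules") in ("public", "knock") or room.get("world_readable") is True:
            return True
        allowed = room.get("allowed_room_ids") or room.get("allowed_spaces")
        if isinstance(allowed, list):
            joined = set(users_rooms)
            if any(room_id in joined for room_id in allowed):
                return True
        # The real check finally falls back to local accessibility (pending
        # invites, child-room membership, and so on).
        return False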
+
+        Args:
+            requester: The user requesting the summary.
+            room_id: The room ID returned over federation.
+            room: The summary of the room returned over federation.
+
+        Returns:
+            True if the room is accessible to the requesting user.
+        """
+        # The API doesn't return the room version so assume that a
+        # join rule of knock is valid.
+        if (
+            room.get("join_rules") in (JoinRules.PUBLIC, JoinRules.KNOCK)
+            or room.get("world_readable") is True
+        ):
+            return True
+
+        # Check if the user is a member of any of the allowed spaces
+        # from the response.
+        allowed_rooms = room.get("allowed_room_ids") or room.get("allowed_spaces")
+        if allowed_rooms and isinstance(allowed_rooms, list):
+            if await self._event_auth_handler.is_user_in_rooms(
+                allowed_rooms, requester
+            ):
+                return True
+
+        # Finally, check locally if we can access the room. The user might
+        # already be in the room (if it was a child room), or there might be a
+        # pending invite, etc.
+        return await self._is_local_room_accessible(room_id, requester)
+
+    async def _build_room_entry(self, room_id: str, for_federation: bool) -> JsonDict:
+        """
+        Generate an entry summarising a single room.
+
+        Args:
+            room_id: The room ID to summarize.
+            for_federation: True if this is a summary requested over federation
+                (which includes additional fields).
+
+        Returns:
+            The JSON dictionary for the room.
+        """
+        stats = await self._store.get_room_with_stats(room_id)
+
+        # currently this should be impossible because we call
+        # _is_local_room_accessible on the room before we get here, so
+        # there should always be an entry
+        assert stats is not None, "unable to retrieve stats for %s" % (room_id,)
+
+        current_state_ids = await self._store.get_current_state_ids(room_id)
+        create_event = await self._store.get_event(
+            current_state_ids[(EventTypes.Create, "")]
+        )
+
+        entry = {
+            "room_id": stats["room_id"],
+            "name": stats["name"],
+            "topic": stats["topic"],
+            "canonical_alias": stats["canonical_alias"],
+            "num_joined_members": stats["joined_members"],
+            "avatar_url": stats["avatar"],
+            "join_rules": stats["join_rules"],
+            "world_readable": (
+                stats["history_visibility"] == HistoryVisibility.WORLD_READABLE
+            ),
+            "guest_can_join": stats["guest_access"] == "can_join",
+            "creation_ts": create_event.origin_server_ts,
+            "room_type": create_event.content.get(EventContentFields.ROOM_TYPE),
+        }
+
+        # Federation requests need to provide additional information so the
+        # requesting server is able to filter the response appropriately.
+        if for_federation:
+            room_version = await self._store.get_room_version(room_id)
+            if await self._event_auth_handler.has_restricted_join_rules(
+                current_state_ids, room_version
+            ):
+                allowed_rooms = (
+                    await self._event_auth_handler.get_rooms_that_allow_join(
+                        current_state_ids
+                    )
+                )
+                if allowed_rooms:
+                    entry["allowed_room_ids"] = allowed_rooms
+                    # TODO Remove this key once the API is stable.
+                    entry["allowed_spaces"] = allowed_rooms
+
+        # Filter out Nones, i.e. omit the field altogether
+        room_entry = {k: v for k, v in entry.items() if v is not None}
+
+        return room_entry
+
+    async def _get_child_events(self, room_id: str) -> Iterable[EventBase]:
+        """
+        Get the child events for a given room.
+
+        The returned results are sorted for stability.
+
+        Args:
+            room_id: The room id to get the children of.
+
+        Returns:
+            An iterable of sorted child events.
+        """
+
+        # look for child rooms/spaces.
+ current_state_ids = await self._store.get_current_state_ids(room_id) + + events = await self._store.get_events_as_list( + [ + event_id + for key, event_id in current_state_ids.items() + if key[0] == EventTypes.SpaceChild + ] + ) + + # filter out any events without a "via" (which implies it has been redacted), + # and order to ensure we return stable results. + return sorted(filter(_has_valid_via, events), key=_child_events_comparison_key) + + async def get_room_summary( + self, + requester: Optional[str], + room_id: str, + remote_room_hosts: Optional[List[str]] = None, + ) -> JsonDict: + """ + Implementation of the room summary C-S API from MSC3266 + + Args: + requester: user id of the user making this request, will be None + for unauthenticated requests + + room_id: room id to summarise. + + remote_room_hosts: a list of homeservers to try fetching data through + if we don't know it ourselves + + Returns: + summary dict to return + """ + is_in_room = await self._store.is_host_joined(room_id, self._server_name) + + if is_in_room: + room_entry = await self._summarize_local_room( + requester, + None, + room_id, + # Suggested-only doesn't matter since no children are requested. + suggested_only=False, + max_children=0, + ) + + if not room_entry: + raise NotFoundError("Room not found or is not accessible") + + room_summary = room_entry.room + + # If there was a requester, add their membership. + if requester: + ( + membership, + _, + ) = await self._store.get_local_current_membership_for_user_in_room( + requester, room_id + ) + + room_summary["membership"] = membership or "leave" + else: + # TODO federation API, descoped from initial unstable implementation + # as MSC needs more maturing on that side. + raise SynapseError(400, "Federation is not currently supported.") + + return room_summary + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _RoomQueueEntry: + # The room ID of this entry. + room_id: str + # The server to query if the room is not known locally. + via: Sequence[str] + # The minimum number of hops necessary to get to this room (compared to the + # originally requested room). + depth: int = 0 + # The room summary for this room returned via federation. This will only be + # used if the room is not known locally (and is not a space). + remote_room: Optional[JsonDict] = None + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _RoomEntry: + room_id: str + # The room summary for this room. + room: JsonDict + # An iterable of the sorted, stripped children events for children of this room. + # + # This may not include all children. + children_state_events: Sequence[JsonDict] = () + + def as_json(self) -> JsonDict: + """ + Returns a JSON dictionary suitable for the room hierarchy endpoint. + + It returns the room summary including the stripped m.space.child events + as a sub-key. 
+ """ + result = dict(self.room) + result["children_state"] = self.children_state_events + return result + + +def _has_valid_via(e: EventBase) -> bool: + via = e.content.get("via") + if not via or not isinstance(via, Sequence): + return False + for v in via: + if not isinstance(v, str): + logger.debug("Ignoring edge event %s with invalid via entry", e.event_id) + return False + return True + + +def _is_suggested_child_event(edge_event: EventBase) -> bool: + suggested = edge_event.content.get("suggested") + if isinstance(suggested, bool) and suggested: + return True + logger.debug("Ignorning not-suggested child %s", edge_event.state_key) + return False + + +# Order may only contain characters in the range of \x20 (space) to \x7E (~) inclusive. +_INVALID_ORDER_CHARS_RE = re.compile(r"[^\x20-\x7E]") + + +def _child_events_comparison_key( + child: EventBase, +) -> Tuple[bool, Optional[str], int, str]: + """ + Generate a value for comparing two child events for ordering. + + The rules for ordering are: + + 1. The 'order' key, if it is valid. + 2. The 'origin_server_ts' of the 'm.space.child' event. + 3. The 'room_id'. + + Args: + child: The event for generating a comparison key. + + Returns: + The comparison key as a tuple of: + False if the ordering is valid. + The 'order' field or None if it is not given or invalid. + The 'origin_server_ts' field. + The room ID. + """ + order = child.content.get("order") + # If order is not a string or doesn't meet the requirements, ignore it. + if not isinstance(order, str): + order = None + elif len(order) > 50 or _INVALID_ORDER_CHARS_RE.search(order): + order = None + + # Items without an order come last. + return order is None, order, child.origin_server_ts, child.room_id diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py index e6e71e972964..727d75a50c6c 100644 --- a/synapse/handlers/saml.py +++ b/synapse/handlers/saml.py @@ -22,7 +22,6 @@ from synapse.api.errors import SynapseError from synapse.config import ConfigError -from synapse.handlers._base import BaseHandler from synapse.handlers.sso import MappingException, UserAttributes from synapse.http.servlet import parse_string from synapse.http.site import SynapseRequest @@ -40,33 +39,34 @@ logger = logging.getLogger(__name__) -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class Saml2SessionData: """Data we track about SAML2 sessions""" # time the session was created, in milliseconds - creation_time = attr.ib() + creation_time: int # The user interactive authentication session ID associated with this SAML # session (or None if this SAML session is for an initial login). 
- ui_auth_session_id = attr.ib(type=Optional[str], default=None) + ui_auth_session_id: Optional[str] = None -class SamlHandler(BaseHandler): +class SamlHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) - self._saml_client = Saml2Client(hs.config.saml2_sp_config) - self._saml_idp_entityid = hs.config.saml2_idp_entityid + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self._saml_client = Saml2Client(hs.config.saml2.saml2_sp_config) + self._saml_idp_entityid = hs.config.saml2.saml2_idp_entityid - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._saml2_session_lifetime = hs.config.saml2.saml2_session_lifetime self._grandfathered_mxid_source_attribute = ( - hs.config.saml2_grandfathered_mxid_source_attribute + hs.config.saml2.saml2_grandfathered_mxid_source_attribute ) self._saml2_attribute_requirements = hs.config.saml2.attribute_requirements - self._error_template = hs.config.sso_error_template # plugin to do custom mapping from saml response to mxid - self._user_mapping_provider = hs.config.saml2_user_mapping_provider_class( - hs.config.saml2_user_mapping_provider_config, + self._user_mapping_provider = hs.config.saml2.saml2_user_mapping_provider_class( + hs.config.saml2.saml2_user_mapping_provider_config, ModuleApi(hs, hs.get_auth_handler()), ) @@ -80,7 +80,6 @@ def __init__(self, hs: "HomeServer"): # the SsoIdentityProvider protocol type. self.idp_icon = None self.idp_brand = None - self.unstable_idp_brand = None # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict: Dict[str, Saml2SessionData] = {} @@ -360,7 +359,7 @@ def _remote_id_from_saml_response( return remote_user_id - def expire_sessions(self): + def expire_sessions(self) -> None: expire_before = self.clock.time_msec() - self._saml2_session_lifetime to_expire = set() for reqid, data in self._outstanding_requests_dict.items(): @@ -392,10 +391,10 @@ def dot_replace_for_mxid(username: str) -> str: } -@attr.s +@attr.s(auto_attribs=True) class SamlConfig: - mxid_source_attribute = attr.ib() - mxid_mapper = attr.ib() + mxid_source_attribute: str + mxid_mapper: Callable[[str], str] class DefaultSamlMappingProvider: @@ -412,7 +411,7 @@ def __init__(self, parsed_config: SamlConfig, module_api: ModuleApi): self._mxid_mapper = parsed_config.mxid_mapper self._grandfathered_mxid_source_attribute = ( - module_api._hs.config.saml2_grandfathered_mxid_source_attribute + module_api._hs.config.saml2.saml2_grandfathered_mxid_source_attribute ) def get_remote_user_id( diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8226d6f5a1a8..ab7eaab2fb56 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -26,17 +26,18 @@ from synapse.types import JsonDict, UserID from synapse.visibility import filter_events_for_client -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) -class SearchHandler(BaseHandler): +class SearchHandler: def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() + self.state_handler = hs.get_state_handler() + self.clock = hs.get_clock() + self.hs = hs self._event_serializer = hs.get_event_client_serializer() self.storage = hs.get_storage() self.state_store = self.storage.state @@ -105,7 +106,7 @@ async def search( dict to be returned to the client with results of search """ - if not self.hs.config.enable_search: + if not self.hs.config.server.enable_search: 
raise SynapseError(400, "Search is disabled on this homeserver") batch_group = None @@ -179,7 +180,7 @@ async def search( % (set(group_keys) - {"room_id", "sender"},), ) - search_filter = Filter(filter_dict) + search_filter = Filter(self.hs, filter_dict) # TODO: Search through left rooms too rooms = await self.store.get_rooms_for_local_user_where_membership_is( @@ -241,14 +242,14 @@ async def search( rank_map.update({r["event"].event_id: r["rank"] for r in results}) - filtered_events = search_filter.filter([r["event"] for r in results]) + filtered_events = await search_filter.filter([r["event"] for r in results]) events = await filter_events_for_client( self.storage, user.to_string(), filtered_events ) events.sort(key=lambda e: -rank_map[e.event_id]) - allowed_events = events[: search_filter.limit()] + allowed_events = events[: search_filter.limit] for e in allowed_events: rm = room_groups.setdefault( @@ -270,13 +271,13 @@ async def search( # We keep looping and we keep filtering until we reach the limit # or we run out of things. # But only go around 5 times since otherwise synapse will be sad. - while len(room_events) < search_filter.limit() and i < 5: + while len(room_events) < search_filter.limit and i < 5: i += 1 search_result = await self.store.search_rooms( room_ids, search_term, keys, - search_filter.limit() * 2, + search_filter.limit * 2, pagination_token=pagination_token, ) @@ -291,16 +292,18 @@ async def search( rank_map.update({r["event"].event_id: r["rank"] for r in results}) - filtered_events = search_filter.filter([r["event"] for r in results]) + filtered_events = await search_filter.filter( + [r["event"] for r in results] + ) events = await filter_events_for_client( self.storage, user.to_string(), filtered_events ) room_events.extend(events) - room_events = room_events[: search_filter.limit()] + room_events = room_events[: search_filter.limit] - if len(results) < search_filter.limit() * 2: + if len(results) < search_filter.limit * 2: pagination_token = None break else: @@ -310,7 +313,7 @@ async def search( group = room_groups.setdefault(event.room_id, {"results": []}) group["results"].append(event.event_id) - if room_events and len(room_events) >= search_filter.limit(): + if room_events and len(room_events) >= search_filter.limit: last_event_id = room_events[-1].event_id pagination_token = results_map[last_event_id]["pagination_token"] diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py index e9f6aef06f01..1a062a784cd4 100644 --- a/synapse/handlers/send_email.py +++ b/synapse/handlers/send_email.py @@ -16,7 +16,15 @@ import logging from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText -from typing import TYPE_CHECKING +from io import BytesIO +from typing import TYPE_CHECKING, Any, Optional + +from pkg_resources import parse_version + +import twisted +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IOpenSSLContextFactory, IReactorTCP +from twisted.mail.smtp import ESMTPSender, ESMTPSenderFactory from synapse.logging.context import make_deferred_yieldable @@ -25,20 +33,107 @@ logger = logging.getLogger(__name__) +_is_old_twisted = parse_version(twisted.__version__) < parse_version("21") + + +class _NoTLSESMTPSender(ESMTPSender): + """Extend ESMTPSender to disable TLS + + Unfortunately, before Twisted 21.2, ESMTPSender doesn't give an easy way to disable + TLS, so we override its internal method which it uses to generate a context factory. 
+ """ + + def _getContextFactory(self) -> Optional[IOpenSSLContextFactory]: + return None + + +async def _sendmail( + reactor: IReactorTCP, + smtphost: str, + smtpport: int, + from_addr: str, + to_addr: str, + msg_bytes: bytes, + username: Optional[bytes] = None, + password: Optional[bytes] = None, + require_auth: bool = False, + require_tls: bool = False, + enable_tls: bool = True, +) -> None: + """A simple wrapper around ESMTPSenderFactory, to allow substitution in tests + + Params: + reactor: reactor to use to make the outbound connection + smtphost: hostname to connect to + smtpport: port to connect to + from_addr: "From" address for email + to_addr: "To" address for email + msg_bytes: Message content + username: username to authenticate with, if auth is enabled + password: password to give when authenticating + require_auth: if auth is not offered, fail the request + require_tls: if TLS is not offered, fail the reqest + enable_tls: True to enable TLS. If this is False and require_tls is True, + the request will fail. + """ + msg = BytesIO(msg_bytes) + d: "Deferred[object]" = Deferred() + + def build_sender_factory(**kwargs: Any) -> ESMTPSenderFactory: + return ESMTPSenderFactory( + username, + password, + from_addr, + to_addr, + msg, + d, + heloFallback=True, + requireAuthentication=require_auth, + requireTransportSecurity=require_tls, + **kwargs, + ) + + if _is_old_twisted: + # before twisted 21.2, we have to override the ESMTPSender protocol to disable + # TLS + factory = build_sender_factory() + + if not enable_tls: + factory.protocol = _NoTLSESMTPSender + else: + # for twisted 21.2 and later, there is a 'hostname' parameter which we should + # set to enable TLS. + factory = build_sender_factory(hostname=smtphost if enable_tls else None) + + reactor.connectTCP( + smtphost, # type: ignore[arg-type] + smtpport, + factory, + timeout=30, + bindAddress=None, + ) + + await make_deferred_yieldable(d) + class SendEmailHandler: def __init__(self, hs: "HomeServer"): self.hs = hs - self._sendmail = hs.get_sendmail() self._reactor = hs.get_reactor() self._from = hs.config.email.email_notif_from self._smtp_host = hs.config.email.email_smtp_host self._smtp_port = hs.config.email.email_smtp_port - self._smtp_user = hs.config.email.email_smtp_user - self._smtp_pass = hs.config.email.email_smtp_pass + + user = hs.config.email.email_smtp_user + self._smtp_user = user.encode("utf-8") if user is not None else None + passwd = hs.config.email.email_smtp_pass + self._smtp_pass = passwd.encode("utf-8") if passwd is not None else None self._require_transport_security = hs.config.email.require_transport_security + self._enable_tls = hs.config.email.enable_smtp_tls + + self._sendmail = _sendmail async def send_email( self, @@ -82,17 +177,16 @@ async def send_email( logger.info("Sending email to %s" % email_address) - await make_deferred_yieldable( - self._sendmail( - self._smtp_host, - raw_from, - raw_to, - multipart_msg.as_string().encode("utf8"), - reactor=self._reactor, - port=self._smtp_port, - requireAuthentication=self._smtp_user is not None, - username=self._smtp_user, - password=self._smtp_pass, - requireTransportSecurity=self._require_transport_security, - ) + await self._sendmail( + self._reactor, + self._smtp_host, + self._smtp_port, + raw_from, + raw_to, + multipart_msg.as_string().encode("utf8"), + username=self._smtp_user, + password=self._smtp_pass, + require_auth=self._smtp_user is not None, + require_tls=self._require_transport_security, + enable_tls=self._enable_tls, ) diff --git 
a/synapse/handlers/set_password.py b/synapse/handlers/set_password.py index a63fac828342..706ad72761a3 100644 --- a/synapse/handlers/set_password.py +++ b/synapse/handlers/set_password.py @@ -17,19 +17,17 @@ from synapse.api.errors import Codes, StoreError, SynapseError from synapse.types import Requester -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) -class SetPasswordHandler(BaseHandler): +class SetPasswordHandler: """Handler which deals with changing user account passwords""" def __init__(self, hs: "HomeServer"): - super().__init__(hs) + self.store = hs.get_datastore() self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() diff --git a/synapse/handlers/space_summary.py b/synapse/handlers/space_summary.py deleted file mode 100644 index 5f7d4602bd8d..000000000000 --- a/synapse/handlers/space_summary.py +++ /dev/null @@ -1,667 +0,0 @@ -# Copyright 2021 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -import logging -import re -from collections import deque -from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple - -import attr - -from synapse.api.constants import ( - EventContentFields, - EventTypes, - HistoryVisibility, - JoinRules, - Membership, - RoomTypes, -) -from synapse.events import EventBase -from synapse.events.utils import format_event_for_client_v2 -from synapse.types import JsonDict - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - -# number of rooms to return. We'll stop once we hit this limit. -MAX_ROOMS = 50 - -# max number of events to return per room. -MAX_ROOMS_PER_SPACE = 50 - -# max number of federation servers to hit per room -MAX_SERVERS_PER_SPACE = 3 - - -class SpaceSummaryHandler: - def __init__(self, hs: "HomeServer"): - self._clock = hs.get_clock() - self._auth = hs.get_auth() - self._event_auth_handler = hs.get_event_auth_handler() - self._store = hs.get_datastore() - self._event_serializer = hs.get_event_client_serializer() - self._server_name = hs.hostname - self._federation_client = hs.get_federation_client() - - async def get_space_summary( - self, - requester: str, - room_id: str, - suggested_only: bool = False, - max_rooms_per_space: Optional[int] = None, - ) -> JsonDict: - """ - Implementation of the space summary C-S API - - Args: - requester: user id of the user making this request - - room_id: room id to start the summary at - - suggested_only: whether we should only return children with the "suggested" - flag set. - - max_rooms_per_space: an optional limit on the number of child rooms we will - return. This does not apply to the root room (ie, room_id), and - is overridden by MAX_ROOMS_PER_SPACE. 
- - Returns: - summary dict to return - """ - # first of all, check that the user is in the room in question (or it's - # world-readable) - await self._auth.check_user_in_room_or_world_readable(room_id, requester) - - # the queue of rooms to process - room_queue = deque((_RoomQueueEntry(room_id, ()),)) - - # rooms we have already processed - processed_rooms: Set[str] = set() - - # events we have already processed. We don't necessarily have their event ids, - # so instead we key on (room id, state key) - processed_events: Set[Tuple[str, str]] = set() - - rooms_result: List[JsonDict] = [] - events_result: List[JsonDict] = [] - - while room_queue and len(rooms_result) < MAX_ROOMS: - queue_entry = room_queue.popleft() - room_id = queue_entry.room_id - if room_id in processed_rooms: - # already done this room - continue - - logger.debug("Processing room %s", room_id) - - is_in_room = await self._store.is_host_joined(room_id, self._server_name) - - # The client-specified max_rooms_per_space limit doesn't apply to the - # room_id specified in the request, so we ignore it if this is the - # first room we are processing. - max_children = max_rooms_per_space if processed_rooms else None - - if is_in_room: - room, events = await self._summarize_local_room( - requester, None, room_id, suggested_only, max_children - ) - - logger.debug( - "Query of local room %s returned events %s", - room_id, - ["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events], - ) - - if room: - rooms_result.append(room) - else: - fed_rooms, fed_events = await self._summarize_remote_room( - queue_entry, - suggested_only, - max_children, - exclude_rooms=processed_rooms, - ) - - # The results over federation might include rooms that the we, - # as the requesting server, are allowed to see, but the requesting - # user is not permitted see. - # - # Filter the returned results to only what is accessible to the user. - room_ids = set() - events = [] - for room in fed_rooms: - fed_room_id = room.get("room_id") - if not fed_room_id or not isinstance(fed_room_id, str): - continue - - # The room should only be included in the summary if: - # a. the user is in the room; - # b. the room is world readable; or - # c. the user could join the room, e.g. the join rules - # are set to public or the user is in a space that - # has been granted access to the room. - # - # Note that we know the user is not in the root room (which is - # why the remote call was made in the first place), but the user - # could be in one of the children rooms and we just didn't know - # about the link. - - # The API doesn't return the room version so assume that a - # join rule of knock is valid. - include_room = ( - room.get("join_rules") in (JoinRules.PUBLIC, JoinRules.KNOCK) - or room.get("world_readable") is True - ) - - # Check if the user is a member of any of the allowed spaces - # from the response. - allowed_rooms = room.get("allowed_spaces") - if ( - not include_room - and allowed_rooms - and isinstance(allowed_rooms, list) - ): - include_room = await self._event_auth_handler.is_user_in_rooms( - allowed_rooms, requester - ) - - # Finally, if this isn't the requested room, check ourselves - # if we can access the room. - if not include_room and fed_room_id != queue_entry.room_id: - include_room = await self._is_room_accessible( - fed_room_id, requester, None - ) - - # The user can see the room, include it! 
- if include_room: - rooms_result.append(room) - room_ids.add(fed_room_id) - - # All rooms returned don't need visiting again (even if the user - # didn't have access to them). - processed_rooms.add(fed_room_id) - - for event in fed_events: - if event.get("room_id") in room_ids: - events.append(event) - - logger.debug( - "Query of %s returned rooms %s, events %s", - room_id, - [room.get("room_id") for room in fed_rooms], - ["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in fed_events], - ) - - # the room we queried may or may not have been returned, but don't process - # it again, anyway. - processed_rooms.add(room_id) - - # XXX: is it ok that we blindly iterate through any events returned by - # a remote server, whether or not they actually link to any rooms in our - # tree? - for ev in events: - # remote servers might return events we have already processed - # (eg, Dendrite returns inward pointers as well as outward ones), so - # we need to filter them out, to avoid returning duplicate links to the - # client. - ev_key = (ev["room_id"], ev["state_key"]) - if ev_key in processed_events: - continue - events_result.append(ev) - - # add the child to the queue. we have already validated - # that the vias are a list of server names. - room_queue.append( - _RoomQueueEntry(ev["state_key"], ev["content"]["via"]) - ) - processed_events.add(ev_key) - - # Before returning to the client, remove the allowed_spaces key for any - # rooms. - for room in rooms_result: - room.pop("allowed_spaces", None) - - return {"rooms": rooms_result, "events": events_result} - - async def federation_space_summary( - self, - origin: str, - room_id: str, - suggested_only: bool, - max_rooms_per_space: Optional[int], - exclude_rooms: Iterable[str], - ) -> JsonDict: - """ - Implementation of the space summary Federation API - - Args: - origin: The server requesting the spaces summary. - - room_id: room id to start the summary at - - suggested_only: whether we should only return children with the "suggested" - flag set. - - max_rooms_per_space: an optional limit on the number of child rooms we will - return. Unlike the C-S API, this applies to the root room (room_id). - It is clipped to MAX_ROOMS_PER_SPACE. - - exclude_rooms: a list of rooms to skip over (presumably because the - calling server has already seen them). - - Returns: - summary dict to return - """ - # the queue of rooms to process - room_queue = deque((room_id,)) - - # the set of rooms that we should not walk further. Initialise it with the - # excluded-rooms list; we will add other rooms as we process them so that - # we do not loop. 
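The deleted walker here and the hierarchy code that replaces it share the same traversal shape: a FIFO queue seeded with the requested room, a set of processed rooms that both breaks cycles and honours the exclusion list, and a hard cap on the number of results. A generic sketch of that pattern, with `get_children` standing in for the real child-event lookup:

```python
# Generic sketch of the queue-plus-seen-set walk used by the summary handlers.
# `get_children` is a placeholder, not a Synapse API.
from collections import deque
from typing import Callable, Iterable, List

MAX_ROOMS = 50  # mirrors the cap in the handler


def walk_rooms(
    root: str,
    get_children: Callable[[str], Iterable[str]],
    exclude: Iterable[str] = (),
) -> List[str]:
    queue = deque([root])
    processed = set(exclude)  # rooms we must not visit (again)
    result: List[str] = []
    while queue and len(result) < MAX_ROOMS:
        room_id = queue.popleft()
        if room_id in processed:
            continue  # already done this room
        processed.add(room_id)
        result.append(room_id)
        queue.extend(get_children(room_id))
    return result
```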
- processed_rooms: Set[str] = set(exclude_rooms) - - rooms_result: List[JsonDict] = [] - events_result: List[JsonDict] = [] - - while room_queue and len(rooms_result) < MAX_ROOMS: - room_id = room_queue.popleft() - if room_id in processed_rooms: - # already done this room - continue - - logger.debug("Processing room %s", room_id) - - room, events = await self._summarize_local_room( - None, origin, room_id, suggested_only, max_rooms_per_space - ) - - processed_rooms.add(room_id) - - if room: - rooms_result.append(room) - events_result.extend(events) - - # add any children to the queue - room_queue.extend(edge_event["state_key"] for edge_event in events) - - return {"rooms": rooms_result, "events": events_result} - - async def _summarize_local_room( - self, - requester: Optional[str], - origin: Optional[str], - room_id: str, - suggested_only: bool, - max_children: Optional[int], - ) -> Tuple[Optional[JsonDict], Sequence[JsonDict]]: - """ - Generate a room entry and a list of event entries for a given room. - - Args: - requester: - The user requesting the summary, if it is a local request. None - if this is a federation request. - origin: - The server requesting the summary, if it is a federation request. - None if this is a local request. - room_id: The room ID to summarize. - suggested_only: True if only suggested children should be returned. - Otherwise, all children are returned. - max_children: - The maximum number of children rooms to include. This is capped - to a server-set limit. - - Returns: - A tuple of: - The room information, if the room should be returned to the - user. None, otherwise. - - An iterable of the sorted children events. This may be limited - to a maximum size or may include all children. - """ - if not await self._is_room_accessible(room_id, requester, origin): - return None, () - - room_entry = await self._build_room_entry(room_id) - - # If the room is not a space, return just the room information. - if room_entry.get("room_type") != RoomTypes.SPACE: - return room_entry, () - - # Otherwise, look for child rooms/spaces. - child_events = await self._get_child_events(room_id) - - if suggested_only: - # we only care about suggested children - child_events = filter(_is_suggested_child_event, child_events) - - if max_children is None or max_children > MAX_ROOMS_PER_SPACE: - max_children = MAX_ROOMS_PER_SPACE - - now = self._clock.time_msec() - events_result: List[JsonDict] = [] - for edge_event in itertools.islice(child_events, max_children): - events_result.append( - await self._event_serializer.serialize_event( - edge_event, - time_now=now, - event_format=format_event_for_client_v2, - ) - ) - - return room_entry, events_result - - async def _summarize_remote_room( - self, - room: "_RoomQueueEntry", - suggested_only: bool, - max_children: Optional[int], - exclude_rooms: Iterable[str], - ) -> Tuple[Sequence[JsonDict], Sequence[JsonDict]]: - """ - Request room entries and a list of event entries for a given room by querying a remote server. - - Args: - room: The room to summarize. - suggested_only: True if only suggested children should be returned. - Otherwise, all children are returned. - max_children: - The maximum number of children rooms to include. This is capped - to a server-set limit. - exclude_rooms: - Rooms IDs which do not need to be summarized. - - Returns: - A tuple of: - An iterable of rooms. - - An iterable of the sorted children events. This may be limited - to a maximum size or may include all children. 
- """ - room_id = room.room_id - logger.info("Requesting summary for %s via %s", room_id, room.via) - - # we need to make the exclusion list json-serialisable - exclude_rooms = list(exclude_rooms) - - via = itertools.islice(room.via, MAX_SERVERS_PER_SPACE) - try: - res = await self._federation_client.get_space_summary( - via, - room_id, - suggested_only=suggested_only, - max_rooms_per_space=max_children, - exclude_rooms=exclude_rooms, - ) - except Exception as e: - logger.warning( - "Unable to get summary of %s via federation: %s", - room_id, - e, - exc_info=logger.isEnabledFor(logging.DEBUG), - ) - return (), () - - return res.rooms, tuple( - ev.data for ev in res.events if ev.event_type == EventTypes.SpaceChild - ) - - async def _is_room_accessible( - self, room_id: str, requester: Optional[str], origin: Optional[str] - ) -> bool: - """ - Calculate whether the room should be shown in the spaces summary. - - It should be included if: - - * The requester is joined or can join the room (per MSC3173). - * The origin server has any user that is joined or can join the room. - * The history visibility is set to world readable. - - Args: - room_id: The room ID to summarize. - requester: - The user requesting the summary, if it is a local request. None - if this is a federation request. - origin: - The server requesting the summary, if it is a federation request. - None if this is a local request. - - Returns: - True if the room should be included in the spaces summary. - """ - state_ids = await self._store.get_current_state_ids(room_id) - - # If there's no state for the room, it isn't known. - if not state_ids: - # The user might have a pending invite for the room. - if requester and await self._store.get_invite_for_local_user_in_room( - requester, room_id - ): - return True - - logger.info("room %s is unknown, omitting from summary", room_id) - return False - - room_version = await self._store.get_room_version(room_id) - - # Include the room if it has join rules of public or knock. - join_rules_event_id = state_ids.get((EventTypes.JoinRules, "")) - if join_rules_event_id: - join_rules_event = await self._store.get_event(join_rules_event_id) - join_rule = join_rules_event.content.get("join_rule") - if join_rule == JoinRules.PUBLIC or ( - room_version.msc2403_knocking and join_rule == JoinRules.KNOCK - ): - return True - - # Include the room if it is peekable. - hist_vis_event_id = state_ids.get((EventTypes.RoomHistoryVisibility, "")) - if hist_vis_event_id: - hist_vis_ev = await self._store.get_event(hist_vis_event_id) - hist_vis = hist_vis_ev.content.get("history_visibility") - if hist_vis == HistoryVisibility.WORLD_READABLE: - return True - - # Otherwise we need to check information specific to the user or server. - - # If we have an authenticated requesting user, check if they are a member - # of the room (or can join the room). - if requester: - member_event_id = state_ids.get((EventTypes.Member, requester), None) - - # If they're in the room they can see info on it. - if member_event_id: - member_event = await self._store.get_event(member_event_id) - if member_event.membership in (Membership.JOIN, Membership.INVITE): - return True - - # Otherwise, check if they should be allowed access via membership in a space. 
- if await self._event_auth_handler.has_restricted_join_rules( - state_ids, room_version - ): - allowed_rooms = ( - await self._event_auth_handler.get_rooms_that_allow_join(state_ids) - ) - if await self._event_auth_handler.is_user_in_rooms( - allowed_rooms, requester - ): - return True - - # If this is a request over federation, check if the host is in the room or - # has a user who could join the room. - elif origin: - if await self._event_auth_handler.check_host_in_room( - room_id, origin - ) or await self._store.is_host_invited(room_id, origin): - return True - - # Alternately, if the host has a user in any of the spaces specified - # for access, then the host can see this room (and should do filtering - # if the requester cannot see it). - if await self._event_auth_handler.has_restricted_join_rules( - state_ids, room_version - ): - allowed_rooms = ( - await self._event_auth_handler.get_rooms_that_allow_join(state_ids) - ) - for space_id in allowed_rooms: - if await self._event_auth_handler.check_host_in_room( - space_id, origin - ): - return True - - logger.info( - "room %s is unpeekable and requester %s is not a member / not allowed to join, omitting from summary", - room_id, - requester or origin, - ) - return False - - async def _build_room_entry(self, room_id: str) -> JsonDict: - """Generate en entry suitable for the 'rooms' list in the summary response""" - stats = await self._store.get_room_with_stats(room_id) - - # currently this should be impossible because we call - # check_user_in_room_or_world_readable on the room before we get here, so - # there should always be an entry - assert stats is not None, "unable to retrieve stats for %s" % (room_id,) - - current_state_ids = await self._store.get_current_state_ids(room_id) - create_event = await self._store.get_event( - current_state_ids[(EventTypes.Create, "")] - ) - - room_version = await self._store.get_room_version(room_id) - allowed_rooms = None - if await self._event_auth_handler.has_restricted_join_rules( - current_state_ids, room_version - ): - allowed_rooms = await self._event_auth_handler.get_rooms_that_allow_join( - current_state_ids - ) - - entry = { - "room_id": stats["room_id"], - "name": stats["name"], - "topic": stats["topic"], - "canonical_alias": stats["canonical_alias"], - "num_joined_members": stats["joined_members"], - "avatar_url": stats["avatar"], - "join_rules": stats["join_rules"], - "world_readable": ( - stats["history_visibility"] == HistoryVisibility.WORLD_READABLE - ), - "guest_can_join": stats["guest_access"] == "can_join", - "creation_ts": create_event.origin_server_ts, - "room_type": create_event.content.get(EventContentFields.ROOM_TYPE), - "allowed_spaces": allowed_rooms, - } - - # Filter out Nones – rather omit the field altogether - room_entry = {k: v for k, v in entry.items() if v is not None} - - return room_entry - - async def _get_child_events(self, room_id: str) -> Iterable[EventBase]: - """ - Get the child events for a given room. - - The returned results are sorted for stability. - - Args: - room_id: The room id to get the children of. - - Returns: - An iterable of sorted child events. - """ - - # look for child rooms/spaces. - current_state_ids = await self._store.get_current_state_ids(room_id) - - events = await self._store.get_events_as_list( - [ - event_id - for key, event_id in current_state_ids.items() - if key[0] == EventTypes.SpaceChild - ] - ) - - # filter out any events without a "via" (which implies it has been redacted), - # and order to ensure we return stable results. 
- return sorted(filter(_has_valid_via, events), key=_child_events_comparison_key) - - -@attr.s(frozen=True, slots=True) -class _RoomQueueEntry: - room_id = attr.ib(type=str) - via = attr.ib(type=Sequence[str]) - - -def _has_valid_via(e: EventBase) -> bool: - via = e.content.get("via") - if not via or not isinstance(via, Sequence): - return False - for v in via: - if not isinstance(v, str): - logger.debug("Ignoring edge event %s with invalid via entry", e.event_id) - return False - return True - - -def _is_suggested_child_event(edge_event: EventBase) -> bool: - suggested = edge_event.content.get("suggested") - if isinstance(suggested, bool) and suggested: - return True - logger.debug("Ignorning not-suggested child %s", edge_event.state_key) - return False - - -# Order may only contain characters in the range of \x20 (space) to \x7E (~) inclusive. -_INVALID_ORDER_CHARS_RE = re.compile(r"[^\x20-\x7E]") - - -def _child_events_comparison_key(child: EventBase) -> Tuple[bool, Optional[str], str]: - """ - Generate a value for comparing two child events for ordering. - - The rules for ordering are supposed to be: - - 1. The 'order' key, if it is valid. - 2. The 'origin_server_ts' of the 'm.room.create' event. - 3. The 'room_id'. - - But we skip step 2 since we may not have any state from the room. - - Args: - child: The event for generating a comparison key. - - Returns: - The comparison key as a tuple of: - False if the ordering is valid. - The ordering field. - The room ID. - """ - order = child.content.get("order") - # If order is not a string or doesn't meet the requirements, ignore it. - if not isinstance(order, str): - order = None - elif len(order) > 50 or _INVALID_ORDER_CHARS_RE.search(order): - order = None - - # Items without an order come last. - return (order is None, order, child.room_id) diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 1b855a685c4a..65c27bc64a5e 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -37,6 +37,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import Codes, NotFoundError, RedirectException, SynapseError from synapse.config.sso import SsoAttributeRequirement +from synapse.handlers.register import init_counters_for_auth_provider from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http import get_request_user_agent from synapse.http.server import respond_with_html, respond_with_redirect @@ -103,11 +104,6 @@ def idp_brand(self) -> Optional[str]: """Optional branding identifier""" return None - @property - def unstable_idp_brand(self) -> Optional[str]: - """Optional brand identifier for the unstable API (see MSC2858).""" - return None - @abc.abstractmethod async def handle_redirect_request( self, @@ -188,15 +184,17 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname self._registration_handler = hs.get_registration_handler() self._auth_handler = hs.get_auth_handler() - self._error_template = hs.config.sso_error_template - self._bad_user_template = hs.config.sso_auth_bad_user_template + self._error_template = hs.config.sso.sso_error_template + self._bad_user_template = hs.config.sso.sso_auth_bad_user_template self._profile_handler = hs.get_profile_handler() # The following template is shown after a successful user interactive # authentication session. It tells the user they can close the window. 
- self._sso_auth_success_template = hs.config.sso_auth_success_template + self._sso_auth_success_template = hs.config.sso.sso_auth_success_template - self._sso_update_profile_information = hs.config.sso_update_profile_information + self._sso_update_profile_information = ( + hs.config.sso.sso_update_profile_information + ) # a lock on the mappings self._mapping_lock = Linearizer(name="sso_user_mapping", clock=hs.get_clock()) @@ -209,10 +207,11 @@ def __init__(self, hs: "HomeServer"): self._consent_at_registration = hs.config.consent.user_consent_at_registration - def register_identity_provider(self, p: SsoIdentityProvider): + def register_identity_provider(self, p: SsoIdentityProvider) -> None: p_id = p.idp_id assert p_id not in self._identity_providers self._identity_providers[p_id] = p + init_counters_for_auth_provider(p_id) def get_identity_providers(self) -> Mapping[str, SsoIdentityProvider]: """Get the configured identity providers""" @@ -366,6 +365,7 @@ async def complete_sso_login_request( sso_to_matrix_id_mapper: Callable[[int], Awaitable[UserAttributes]], grandfather_existing_users: Callable[[], Awaitable[Optional[str]]], extra_login_attributes: Optional[JsonDict] = None, + auth_provider_session_id: Optional[str] = None, ) -> None: """ Given an SSO ID, retrieve the user ID for it and possibly register the user. @@ -416,6 +416,8 @@ async def complete_sso_login_request( extra_login_attributes: An optional dictionary of extra attributes to be provided to the client in the login response. + auth_provider_session_id: An optional session ID from the IdP. + Raises: MappingException if there was a problem mapping the response to a user. RedirectException: if the mapping provider needs to redirect the user @@ -447,14 +449,16 @@ async def complete_sso_login_request( if not user_id: attributes = await self._call_attribute_mapper(sso_to_matrix_id_mapper) - if attributes.localpart is None: - # the mapper doesn't return a username. bail out with a redirect to - # the username picker. - await self._redirect_to_username_picker( + next_step_url = self._get_url_for_next_new_user_step( + attributes=attributes + ) + if next_step_url: + await self._redirect_to_next_new_user_step( auth_provider_id, remote_user_id, attributes, client_redirect_url, + next_step_url, extra_login_attributes, ) @@ -489,6 +493,7 @@ async def complete_sso_login_request( client_redirect_url, extra_login_attributes, new_user=new_user, + auth_provider_session_id=auth_provider_session_id, ) async def _call_attribute_mapper( @@ -533,18 +538,53 @@ async def _call_attribute_mapper( ) return attributes - async def _redirect_to_username_picker( + def _get_url_for_next_new_user_step( + self, + attributes: Optional[UserAttributes] = None, + session: Optional[UsernameMappingSession] = None, + ) -> bytes: + """Returns the URL to redirect to for the next step of new user registration + + Given attributes from the user mapping provider or a UsernameMappingSession, + returns the URL to redirect to for the next step of the registration flow. + + Args: + attributes: the user attributes returned by the user mapping provider, + from before a UsernameMappingSession has begun. + + session: an active UsernameMappingSession, possibly with some of its + attributes chosen by the user. 
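The `_get_url_for_next_new_user_step` helper being added here acts as a small state machine over the registration session: each new-user endpoint re-runs it and redirects to whichever step still lacks data. A simplified, invented model of that flow (`Session` stands in for `UsernameMappingSession`; the URLs mirror the ones in this hunk):

```python
# Simplified model of the new-user step machine; Session is a stand-in for
# UsernameMappingSession, not the real class.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Session:
    chosen_localpart: Optional[str] = None
    terms_accepted_version: Optional[str] = None


def next_new_user_step(session: Session, consent_required: bool) -> bytes:
    # 1. No username chosen yet: send the user to the username picker.
    if session.chosen_localpart is None:
        return b"/_synapse/client/pick_username/account_details"
    # 2. Consent required but not yet given: send them to the consent page.
    if consent_required and session.terms_accepted_version is None:
        return b"/_synapse/client/new_user_consent"
    # 3. Everything collected: complete the registration.
    return b"/_synapse/client/sso_register"


# Because each handler re-runs the helper, steps are skipped automatically
# when the mapping provider already supplied the data:
s = Session(chosen_localpart="alice")
print(next_new_user_step(s, consent_required=True))
# b'/_synapse/client/new_user_consent'
```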
+ + Returns: + The URL to redirect to, or an empty value if no redirect is necessary + """ + # Must provide either attributes or session, not both + assert (attributes is not None) != (session is not None) + + if (attributes and attributes.localpart is None) or ( + session and session.chosen_localpart is None + ): + return b"/_synapse/client/pick_username/account_details" + elif self._consent_at_registration and not ( + session and session.terms_accepted_version + ): + return b"/_synapse/client/new_user_consent" + else: + return b"/_synapse/client/sso_register" if session else b"" + + async def _redirect_to_next_new_user_step( self, auth_provider_id: str, remote_user_id: str, attributes: UserAttributes, client_redirect_url: str, + next_step_url: bytes, extra_login_attributes: Optional[JsonDict], ) -> NoReturn: """Creates a UsernameMappingSession and redirects the browser - Called if the user mapping provider doesn't return a localpart for a new user. - Raises a RedirectException which redirects the browser to the username picker. + Called if the user mapping provider doesn't return complete information for a new user. + Raises a RedirectException which redirects the browser to a specified URL. Args: auth_provider_id: A unique identifier for this SSO provider, e.g. @@ -557,12 +597,15 @@ async def _redirect_to_username_picker( client_redirect_url: The redirect URL passed in by the client, which we will eventually redirect back to. + next_step_url: The URL to redirect to for the next step of the new user flow. + extra_login_attributes: An optional dictionary of extra attributes to be provided to the client in the login response. Raises: RedirectException """ + # TODO: If needed, allow using/looking up an existing session here. session_id = random_string(16) now = self._clock.time_msec() session = UsernameMappingSession( @@ -573,13 +616,18 @@ async def _redirect_to_username_picker( client_redirect_url=client_redirect_url, expiry_time_ms=now + self._MAPPING_SESSION_VALIDITY_PERIOD_MS, extra_login_attributes=extra_login_attributes, + # Treat the localpart returned by the user mapping provider as though + # it was chosen by the user. If it's None, it must be chosen eventually. + chosen_localpart=attributes.localpart, + # TODO: Consider letting the user mapping provider specify defaults for + # other user-chosen attributes. 
) self._username_mapping_sessions[session_id] = session logger.info("Recorded registration session id %s", session_id) - # Set the cookie and redirect to the username picker - e = RedirectException(b"/_synapse/client/pick_username/account_details") + # Set the cookie and redirect to the next step + e = RedirectException(next_step_url) e.cookies.append( b"%s=%s; path=/" % (USERNAME_MAPPING_SESSION_COOKIE_NAME, session_id.encode("ascii")) @@ -808,20 +856,13 @@ async def handle_submit_username_request( ) session.emails_to_use = filtered_emails - # we may now need to collect consent from the user, in which case, redirect - # to the consent-extraction-unit - if self._consent_at_registration: - redirect_url = b"/_synapse/client/new_user_consent" - - # otherwise, redirect to the completion page - else: - redirect_url = b"/_synapse/client/sso_register" - - respond_with_redirect(request, redirect_url) + respond_with_redirect( + request, self._get_url_for_next_new_user_step(session=session) + ) async def handle_terms_accepted( self, request: Request, session_id: str, terms_version: str - ): + ) -> None: """Handle a request to the new-user 'consent' endpoint Will serve an HTTP response to the request. @@ -845,8 +886,9 @@ async def handle_terms_accepted( session.terms_accepted_version = terms_version - # we're done; now we can register the user - respond_with_redirect(request, b"/_synapse/client/sso_register") + respond_with_redirect( + request, self._get_url_for_next_new_user_step(session=session) + ) async def register_sso_user(self, request: Request, session_id: str) -> None: """Called once we have all the info we need to register a new user. @@ -923,7 +965,7 @@ async def register_sso_user(self, request: Request, session_id: str) -> None: new_user=True, ) - def _expire_old_sessions(self): + def _expire_old_sessions(self) -> None: to_expire = [] now = int(self._clock.time_msec()) diff --git a/synapse/handlers/state_deltas.py b/synapse/handlers/state_deltas.py index 077c7c064965..d30ba2b7248c 100644 --- a/synapse/handlers/state_deltas.py +++ b/synapse/handlers/state_deltas.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from enum import Enum, auto from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: @@ -21,6 +22,12 @@ logger = logging.getLogger(__name__) +class MatchChange(Enum): + no_change = auto() + now_true = auto() + now_false = auto() + + class StateDeltasHandler: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -31,18 +38,12 @@ async def _get_key_change( event_id: Optional[str], key_name: str, public_value: str, - ) -> Optional[bool]: + ) -> MatchChange: """Given two events check if the `key_name` field in content changed from not matching `public_value` to doing so. For example, check if `history_visibility` (`key_name`) changed from `shared` to `world_readable` (`public_value`). - - Returns: - None if the field in the events either both match `public_value` - or if neither do, i.e. there has been no change. 
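The switch from `Optional[bool]` to the `MatchChange` enum in this `state_deltas.py` change removes a truthiness trap: `None` ("no change") and `False` ("now false") are both falsy, so a caller writing `if change:` could not tell them apart. A small sketch of the new pattern; `classify` is a simplified stand-in for `_get_key_change` that takes the extracted values directly:

```python
# Sketch of the enum-based return; `classify` is a simplified stand-in for
# _get_key_change operating on already-extracted content values.
from enum import Enum, auto
from typing import Optional


class MatchChange(Enum):
    no_change = auto()
    now_true = auto()
    now_false = auto()


def classify(prev_value: Optional[str], value: Optional[str], public_value: str) -> MatchChange:
    # Did the field change from not matching `public_value` to matching it,
    # or vice versa?
    if value == public_value and prev_value != public_value:
        return MatchChange.now_true
    elif value != public_value and prev_value == public_value:
        return MatchChange.now_false
    return MatchChange.no_change


change = classify("shared", "world_readable", "world_readable")
# With Optional[bool], `if change:` treated None and False alike; the enum
# forces callers to compare explicitly:
if change is MatchChange.now_true:
    print("room became world-readable")
```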
- True if it didn't match `public_value` but now does - False if it did match `public_value` but now doesn't """ prev_event = None event = None @@ -54,7 +55,7 @@ async def _get_key_change( if not event and not prev_event: logger.debug("Neither event exists: %r %r", prev_event_id, event_id) - return None + return MatchChange.no_change prev_value = None value = None @@ -68,8 +69,8 @@ async def _get_key_change( logger.debug("prev_value: %r -> value: %r", prev_value, value) if value == public_value and prev_value != public_value: - return True + return MatchChange.now_true elif value != public_value and prev_value == public_value: - return False + return MatchChange.now_false else: - return None + return MatchChange.no_change diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3fd89af2a4e2..bd3e6f2ec77b 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -18,7 +18,7 @@ from typing_extensions import Counter as CounterType -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import JsonDict @@ -46,7 +46,7 @@ def __init__(self, hs: "HomeServer"): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - self.stats_enabled = hs.config.stats_enabled + self.stats_enabled = hs.config.stats.stats_enabled # The current position in the current_state_delta stream self.pos: Optional[int] = None @@ -54,7 +54,7 @@ def __init__(self, hs: "HomeServer"): # Guard to ensure we only process deltas one at a time self._is_processing = False - if self.stats_enabled and hs.config.run_background_tasks: + if self.stats_enabled and hs.config.worker.run_background_tasks: self.notifier.add_replication_callback(self.notify_new_event) # We kick this off so that we don't have to wait for a change before @@ -68,7 +68,7 @@ def notify_new_event(self) -> None: self._is_processing = True - async def process(): + async def process() -> None: try: await self._unsafe_process() finally: @@ -254,7 +254,7 @@ async def _handle_deltas( elif typ == EventTypes.Create: room_state["is_federatable"] = ( - event_content.get("m.federate", True) is True + event_content.get(EventContentFields.FEDERATE, True) is True ) elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") @@ -273,7 +273,9 @@ async def _handle_deltas( elif typ == EventTypes.CanonicalAlias: room_state["canonical_alias"] = event_content.get("alias") elif typ == EventTypes.GuestAccess: - room_state["guest_access"] = event_content.get("guest_access") + room_state["guest_access"] = event_content.get( + EventContentFields.GUEST_ACCESS + ) for room_id, state in room_to_state_updates.items(): logger.debug("Updating room_stats_state for %s: %s", room_id, state) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index f30bfcc93cf2..f3039c3c3fb7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,5 +1,4 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2018, 2019 New Vector Ltd +# Copyright 2015-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,6 +30,8 @@ from synapse.api.constants import AccountDataTypes, EventTypes, Membership from synapse.api.filtering import FilterCollection +from synapse.api.presence import UserPresenceState +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import EventBase from synapse.logging.context import current_context from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span @@ -86,20 +87,20 @@ SyncRequestKey = Tuple[Any, ...] -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class SyncConfig: - user = attr.ib(type=UserID) - filter_collection = attr.ib(type=FilterCollection) - is_guest = attr.ib(type=bool) - request_key = attr.ib(type=SyncRequestKey) - device_id = attr.ib(type=Optional[str]) + user: UserID + filter_collection: FilterCollection + is_guest: bool + request_key: SyncRequestKey + device_id: Optional[str] -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class TimelineBatch: - prev_batch = attr.ib(type=StreamToken) - events = attr.ib(type=List[EventBase]) - limited = attr.ib(type=bool) + prev_batch: StreamToken + events: List[EventBase] + limited: bool def __bool__(self) -> bool: """Make the result appear empty if there are no updates. This is used @@ -113,16 +114,16 @@ def __bool__(self) -> bool: # if there are updates for it, which we check after the instance has been created. # This should not be a big deal because we update the notification counts afterwards as # well anyway. -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class JoinedSyncResult: - room_id = attr.ib(type=str) - timeline = attr.ib(type=TimelineBatch) - state = attr.ib(type=StateMap[EventBase]) - ephemeral = attr.ib(type=List[JsonDict]) - account_data = attr.ib(type=List[JsonDict]) - unread_notifications = attr.ib(type=JsonDict) - summary = attr.ib(type=Optional[JsonDict]) - unread_count = attr.ib(type=int) + room_id: str + timeline: TimelineBatch + state: StateMap[EventBase] + ephemeral: List[JsonDict] + account_data: List[JsonDict] + unread_notifications: JsonDict + summary: Optional[JsonDict] + unread_count: int def __bool__(self) -> bool: """Make the result appear empty if there are no updates. This is used @@ -138,12 +139,12 @@ def __bool__(self) -> bool: ) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class ArchivedSyncResult: - room_id = attr.ib(type=str) - timeline = attr.ib(type=TimelineBatch) - state = attr.ib(type=StateMap[EventBase]) - account_data = attr.ib(type=List[JsonDict]) + room_id: str + timeline: TimelineBatch + state: StateMap[EventBase] + account_data: List[JsonDict] def __bool__(self) -> bool: """Make the result appear empty if there are no updates. 
This is used @@ -152,37 +153,37 @@ def __bool__(self) -> bool: return bool(self.timeline or self.state or self.account_data) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class InvitedSyncResult: - room_id = attr.ib(type=str) - invite = attr.ib(type=EventBase) + room_id: str + invite: EventBase def __bool__(self) -> bool: """Invited rooms should always be reported to the client""" return True -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class KnockedSyncResult: - room_id = attr.ib(type=str) - knock = attr.ib(type=EventBase) + room_id: str + knock: EventBase def __bool__(self) -> bool: """Knocked rooms should always be reported to the client""" return True -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class GroupsSyncResult: - join = attr.ib(type=JsonDict) - invite = attr.ib(type=JsonDict) - leave = attr.ib(type=JsonDict) + join: JsonDict + invite: JsonDict + leave: JsonDict def __bool__(self) -> bool: return bool(self.join or self.invite or self.leave) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceLists: """ Attributes: @@ -190,27 +191,27 @@ class DeviceLists: left: List of user_ids whose devices we no longer track """ - changed = attr.ib(type=Collection[str]) - left = attr.ib(type=Collection[str]) + changed: Collection[str] + left: Collection[str] def __bool__(self) -> bool: return bool(self.changed or self.left) -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class _RoomChanges: """The set of room entries to include in the sync, plus the set of joined and left room IDs since last sync. """ - room_entries = attr.ib(type=List["RoomSyncResultBuilder"]) - invited = attr.ib(type=List[InvitedSyncResult]) - knocked = attr.ib(type=List[KnockedSyncResult]) - newly_joined_rooms = attr.ib(type=List[str]) - newly_left_rooms = attr.ib(type=List[str]) + room_entries: List["RoomSyncResultBuilder"] + invited: List[InvitedSyncResult] + knocked: List[KnockedSyncResult] + newly_joined_rooms: List[str] + newly_left_rooms: List[str] -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class SyncResult: """ Attributes: @@ -230,18 +231,18 @@ class SyncResult: groups: Group updates, if any """ - next_batch = attr.ib(type=StreamToken) - presence = attr.ib(type=List[JsonDict]) - account_data = attr.ib(type=List[JsonDict]) - joined = attr.ib(type=List[JoinedSyncResult]) - invited = attr.ib(type=List[InvitedSyncResult]) - knocked = attr.ib(type=List[KnockedSyncResult]) - archived = attr.ib(type=List[ArchivedSyncResult]) - to_device = attr.ib(type=List[JsonDict]) - device_lists = attr.ib(type=DeviceLists) - device_one_time_keys_count = attr.ib(type=JsonDict) - device_unused_fallback_key_types = attr.ib(type=List[str]) - groups = attr.ib(type=Optional[GroupsSyncResult]) + next_batch: StreamToken + presence: List[UserPresenceState] + account_data: List[JsonDict] + joined: List[JoinedSyncResult] + invited: List[InvitedSyncResult] + knocked: List[KnockedSyncResult] + archived: List[ArchivedSyncResult] + to_device: List[JsonDict] + device_lists: DeviceLists + device_one_time_keys_count: JsonDict + device_unused_fallback_key_types: List[str] + groups: Optional[GroupsSyncResult] def __bool__(self) -> bool: """Make the result appear empty if there are no updates. 
This is used @@ -269,14 +270,22 @@ def __init__(self, hs: "HomeServer"): self.presence_handler = hs.get_presence_handler() self.event_sources = hs.get_event_sources() self.clock = hs.get_clock() - self.response_cache: ResponseCache[SyncRequestKey] = ResponseCache( - hs.get_clock(), "sync" - ) self.state = hs.get_state_handler() self.auth = hs.get_auth() self.storage = hs.get_storage() self.state_store = self.storage.state + # TODO: flush cache entries on subsequent sync request. + # Once we get the next /sync request (ie, one with the same access token + # that sets 'since' to 'next_batch'), we know that device won't need a + # cached result any more, and we could flush the entry from the cache to save + # memory. + self.response_cache: ResponseCache[SyncRequestKey] = ResponseCache( + hs.get_clock(), + "sync", + timeout_ms=hs.config.caches.sync_response_cache_duration, + ) + # ExpiringCache((User, Device)) -> LruCache(user_id => event_id) self.lazy_loaded_members_cache: ExpiringCache[ Tuple[str, Optional[str]], LruCache[str, str] @@ -325,6 +334,19 @@ async def _wait_for_sync_for_user( full_state: bool, cache_context: ResponseCacheContext[SyncRequestKey], ) -> SyncResult: + """The start of the machinery that produces a /sync response. + + See https://spec.matrix.org/v1.1/client-server-api/#syncing for full details. + + This method does high-level bookkeeping: + - tracking the kind of sync in the logging context + - deleting any to_device messages whose delivery has been acknowledged. + - deciding if we should dispatch an instant or delayed response + - marking the sync as being lazily loaded, if appropriate + + Computing the body of the response begins in the next method, + `current_sync_for_user`. + """ if since_token is None: sync_type = "initial_sync" elif full_state: @@ -354,8 +376,10 @@ async def _wait_for_sync_for_user( sync_config, since_token, full_state=full_state ) else: - - async def current_sync_callback(before_token, after_token) -> SyncResult: + # Otherwise, we wait for something to happen and report it to the user. + async def current_sync_callback( + before_token: StreamToken, after_token: StreamToken + ) -> SyncResult: return await self.current_sync_for_user(sync_config, since_token) result = await self.notifier.wait_for_events( @@ -391,7 +415,12 @@ async def current_sync_for_user( since_token: Optional[StreamToken] = None, full_state: bool = False, ) -> SyncResult: - """Get the sync for client needed to match what the server has now.""" + """Generates the response body of a sync result, represented as a SyncResult. + + This is a wrapper around `generate_sync_result` which starts an open tracing + span to track the sync. See `generate_sync_result` for the next part of your + indoctrination. 
+ """ with start_active_span("current_sync_for_user"): log_kv({"since_token": since_token}) sync_result = await self.generate_sync_result( @@ -432,7 +461,7 @@ async def ephemeral_by_room( room_ids = sync_result_builder.joined_room_ids - typing_source = self.event_sources.sources["typing"] + typing_source = self.event_sources.sources.typing typing, typing_key = await typing_source.get_new_events( user=sync_config.user, from_key=typing_key, @@ -454,7 +483,7 @@ async def ephemeral_by_room( receipt_key = since_token.receipt_key if since_token else 0 - receipt_source = self.event_sources.sources["receipt"] + receipt_source = self.event_sources.sources.receipt receipts, receipt_key = await receipt_source.get_new_events( user=sync_config.user, from_key=receipt_key, @@ -496,10 +525,13 @@ async def _load_filtered_recents( else: limited = False + log_kv({"limited": limited}) + if potential_recents: - recents = sync_config.filter_collection.filter_room_timeline( + recents = await sync_config.filter_collection.filter_room_timeline( potential_recents ) + log_kv({"recents_after_sync_filtering": len(recents)}) # We check if there are any state events, if there are then we pass # all current state events to the filter_events function. This is to @@ -517,6 +549,7 @@ async def _load_filtered_recents( recents, always_include_ids=current_state_ids, ) + log_kv({"recents_after_visibility_filtering": len(recents)}) else: recents = [] @@ -545,7 +578,7 @@ async def _load_filtered_recents( # that have happened since `since_key` up to `end_key`, so we # can just use `get_room_events_stream_for_room`. # Otherwise, we want to return the last N events in the room - # in toplogical ordering. + # in topological ordering. if since_key: events, end_key = await self.store.get_room_events_stream_for_room( room_id, @@ -557,10 +590,15 @@ async def _load_filtered_recents( events, end_key = await self.store.get_recent_events_for_room( room_id, limit=load_limit + 1, end_token=end_key ) - loaded_recents = sync_config.filter_collection.filter_room_timeline( - events + + log_kv({"loaded_recents": len(events)}) + + loaded_recents = ( + await sync_config.filter_collection.filter_room_timeline(events) ) + log_kv({"loaded_recents_after_sync_filtering": len(loaded_recents)}) + # We check if there are any state events, if there are then we pass # all current state events to the filter_events function. This is to # ensure that we always include current state in the timeline @@ -577,6 +615,9 @@ async def _load_filtered_recents( loaded_recents, always_include_ids=current_state_ids, ) + + log_kv({"loaded_recents_after_client_filtering": len(loaded_recents)}) + loaded_recents.extend(recents) recents = loaded_recents @@ -693,7 +734,7 @@ async def compute_summary( name_id = state_ids.get((EventTypes.Name, "")) canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, "")) - summary = {} + summary: JsonDict = {} empty_ms = MemberSummary([], 0) # TODO: only send these when they change. @@ -992,7 +1033,7 @@ async def compute_state_delta( return { (e.type, e.state_key): e - for e in sync_config.filter_collection.filter_room_state( + for e in await sync_config.filter_collection.filter_room_state( list(state.values()) ) if e.type != EventTypes.Aliases # until MSC2261 or alternative solution @@ -1019,7 +1060,18 @@ async def generate_sync_result( since_token: Optional[StreamToken] = None, full_state: bool = False, ) -> SyncResult: - """Generates a sync result.""" + """Generates the response body of a sync result. 
+ + This is represented by a `SyncResult` struct, which is built from small pieces + using a `SyncResultBuilder`. See also + https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync. + + The `sync_result_builder` is passed as a mutable ("inout") parameter to various + helper functions. These retrieve and process the data which forms the sync body, + often writing to the `sync_result_builder` to store their output. + + At the end, we transfer data from the `sync_result_builder` to a new `SyncResult` + instance to signify that the sync calculation is complete. + """ # NB: The now_token gets changed by some of the generate_sync_* methods, # this is due to some of the underlying streams not supporting the ability # to query up to a given point. @@ -1069,7 +1121,7 @@ async def generate_sync_result( block_all_presence_data = ( since_token is None and sync_config.filter_collection.blocks_all_presence() ) - if self.hs_config.use_presence and not block_all_presence_data: + if self.hs_config.server.use_presence and not block_all_presence_data: logger.debug("Fetching presence data") await self._generate_sync_entry_for_presence( sync_result_builder, @@ -1107,6 +1159,8 @@ async def generate_sync_result( logger.debug("Fetching group data") await self._generate_sync_entry_for_groups(sync_result_builder) + num_events = 0 + # debug for https://github.com/matrix-org/synapse/issues/4422 for joined_room in sync_result_builder.joined: room_id = joined_room.room_id @@ -1114,6 +1168,14 @@ async def generate_sync_result( issue4422_logger.debug( "Sync result for newly joined room %s: %r", room_id, joined_room ) + num_events += len(joined_room.timeline.events) + + log_kv( + { + "joined_rooms_in_result": len(sync_result_builder.joined), + "events_in_result": num_events, + } + ) logger.debug("Sync response calculation complete") return SyncResult( @@ -1311,14 +1373,22 @@ async def _generate_sync_entry_for_to_device( async def _generate_sync_entry_for_account_data( self, sync_result_builder: "SyncResultBuilder" ) -> Dict[str, Dict[str, JsonDict]]: - """Generates the account data portion of the sync response. Populates - `sync_result_builder` with the result. + """Generates the account data portion of the sync response. + + Account data (called "Client Config" in the spec) can be set either globally + or for a specific room. Account data consists of a list of events which + accumulate state, much like a room. + + This function retrieves global and per-room account data. The former is written + to the given `sync_result_builder`. The latter is returned directly, to be + later written to the `sync_result_builder` on a room-by-room basis. Args: sync_result_builder Returns: - A dictionary containing the per room account data. + A dictionary whose keys (room ids) map to the per room account data for that + room.
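The docstring above describes `generate_sync_result`'s builder pattern: helper functions mutate a shared builder in place, and the finished struct is produced only at the end. A toy sketch of that shape, with hypothetical names not taken from this diff:

    import attr
    from typing import List

    @attr.s(slots=True, auto_attribs=True)
    class _ResultBuilder:
        # Helpers write their output here (the "inout" parameter).
        parts: List[str] = attr.Factory(list)

    def _fill_presence(builder: _ResultBuilder) -> None:
        builder.parts.append("presence")

    def _fill_rooms(builder: _ResultBuilder) -> None:
        builder.parts.append("rooms")

    def generate_result() -> List[str]:
        builder = _ResultBuilder()
        _fill_presence(builder)
        _fill_rooms(builder)
        # Transfer out of the mutable builder to signal completion.
        return list(builder.parts)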
""" sync_config = sync_result_builder.sync_config user_id = sync_result_builder.sync_config.user.to_string() @@ -1326,7 +1396,7 @@ async def _generate_sync_entry_for_account_data( if since_token and not sync_result_builder.full_state: ( - account_data, + global_account_data, account_data_by_room, ) = await self.store.get_updated_account_data_for_user( user_id, since_token.account_data_key @@ -1337,23 +1407,23 @@ async def _generate_sync_entry_for_account_data( ) if push_rules_changed: - account_data["m.push_rules"] = await self.push_rules_for_user( + global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) else: ( - account_data, + global_account_data, account_data_by_room, ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) - account_data["m.push_rules"] = await self.push_rules_for_user( + global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) - account_data_for_user = sync_config.filter_collection.filter_account_data( + account_data_for_user = await sync_config.filter_collection.filter_account_data( [ {"type": account_data_type, "content": content} - for account_data_type, content in account_data.items() + for account_data_type, content in global_account_data.items() ] ) @@ -1382,7 +1452,7 @@ async def _generate_sync_entry_for_presence( sync_config = sync_result_builder.sync_config user = sync_result_builder.sync_config.user - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence since_token = sync_result_builder.since_token presence_key = None @@ -1415,7 +1485,7 @@ async def _generate_sync_entry_for_presence( # Deduplicate the presence entries so that there's at most one per user presence = list({p.user_id: p for p in presence}.values()) - presence = sync_config.filter_collection.filter_presence(presence) + presence = await sync_config.filter_collection.filter_presence(presence) sync_result_builder.presence = presence @@ -1427,18 +1497,31 @@ async def _generate_sync_entry_for_rooms( """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. + In the response that reaches the client, rooms are divided into four categories: + `invite`, `join`, `knock`, `leave`. These aren't the same as the four sets of + room ids returned by this function. + Args: sync_result_builder account_data_by_room: Dictionary of per room account data Returns: - Returns a 4-tuple of - `(newly_joined_rooms, newly_joined_or_invited_users, - newly_left_rooms, newly_left_users)` + Returns a 4-tuple describing rooms the user has joined or left, and users who've + joined or left rooms any rooms the user is in. This gets used later in + `_generate_sync_entry_for_device_list`. + + Its entries are: + - newly_joined_rooms + - newly_joined_or_invited_or_knocked_users + - newly_left_rooms + - newly_left_users """ + since_token = sync_result_builder.since_token + + # 1. Start by fetching all ephemeral events in rooms we've joined (if required). user_id = sync_result_builder.sync_config.user.to_string() block_all_room_ephemeral = ( - sync_result_builder.since_token is None + since_token is None and sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral() ) @@ -1452,12 +1535,12 @@ async def _generate_sync_entry_for_rooms( ) sync_result_builder.now_token = now_token - # We check up front if anything has changed, if it hasn't then there is + # 2. 
We check up front if anything has changed, if it hasn't then there is # no point in going further. - since_token = sync_result_builder.since_token if not sync_result_builder.full_state: if since_token and not ephemeral_by_room and not account_data_by_room: have_changed = await self._have_rooms_changed(sync_result_builder) + log_kv({"rooms_have_changed": have_changed}) if not have_changed: tags_by_room = await self.store.get_updated_tags( user_id, since_token.account_data_key @@ -1466,20 +1549,8 @@ async def _generate_sync_entry_for_rooms( logger.debug("no-oping sync") return set(), set(), set(), set() - ignored_account_data = ( - await self.store.get_global_account_data_by_type_for_user( - AccountDataTypes.IGNORED_USER_LIST, user_id=user_id - ) - ) - - # If there is ignored users account data and it matches the proper type, - # then use it. - ignored_users: FrozenSet[str] = frozenset() - if ignored_account_data: - ignored_users_data = ignored_account_data.get("ignored_users", {}) - if isinstance(ignored_users_data, dict): - ignored_users = frozenset(ignored_users_data.keys()) - + # 3. Work out which rooms need reporting in the sync response. + ignored_users = await self._get_ignored_users(user_id) if since_token: room_changes = await self._get_rooms_changed( sync_result_builder, ignored_users @@ -1489,18 +1560,21 @@ async def _generate_sync_entry_for_rooms( ) else: room_changes = await self._get_all_rooms(sync_result_builder, ignored_users) - tags_by_room = await self.store.get_tags_for_user(user_id) + log_kv({"rooms_changed": len(room_changes.room_entries)}) + room_entries = room_changes.room_entries invited = room_changes.invited knocked = room_changes.knocked newly_joined_rooms = room_changes.newly_joined_rooms newly_left_rooms = room_changes.newly_left_rooms - async def handle_room_entries(room_entry): + # 4. We need to apply further processing to `room_entries` (rooms considered + # joined or archived). + async def handle_room_entries(room_entry: "RoomSyncResultBuilder") -> None: logger.debug("Generating room entry for %s", room_entry.room_id) - res = await self._generate_room_entry( + await self._generate_room_entry( sync_result_builder, ignored_users, room_entry, @@ -1510,38 +1584,19 @@ async def handle_room_entries(room_entry): always_include=sync_result_builder.full_state, ) logger.debug("Generated room entry for %s", room_entry.room_id) - return res await concurrently_execute(handle_room_entries, room_entries, 10) sync_result_builder.invited.extend(invited) sync_result_builder.knocked.extend(knocked) - # Now we want to get any newly joined, invited or knocking users - newly_joined_or_invited_or_knocked_users = set() - newly_left_users = set() - if since_token: - for joined_sync in sync_result_builder.joined: - it = itertools.chain( - joined_sync.timeline.events, joined_sync.state.values() - ) - for event in it: - if event.type == EventTypes.Member: - if ( - event.membership == Membership.JOIN - or event.membership == Membership.INVITE - or event.membership == Membership.KNOCK - ): - newly_joined_or_invited_or_knocked_users.add( - event.state_key - ) - else: - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership == Membership.JOIN: - newly_left_users.add(event.state_key) - - newly_left_users -= newly_joined_or_invited_or_knocked_users + # 5. Work out which users have joined or left rooms we're in. 
We use this + # to build the device_list part of the sync response in + # `_generate_sync_entry_for_device_list`. + ( + newly_joined_or_invited_or_knocked_users, + newly_left_users, + ) = sync_result_builder.calculate_user_changes() return ( set(newly_joined_rooms), @@ -1550,11 +1605,36 @@ async def handle_room_entries(room_entry): newly_left_users, ) + async def _get_ignored_users(self, user_id: str) -> FrozenSet[str]: + """Retrieve the users ignored by the given user from their global account_data. + + Returns an empty set if + - there is no global account_data entry for ignored_users + - there is such an entry, but it's not a JSON object. + """ + # TODO: Can we `SELECT ignored_user_id FROM ignored_users WHERE ignorer_user_id=?;` instead? + ignored_account_data = ( + await self.store.get_global_account_data_by_type_for_user( + AccountDataTypes.IGNORED_USER_LIST, user_id=user_id + ) + ) + + # If there is ignored users account data and it matches the proper type, + # then use it. + ignored_users: FrozenSet[str] = frozenset() + if ignored_account_data: + ignored_users_data = ignored_account_data.get("ignored_users", {}) + if isinstance(ignored_users_data, dict): + ignored_users = frozenset(ignored_users_data.keys()) + return ignored_users + async def _have_rooms_changed( self, sync_result_builder: "SyncResultBuilder" ) -> bool: """Returns whether there may be any new events that should be sent down the sync. Returns True if there are. + + Does not modify the `sync_result_builder`. """ user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token @@ -1562,12 +1642,13 @@ async def _have_rooms_changed( assert since_token - # Get a list of membership change events that have happened. - rooms_changed = await self.store.get_membership_changes_for_user( + # Get a list of membership change events that have happened to the user + # requesting the sync. + membership_changes = await self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key ) - if rooms_changed: + if membership_changes: return True stream_id = since_token.room_key.stream @@ -1579,7 +1660,25 @@ async def _have_rooms_changed( async def _get_rooms_changed( self, sync_result_builder: "SyncResultBuilder", ignored_users: FrozenSet[str] ) -> _RoomChanges: - """Gets the the changes that have happened since the last sync.""" + """Determine the changes in rooms to report to the user. + + Ideally, we want to report all events whose stream ordering `s` lies in the + range `since_token < s <= now_token`, where the two tokens are read from the + sync_result_builder. + + If there are too many events in that range to report, things get complicated. + In this situation we return a truncated list of the most recent events, and + indicate in the response that there is a "gap" of omitted events. Additionally: + + - we include a "state_delta", to describe the changes in state over the gap, + - we include all membership events applying to the user making the request, + even those in the gap. + + See the spec for the rationale: + https://spec.matrix.org/v1.1/client-server-api/#syncing + + The sync_result_builder is not modified by this function. + """ user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token now_token = sync_result_builder.now_token @@ -1587,21 +1686,36 @@ async def _get_rooms_changed( assert since_token - # Get a list of membership change events that have happened. 
- rooms_changed = await self.store.get_membership_changes_for_user( + # The spec + # https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync + # notes that membership events need special consideration: + # + # > When a sync is limited, the server MUST return membership events for events + # > in the gap (between since and the start of the returned timeline), regardless + # > as to whether or not they are redundant. + # + # We fetch such events here, but we only seem to use them for categorising rooms + # as newly joined, newly left, invited or knocked. + # TODO: we've already called this function and ran this query in + # _have_rooms_changed. We could keep the results in memory to avoid a + # second query, at the cost of more complicated source code. + membership_change_events = await self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key ) mem_change_events_by_room_id: Dict[str, List[EventBase]] = {} - for event in rooms_changed: + for event in membership_change_events: mem_change_events_by_room_id.setdefault(event.room_id, []).append(event) - newly_joined_rooms = [] - newly_left_rooms = [] - room_entries = [] - invited = [] - knocked = [] + newly_joined_rooms: List[str] = [] + newly_left_rooms: List[str] = [] + room_entries: List[RoomSyncResultBuilder] = [] + invited: List[InvitedSyncResult] = [] + knocked: List[KnockedSyncResult] = [] for room_id, events in mem_change_events_by_room_id.items(): + # The body of this loop will add this room to at least one of the five lists + # above. Things get messy if you've e.g. joined, left, joined then left the + # room all in the same sync period. logger.debug( "Membership changes in %s: [%s]", room_id, @@ -1656,6 +1770,7 @@ async def _get_rooms_changed( if not non_joins: continue + last_non_join = non_joins[-1] # Check if we have left the room. This can either be because we were # joined before *or* that we since joined and then left. @@ -1677,18 +1792,18 @@ async def _get_rooms_changed( newly_left_rooms.append(room_id) # Only bother if we're still currently invited - should_invite = non_joins[-1].membership == Membership.INVITE + should_invite = last_non_join.membership == Membership.INVITE if should_invite: - if event.sender not in ignored_users: - invite_room_sync = InvitedSyncResult(room_id, invite=non_joins[-1]) + if last_non_join.sender not in ignored_users: + invite_room_sync = InvitedSyncResult(room_id, invite=last_non_join) if invite_room_sync: invited.append(invite_room_sync) # Only bother if our latest membership in the room is knock (and we haven't # been accepted/rejected in the meantime). - should_knock = non_joins[-1].membership == Membership.KNOCK + should_knock = last_non_join.membership == Membership.KNOCK if should_knock: - knock_room_sync = KnockedSyncResult(room_id, knock=non_joins[-1]) + knock_room_sync = KnockedSyncResult(room_id, knock=last_non_join) if knock_room_sync: knocked.append(knock_room_sync) @@ -1746,7 +1861,9 @@ async def _get_rooms_changed( timeline_limit = sync_config.filter_collection.timeline_limit() - # Get all events for rooms we're currently joined to. + # Get all events since the `from_key` in rooms we're currently joined to. + # If there are too many, we get the most recent events only. This leaves + # a "gap" in the timeline, as described by the spec for /sync. 
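The loop above works room by room: the user's membership changes are grouped by room_id in stream order, and categorisation then hinges on the last non-join event. A condensed sketch of that grouping, assuming event objects with `room_id` and `membership` attributes:

    from typing import Dict, List

    def group_by_room(membership_events: List) -> Dict[str, List]:
        by_room: Dict[str, List] = {}
        for event in membership_events:
            # Events arrive in stream order, so each per-room list stays ordered.
            by_room.setdefault(event.room_id, []).append(event)
        return by_room

    def last_non_join(events: List):
        # The most recent non-join event decides invite/knock/leave handling.
        non_joins = [e for e in events if e.membership != "join"]
        return non_joins[-1] if non_joins else None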
room_to_events = await self.store.get_room_events_stream_for_rooms( room_ids=sync_result_builder.joined_room_ids, from_key=since_token.room_key, @@ -1807,6 +1924,10 @@ async def _get_all_rooms( ) -> _RoomChanges: """Returns entries for all rooms for the user. + Like `_get_rooms_changed`, but assumes the `since_token` is `None`. + + This function does not modify the sync_result_builder. + Args: sync_result_builder ignored_users: Set of users ignored by user. @@ -1818,16 +1939,9 @@ async def _get_all_rooms( now_token = sync_result_builder.now_token sync_config = sync_result_builder.sync_config - membership_list = ( - Membership.INVITE, - Membership.KNOCK, - Membership.JOIN, - Membership.LEAVE, - Membership.BAN, - ) - room_list = await self.store.get_rooms_for_local_user_where_membership_is( - user_id=user_id, membership_list=membership_list + user_id=user_id, + membership_list=Membership.LIST, ) room_entries = [] @@ -1835,6 +1949,9 @@ async def _get_all_rooms( knocked = [] for event in room_list: + if event.room_version_id not in KNOWN_ROOM_VERSIONS: + continue + if event.membership == Membership.JOIN: room_entries.append( RoomSyncResultBuilder( @@ -1888,7 +2005,7 @@ async def _generate_room_entry( tags: Optional[Dict[str, Dict[str, Any]]], account_data: Dict[str, JsonDict], always_include: bool = False, - ): + ) -> None: """Populates the `joined` and `archived` section of `sync_result_builder` based on the `room_builder`. @@ -1921,125 +2038,158 @@ async def _generate_room_entry( since_token = room_builder.since_token upto_token = room_builder.upto_token - batch = await self._load_filtered_recents( - room_id, - sync_config, - now_token=upto_token, - since_token=since_token, - potential_recents=events, - newly_joined_room=newly_joined, - ) + with start_active_span("generate_room_entry"): + set_tag("room_id", room_id) + log_kv({"events": len(events or ())}) - # Note: `batch` can be both empty and limited here in the case where - # `_load_filtered_recents` can't find any events the user should see - # (e.g. due to having ignored the sender of the last 50 events). + log_kv( + { + "since_token": since_token, + "upto_token": upto_token, + } + ) - if newly_joined: - # debug for https://github.com/matrix-org/synapse/issues/4422 - issue4422_logger.debug( - "Timeline events after filtering in newly-joined room %s: %r", + batch = await self._load_filtered_recents( room_id, - batch, + sync_config, + now_token=upto_token, + since_token=since_token, + potential_recents=events, + newly_joined_room=newly_joined, + ) + log_kv( + { + "batch_events": len(batch.events), + "prev_batch": batch.prev_batch, + "batch_limited": batch.limited, + } ) - # When we join the room (or the client requests full_state), we should - # send down any existing tags. Usually the user won't have tags in a - # newly joined room, unless either a) they've joined before or b) the - # tag was added by synapse e.g. for server notice rooms. - if full_state: - user_id = sync_result_builder.sync_config.user.to_string() - tags = await self.store.get_tags_for_room(user_id, room_id) - - # If there aren't any tags, don't send the empty tags list down - # sync - if not tags: - tags = None - - account_data_events = [] - if tags is not None: - account_data_events.append({"type": "m.tag", "content": {"tags": tags}}) + # Note: `batch` can be both empty and limited here in the case where + # `_load_filtered_recents` can't find any events the user should see + # (e.g. due to having ignored the sender of the last 50 events). 
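A timeline batch is "limited" exactly when more events exist than the batch returns; `_load_filtered_recents` detects this by asking the store for one event more than the timeline limit. A sketch of the trick on a plain list with newest events last (illustrative only, not the storage-layer code):

    def take_recent(events: list, limit: int):
        # Fetch limit + 1: if the extra event shows up, older history is
        # being omitted and the batch must be flagged as limited ("gappy").
        candidates = events[-(limit + 1):]
        limited = len(candidates) > limit
        return candidates[-limit:], limited

    batch, limited = take_recent(list(range(25)), limit=10)
    assert limited and len(batch) == 10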
- for account_data_type, content in account_data.items(): - account_data_events.append({"type": account_data_type, "content": content}) + if newly_joined: + # debug for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "Timeline events after filtering in newly-joined room %s: %r", + room_id, + batch, + ) - account_data_events = sync_config.filter_collection.filter_room_account_data( - account_data_events - ) + # When we join the room (or the client requests full_state), we should + # send down any existing tags. Usually the user won't have tags in a + # newly joined room, unless either a) they've joined before or b) the + # tag was added by synapse e.g. for server notice rooms. + if full_state: + user_id = sync_result_builder.sync_config.user.to_string() + tags = await self.store.get_tags_for_room(user_id, room_id) - ephemeral = sync_config.filter_collection.filter_room_ephemeral(ephemeral) + # If there aren't any tags, don't send the empty tags list down + # sync + if not tags: + tags = None - if not ( - always_include or batch or account_data_events or ephemeral or full_state - ): - return + account_data_events = [] + if tags is not None: + account_data_events.append({"type": "m.tag", "content": {"tags": tags}}) - state = await self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state - ) + for account_data_type, content in account_data.items(): + account_data_events.append( + {"type": account_data_type, "content": content} + ) - summary: Optional[JsonDict] = {} - - # we include a summary in room responses when we're lazy loading - # members (as the client otherwise doesn't have enough info to form - # the name itself). - if sync_config.filter_collection.lazy_load_members() and ( - # we recalculate the summary: - # if there are membership changes in the timeline, or - # if membership has changed during a gappy sync, or - # if this is an initial sync. - any(ev.type == EventTypes.Member for ev in batch.events) - or ( - # XXX: this may include false positives in the form of LL - # members which have snuck into state - batch.limited - and any(t == EventTypes.Member for (t, k) in state) + account_data_events = ( + await sync_config.filter_collection.filter_room_account_data( + account_data_events + ) ) - or since_token is None - ): - summary = await self.compute_summary( - room_id, sync_config, batch, state, now_token + + ephemeral = await sync_config.filter_collection.filter_room_ephemeral( + ephemeral ) - if room_builder.rtype == "joined": - unread_notifications: Dict[str, int] = {} - room_sync = JoinedSyncResult( - room_id=room_id, - timeline=batch, - state=state, - ephemeral=ephemeral, - account_data=account_data_events, - unread_notifications=unread_notifications, - summary=summary, - unread_count=0, + if not ( + always_include + or batch + or account_data_events + or ephemeral + or full_state + ): + return + + state = await self.compute_state_delta( + room_id, + batch, + sync_config, + since_token, + now_token, + full_state=full_state, ) - if room_sync or always_include: - notifs = await self.unread_notifs_for_room_id(room_id, sync_config) + summary: Optional[JsonDict] = {} + + # we include a summary in room responses when we're lazy loading + # members (as the client otherwise doesn't have enough info to form + # the name itself). 
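The block above folds room tags into the per-room account data as a synthetic `m.tag` event before handing everything to the filter. The list being built has roughly this shape (the tag name and order value here are illustrative only):

    tags = {"u.work": {"order": 0.5}}  # example tag content
    account_data_events = [
        {"type": "m.tag", "content": {"tags": tags}},
        # ...plus one {"type": ..., "content": ...} dict per remaining
        # account-data entry for this room.
    ]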
+ if sync_config.filter_collection.lazy_load_members() and ( + # we recalculate the summary: + # if there are membership changes in the timeline, or + # if membership has changed during a gappy sync, or + # if this is an initial sync. + any(ev.type == EventTypes.Member for ev in batch.events) + or ( + # XXX: this may include false positives in the form of LL + # members which have snuck into state + batch.limited + and any(t == EventTypes.Member for (t, k) in state) + ) + or since_token is None + ): + summary = await self.compute_summary( + room_id, sync_config, batch, state, now_token + ) + + if room_builder.rtype == "joined": + unread_notifications: Dict[str, int] = {} + room_sync = JoinedSyncResult( + room_id=room_id, + timeline=batch, + state=state, + ephemeral=ephemeral, + account_data=account_data_events, + unread_notifications=unread_notifications, + summary=summary, + unread_count=0, + ) + + if room_sync or always_include: + notifs = await self.unread_notifs_for_room_id(room_id, sync_config) - unread_notifications["notification_count"] = notifs["notify_count"] - unread_notifications["highlight_count"] = notifs["highlight_count"] + unread_notifications["notification_count"] = notifs["notify_count"] + unread_notifications["highlight_count"] = notifs["highlight_count"] - room_sync.unread_count = notifs["unread_count"] + room_sync.unread_count = notifs["unread_count"] - sync_result_builder.joined.append(room_sync) + sync_result_builder.joined.append(room_sync) - if batch.limited and since_token: - user_id = sync_result_builder.sync_config.user.to_string() - logger.debug( - "Incremental gappy sync of %s for user %s with %d state events" - % (room_id, user_id, len(state)) + if batch.limited and since_token: + user_id = sync_result_builder.sync_config.user.to_string() + logger.debug( + "Incremental gappy sync of %s for user %s with %d state events" + % (room_id, user_id, len(state)) + ) + elif room_builder.rtype == "archived": + archived_room_sync = ArchivedSyncResult( + room_id=room_id, + timeline=batch, + state=state, + account_data=account_data_events, ) - elif room_builder.rtype == "archived": - archived_room_sync = ArchivedSyncResult( - room_id=room_id, - timeline=batch, - state=state, - account_data=account_data_events, - ) - if archived_room_sync or always_include: - sync_result_builder.archived.append(archived_room_sync) - else: - raise Exception("Unrecognized rtype: %r", room_builder.rtype) + if archived_room_sync or always_include: + sync_result_builder.archived.append(archived_room_sync) + else: + raise Exception("Unrecognized rtype: %r", room_builder.rtype) async def get_rooms_for_user_at( self, user_id: str, room_key: RoomStreamToken @@ -2068,21 +2218,23 @@ async def get_rooms_for_user_at( # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. 
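The lazy-loading condition above reads naturally as a standalone predicate; a simplified sketch, with the membership event type written out as a literal:

    def needs_summary_recalculation(batch_events, batch_limited, state, since_token) -> bool:
        # Recalculate when membership changed in the timeline, when it may
        # have changed across a gappy sync, or on an initial sync.
        return (
            any(ev.type == "m.room.member" for ev in batch_events)
            or (batch_limited and any(t == "m.room.member" for (t, _k) in state))
            or since_token is None
        )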
- for room_id, event_pos in joined_rooms: - if not event_pos.persisted_after(room_key): - joined_room_ids.add(room_id) + for joined_room in joined_rooms: + if not joined_room.event_pos.persisted_after(room_key): + joined_room_ids.add(joined_room.room_id) continue - logger.info("User joined room after current token: %s", room_id) + logger.info("User joined room after current token: %s", joined_room.room_id) extrems = ( await self.store.get_forward_extremities_for_room_at_stream_ordering( - room_id, event_pos.stream + joined_room.room_id, joined_room.event_pos.stream ) ) - users_in_room = await self.state.get_current_users_in_room(room_id, extrems) + users_in_room = await self.state.get_current_users_in_room( + joined_room.room_id, extrems + ) if user_id in users_in_room: - joined_room_ids.add(room_id) + joined_room_ids.add(joined_room.room_id) return frozenset(joined_room_ids) @@ -2139,8 +2291,7 @@ def _calculate_state( # to only include membership events for the senders in the timeline. # In practice, we can do this by removing them from the p_ids list, # which is the list of relevant state we know we have already sent to the client. - # see https://github.com/matrix-org/synapse/pull/2970 - # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 + # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: p_ids.difference_update( @@ -2152,7 +2303,7 @@ def _calculate_state( return {event_id_to_key[e]: e for e in state_ids} -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class SyncResultBuilder: """Used to help build up a new SyncResult for a user @@ -2164,33 +2315,66 @@ class SyncResultBuilder: joined_room_ids: List of rooms the user is joined to # The following mirror the fields in a sync response - presence (list) - account_data (list) - joined (list[JoinedSyncResult]) - invited (list[InvitedSyncResult]) - knocked (list[KnockedSyncResult]) - archived (list[ArchivedSyncResult]) - groups (GroupsSyncResult|None) - to_device (list) + presence + account_data + joined + invited + knocked + archived + groups + to_device """ - sync_config = attr.ib(type=SyncConfig) - full_state = attr.ib(type=bool) - since_token = attr.ib(type=Optional[StreamToken]) - now_token = attr.ib(type=StreamToken) - joined_room_ids = attr.ib(type=FrozenSet[str]) + sync_config: SyncConfig + full_state: bool + since_token: Optional[StreamToken] + now_token: StreamToken + joined_room_ids: FrozenSet[str] + + presence: List[UserPresenceState] = attr.Factory(list) + account_data: List[JsonDict] = attr.Factory(list) + joined: List[JoinedSyncResult] = attr.Factory(list) + invited: List[InvitedSyncResult] = attr.Factory(list) + knocked: List[KnockedSyncResult] = attr.Factory(list) + archived: List[ArchivedSyncResult] = attr.Factory(list) + groups: Optional[GroupsSyncResult] = None + to_device: List[JsonDict] = attr.Factory(list) + + def calculate_user_changes(self) -> Tuple[Set[str], Set[str]]: + """Work out which other users have joined or left rooms we are joined to. 
- presence = attr.ib(type=List[JsonDict], default=attr.Factory(list)) - account_data = attr.ib(type=List[JsonDict], default=attr.Factory(list)) - joined = attr.ib(type=List[JoinedSyncResult], default=attr.Factory(list)) - invited = attr.ib(type=List[InvitedSyncResult], default=attr.Factory(list)) - knocked = attr.ib(type=List[KnockedSyncResult], default=attr.Factory(list)) - archived = attr.ib(type=List[ArchivedSyncResult], default=attr.Factory(list)) - groups = attr.ib(type=Optional[GroupsSyncResult], default=None) - to_device = attr.ib(type=List[JsonDict], default=attr.Factory(list)) + This data is only useful for an incremental sync. + + The SyncResultBuilder is not modified by this function. + """ + newly_joined_or_invited_or_knocked_users = set() + newly_left_users = set() + if self.since_token: + for joined_sync in self.joined: + it = itertools.chain( + joined_sync.timeline.events, joined_sync.state.values() + ) + for event in it: + if event.type == EventTypes.Member: + if ( + event.membership == Membership.JOIN + or event.membership == Membership.INVITE + or event.membership == Membership.KNOCK + ): + newly_joined_or_invited_or_knocked_users.add( + event.state_key + ) + else: + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership == Membership.JOIN: + newly_left_users.add(event.state_key) + + newly_left_users -= newly_joined_or_invited_or_knocked_users + return newly_joined_or_invited_or_knocked_users, newly_left_users -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class RoomSyncResultBuilder: """Stores information needed to create either a `JoinedSyncResult` or `ArchivedSyncResult`. @@ -2206,10 +2390,10 @@ class RoomSyncResultBuilder: upto_token: Latest point to return events from.
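With `auto_attribs`, the mutable defaults above must still go through `attr.Factory` so that every instance receives its own fresh list; a bare `= []` default would be evaluated once and shared. A minimal illustration:

    import attr
    from typing import List

    @attr.s(slots=True, auto_attribs=True)
    class Accumulator:
        # attr.Factory(list) builds a new list per instance.
        values: List[str] = attr.Factory(list)

    a, b = Accumulator(), Accumulator()
    a.values.append("x")
    assert b.values == []  # no shared state between instances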
""" - room_id = attr.ib(type=str) - rtype = attr.ib(type=str) - events = attr.ib(type=Optional[List[EventBase]]) - newly_joined = attr.ib(type=bool) - full_state = attr.ib(type=bool) - since_token = attr.ib(type=Optional[StreamToken]) - upto_token = attr.ib(type=StreamToken) + room_id: str + rtype: str + events: Optional[List[EventBase]] + newly_joined: bool + full_state: bool + since_token: Optional[StreamToken] + upto_token: StreamToken diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 0cb651a40009..1676ebd057c1 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -23,6 +23,7 @@ wrap_as_background_process, ) from synapse.replication.tcp.streams import TypingStream +from synapse.streams import EventSource from synapse.types import JsonDict, Requester, UserID, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure @@ -53,7 +54,7 @@ class FollowerTypingHandler: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - self.server_name = hs.config.server_name + self.server_name = hs.config.server.server_name self.clock = hs.get_clock() self.is_mine_id = hs.is_mine_id @@ -61,8 +62,8 @@ def __init__(self, hs: "HomeServer"): if hs.should_send_federation(): self.federation = hs.get_federation_sender() - if hs.config.worker.writers.typing != hs.get_instance_name(): - hs.get_federation_registry().register_instance_for_edu( + if hs.get_instance_name() not in hs.config.worker.writers.typing: + hs.get_federation_registry().register_instances_for_edu( "m.typing", hs.config.worker.writers.typing, ) @@ -73,7 +74,7 @@ def __init__(self, hs: "HomeServer"): self._room_typing: Dict[str, Set[str]] = {} self._member_last_federation_poke: Dict[RoomMember, int] = {} - self.wheel_timer = WheelTimer(bucket_size=5000) + self.wheel_timer: WheelTimer[RoomMember] = WheelTimer(bucket_size=5000) self._latest_room_serial = 0 self.clock.looping_call(self._handle_timeouts, 5000) @@ -89,7 +90,7 @@ def _reset(self) -> None: self.wheel_timer = WheelTimer(bucket_size=5000) @wrap_as_background_process("typing._handle_timeouts") - def _handle_timeouts(self) -> None: + async def _handle_timeouts(self) -> None: logger.debug("Checking for typing timeouts") now = self.clock.time_msec() @@ -204,7 +205,7 @@ class TypingWriterHandler(FollowerTypingHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) - assert hs.config.worker.writers.typing == hs.get_instance_name() + assert hs.get_instance_name() in hs.config.worker.writers.typing self.auth = hs.get_auth() self.notifier = hs.get_notifier() @@ -335,7 +336,8 @@ async def _recv_edu(self, origin: str, content: JsonDict) -> None: ) if not is_in_room: logger.info( - "Ignoring typing update from %s as we're not in the room", + "Ignoring typing update for room %r from server %s as we're not in the room", + room_id, origin, ) return @@ -438,7 +440,7 @@ def process_replication_rows( raise Exception("Typing writer instance got typing info over replication") -class TypingNotificationEventSource: +class TypingNotificationEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.hs = hs self.clock = hs.get_clock() @@ -463,17 +465,23 @@ async def get_new_events_as( may be interested in. Args: - from_key: the stream position at which events should be fetched from - service: The appservice which may be interested + from_key: the stream position at which events should be fetched from. 
+ service: The appservice which may be interested. + + Returns: + A two-tuple containing the following: + * A list of json dictionaries derived from typing events that the + appservice may be interested in. + * The latest known room serial. """ with Measure(self.clock, "typing.get_new_events_as"): - from_key = int(from_key) handler = self.get_typing_handler() events = [] for room_id in handler._room_serials.keys(): if handler._room_serials[room_id] <= from_key: continue + if not await service.matches_user_in_member_list( room_id, handler.store ): @@ -481,10 +489,16 @@ async def get_new_events_as( events.append(self._make_event_for(room_id)) - return (events, handler._latest_room_serial) + return events, handler._latest_room_serial async def get_new_events( - self, from_key: int, room_ids: Iterable[str], **kwargs + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: with Measure(self.clock, "typing.get_new_events"): from_key = int(from_key) @@ -499,7 +513,7 @@ async def get_new_events( events.append(self._make_event_for(room_id)) - return (events, handler._latest_room_serial) + return events, handler._latest_room_serial def get_current_key(self) -> int: return self.get_typing_handler()._latest_room_serial diff --git a/synapse/handlers/ui_auth/__init__.py b/synapse/handlers/ui_auth/__init__.py index 4c3b669faeef..13b0c61d2e20 100644 --- a/synapse/handlers/ui_auth/__init__.py +++ b/synapse/handlers/ui_auth/__init__.py @@ -34,3 +34,8 @@ class UIAuthSessionDataConstants: # used by validate_user_via_ui_auth to store the mxid of the user we are validating # for. REQUEST_USER_ID = "request_user_id" + + # used during registration to store the registration token used (if required) so that: + # - we can prevent a token being used twice by one session + # - we can 'use up' the token after registration has successfully completed + REGISTRATION_TOKEN = "org.matrix.msc3231.login.registration_token" diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 5414ce77d83c..184730ebe8a4 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -49,7 +49,7 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any: clientip: The IP address of the client. Raises: - SynapseError if authentication failed + LoginError if authentication failed. Returns: The result of authentication (to pass back to the client?) 
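`TypingNotificationEventSource` now subclasses the shared `EventSource[int, JsonDict]` interface with an explicit `get_new_events` signature. A hedged sketch of how a caller pages through such a source, using only the names visible in the signature above:

    async def poll_typing(source, user, room_ids, from_key):
        # `from_key` is the serial returned by the previous call (or
        # source.get_current_key() to start from "now"); the returned
        # key becomes the next call's from_key.
        events, next_key = await source.get_new_events(
            user=user,
            from_key=from_key,
            limit=None,
            room_ids=room_ids,
            is_guest=False,
        )
        return events, next_key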
@@ -70,7 +70,7 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any: class TermsAuthChecker(UserInteractiveAuthChecker): AUTH_TYPE = LoginType.TERMS - def is_enabled(self): + def is_enabled(self) -> bool: return True async def check_auth(self, authdict: dict, clientip: str) -> Any: @@ -82,10 +82,10 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self._enabled = bool(hs.config.recaptcha_private_key) + self._enabled = bool(hs.config.captcha.recaptcha_private_key) self._http_client = hs.get_proxied_http_client() - self._url = hs.config.recaptcha_siteverify_api - self._secret = hs.config.recaptcha_private_key + self._url = hs.config.captcha.recaptcha_siteverify_api + self._secret = hs.config.captcha.recaptcha_private_key def is_enabled(self) -> bool: return self._enabled @@ -131,7 +131,9 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any: ) if resp_body["success"]: return True - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + raise LoginError( + 401, "Captcha authentication failed", errcode=Codes.UNAUTHORIZED + ) class _BaseThreepidAuthChecker: @@ -151,20 +153,27 @@ async def _check_threepid(self, medium: str, authdict: dict) -> dict: # msisdns are currently always ThreepidBehaviour.REMOTE if medium == "msisdn": - if not self.hs.config.account_threepid_delegate_msisdn: + if not self.hs.config.registration.account_threepid_delegate_msisdn: raise SynapseError( 400, "Phone number verification is not enabled on this homeserver" ) threepid = await identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_msisdn, threepid_creds + self.hs.config.registration.account_threepid_delegate_msisdn, + threepid_creds, ) elif medium == "email": - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.account_threepid_delegate_email + if ( + self.hs.config.email.threepid_behaviour_email + == ThreepidBehaviour.REMOTE + ): + assert self.hs.config.registration.account_threepid_delegate_email threepid = await identity_handler.threepid_from_creds( - self.hs.config.account_threepid_delegate_email, threepid_creds + self.hs.config.registration.account_threepid_delegate_email, + threepid_creds, ) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + elif ( + self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL + ): threepid = None row = await self.store.get_threepid_validation_session( medium, @@ -191,7 +200,9 @@ async def _check_threepid(self, medium: str, authdict: dict) -> dict: raise AssertionError("Unrecognized threepid medium: %s" % (medium,)) if not threepid: - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + raise LoginError( + 401, "Unable to get validated threepid", errcode=Codes.UNAUTHORIZED + ) if threepid["medium"] != medium: raise LoginError( @@ -214,7 +225,7 @@ def __init__(self, hs: "HomeServer"): _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return self.hs.config.threepid_behaviour_email in ( + return self.hs.config.email.threepid_behaviour_email in ( ThreepidBehaviour.REMOTE, ThreepidBehaviour.LOCAL, ) @@ -231,17 +242,82 @@ def __init__(self, hs: "HomeServer"): _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return bool(self.hs.config.account_threepid_delegate_msisdn) + return bool(self.hs.config.registration.account_threepid_delegate_msisdn) async def check_auth(self, authdict: dict, clientip: str) -> Any: return await 
self._check_threepid("msisdn", authdict) +class RegistrationTokenAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.REGISTRATION_TOKEN + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + self.hs = hs + self._enabled = bool(hs.config.registration.registration_requires_token) + self.store = hs.get_datastore() + + def is_enabled(self) -> bool: + return self._enabled + + async def check_auth(self, authdict: dict, clientip: str) -> Any: + if "token" not in authdict: + raise LoginError(400, "Missing registration token", Codes.MISSING_PARAM) + if not isinstance(authdict["token"], str): + raise LoginError( + 400, "Registration token must be a string", Codes.INVALID_PARAM + ) + if "session" not in authdict: + raise LoginError(400, "Missing UIA session", Codes.MISSING_PARAM) + + # Get these here to avoid cyclic dependencies + from synapse.handlers.ui_auth import UIAuthSessionDataConstants + + auth_handler = self.hs.get_auth_handler() + + session = authdict["session"] + token = authdict["token"] + + # If the LoginType.REGISTRATION_TOKEN stage has already been completed, + # return early to avoid incrementing `pending` again. + stored_token = await auth_handler.get_session_data( + session, UIAuthSessionDataConstants.REGISTRATION_TOKEN + ) + if stored_token: + if token != stored_token: + raise LoginError( + 400, "Registration token has changed", Codes.INVALID_PARAM + ) + else: + return token + + if await self.store.registration_token_is_valid(token): + # Increment pending counter, so that if token has limited uses it + # can't be used up by someone else in the meantime. + await self.store.set_registration_token_pending(token) + # Store the token in the UIA session, so that once registration + # is complete `completed` can be incremented. + await auth_handler.set_session_data( + session, + UIAuthSessionDataConstants.REGISTRATION_TOKEN, + token, + ) + # The token will be stored as the result of the authentication stage + # in ui_auth_sessions_credentials. This allows the pending counter + # for tokens to be decremented when expired sessions are deleted. + return token + else: + raise LoginError( + 401, "Invalid registration token", errcode=Codes.UNAUTHORIZED + ) + + INTERACTIVE_AUTH_CHECKERS = [ DummyAuthChecker, TermsAuthChecker, RecaptchaAuthChecker, EmailIdentityAuthChecker, MsisdnAuthChecker, + RegistrationTokenAuthChecker, ] """A list of UserInteractiveAuthChecker classes""" diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 6edb1da50a98..a0eb45446f56 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -17,7 +17,7 @@ import synapse.metrics from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership -from synapse.handlers.state_deltas import StateDeltasHandler +from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.roommember import ProfileInfo from synapse.types import JsonDict @@ -30,14 +30,26 @@ class UserDirectoryHandler(StateDeltasHandler): - """Handles querying of and keeping updated the user_directory. + """Handles queries and updates for the user_directory. N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY - The user directory is filled with users who this server can see are joined to a - world_readable or publicly joinable room. 
We keep a database table up to date - by streaming changes of the current state and recalculating whether users should - be in the directory or not when necessary. + When a local user searches the user_directory, we report two kinds of users: + + - users this server can see are joined to a world_readable or publicly + joinable room, and + - users belonging to a private room shared by that local user. + + The two cases are tracked separately in the `users_in_public_rooms` and + `users_who_share_private_rooms` tables. Both kinds of users have their + username and avatar tracked in a `user_directory` table. + + This handler has three responsibilities: + 1. Forwarding requests to `/user_directory/search` to the UserDirectoryStore. + 2. Providing hooks for the application to call when local users are added, + removed, or have their profile changed. + 3. Listening for room state changes that indicate remote users have + joined or left a room, or that their profile has changed. """ def __init__(self, hs: "HomeServer"): @@ -48,8 +60,8 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - self.update_user_directory = hs.config.update_user_directory - self.search_all_users = hs.config.user_directory_search_all_users + self.update_user_directory = hs.config.server.update_user_directory + self.search_all_users = hs.config.userdirectory.user_directory_search_all_users self.spam_checker = hs.get_spam_checker() # The current position in the current_state_delta stream self.pos: Optional[int] = None @@ -102,7 +114,7 @@ def notify_new_event(self) -> None: if self._is_processing: return - async def process(): + async def process() -> None: try: await self._unsafe_process() finally: @@ -120,17 +132,12 @@ async def handle_local_profile_change( # FIXME(#3714): We should probably do this in the same worker as all # the other changes. - # Support users are for diagnostics and should not appear in the user directory. - is_support = await self.store.is_support_user(user_id) - # When change profile information of deactivated user it should not appear in the user directory. - is_deactivated = await self.store.get_user_deactivated_status(user_id) - - if not (is_support or is_deactivated): + if await self.store.should_include_local_user_in_dir(user_id): await self.store.update_profile_in_user_dir( user_id, profile.display_name, profile.avatar_url ) - async def handle_user_deactivated(self, user_id: str) -> None: + async def handle_local_user_deactivated(self, user_id: str) -> None: """Called when a user ID is deactivated""" # FIXME(#3714): We should probably do this in the same worker as all # the other changes. 
@@ -189,58 +196,12 @@ async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None: room_id, prev_event_id, event_id, typ ) elif typ == EventTypes.Member: - change = await self._get_key_change( + await self._handle_room_membership_event( + room_id, prev_event_id, event_id, - key_name="membership", - public_value=Membership.JOIN, + state_key, ) - - if change is False: - # Need to check if the server left the room entirely, if so - # we might need to remove all the users in that room - is_in_room = await self.store.is_host_joined( - room_id, self.server_name - ) - if not is_in_room: - logger.debug("Server left room: %r", room_id) - # Fetch all the users that we marked as being in user - # directory due to being in the room and then check if - # need to remove those users or not - user_ids = await self.store.get_users_in_dir_due_to_room( - room_id - ) - - for user_id in user_ids: - await self._handle_remove_user(room_id, user_id) - return - else: - logger.debug("Server is still in room: %r", room_id) - - is_support = await self.store.is_support_user(state_key) - if not is_support: - if change is None: - # Handle any profile changes - await self._handle_profile_change( - state_key, room_id, prev_event_id, event_id - ) - continue - - if change: # The user joined - event = await self.store.get_event(event_id, allow_none=True) - # It isn't expected for this event to not exist, but we - # don't want the entire background process to break. - if event is None: - continue - - profile = ProfileInfo( - avatar_url=event.content.get("avatar_url"), - display_name=event.content.get("displayname"), - ) - - await self._handle_new_user(room_id, state_key, profile) - else: # The user left - await self._handle_remove_user(room_id, state_key) else: logger.debug("Ignoring irrelevant type: %r", typ) @@ -263,14 +224,14 @@ async def _handle_room_publicity_change( logger.debug("Handling change for %s: %s", typ, room_id) if typ == EventTypes.RoomHistoryVisibility: - change = await self._get_key_change( + publicness = await self._get_key_change( prev_event_id, event_id, key_name="history_visibility", public_value=HistoryVisibility.WORLD_READABLE, ) elif typ == EventTypes.JoinRules: - change = await self._get_key_change( + publicness = await self._get_key_change( prev_event_id, event_id, key_name="join_rule", @@ -278,9 +239,7 @@ async def _handle_room_publicity_change( ) else: raise Exception("Invalid event type") - # If change is None, no change. True => become world_readable/public, - # False => was world_readable/public - if change is None: + if publicness is MatchChange.no_change: logger.debug("No change") return @@ -290,108 +249,185 @@ async def _handle_room_publicity_change( room_id ) - logger.debug("Change: %r, is_public: %r", change, is_public) + logger.debug("Publicness change: %r, is_public: %r", publicness, is_public) - if change and not is_public: + if publicness is MatchChange.now_true and not is_public: # If we became world readable but room isn't currently public then # we ignore the change return - elif not change and is_public: + elif publicness is MatchChange.now_false and is_public: # If we stopped being world readable but are still public, # ignore the change return - other_users_in_room_with_profiles = ( - await self.store.get_users_in_room_with_profiles(room_id) - ) + users_in_room = await self.store.get_users_in_room(room_id) # Remove every user from the sharing tables for that room. 
- for user_id in other_users_in_room_with_profiles.keys(): + for user_id in users_in_room: await self.store.remove_user_who_share_room(user_id, room_id) - # Then, re-add them to the tables. - # NOTE: this is not the most efficient method, as handle_new_user sets + # Then, re-add all remote users and some local users to the tables. + # NOTE: this is not the most efficient method, as _track_user_joined_room sets # up local_user -> other_user and other_user_whos_local -> local_user, # which, when run over an entire room, will result in the same values # being added multiple times. The batching upserts shouldn't make this # too bad, though. - for user_id, profile in other_users_in_room_with_profiles.items(): - await self._handle_new_user(room_id, user_id, profile) + for user_id in users_in_room: + if not self.is_mine_id( + user_id + ) or await self.store.should_include_local_user_in_dir(user_id): + await self._track_user_joined_room(room_id, user_id) - async def _handle_new_user( - self, room_id: str, user_id: str, profile: ProfileInfo + async def _handle_room_membership_event( + self, + room_id: str, + prev_event_id: str, + event_id: str, + state_key: str, ) -> None: - """Called when we might need to add user to directory + """Process a single room membership event. + + We have to do two things: + + 1. Update the room-sharing tables. + This applies to remote users and non-excluded local users. + 2. Update the user_directory and user_directory_search tables. + This applies to remote users only, because we only become aware of + them (and any profile changes) by listening to these events. + The rest of the application knows exactly when local users are + created or their profile changed: it will directly call methods + on this class. + """ + joined = await self._get_key_change( + prev_event_id, + event_id, + key_name="membership", + public_value=Membership.JOIN, + ) - Args: - room_id: The room ID that user joined or started being public - user_id + # Both cases ignore excluded local users, so start by discarding them. + is_remote = not self.is_mine_id(state_key) + if not is_remote and not await self.store.should_include_local_user_in_dir( + state_key + ): + return + + if joined is MatchChange.now_false: + # Need to check if the server left the room entirely, if so + # we might need to remove all the users in that room + is_in_room = await self.store.is_host_joined(room_id, self.server_name) + if not is_in_room: + logger.debug("Server left room: %r", room_id) + # Fetch all the users that we marked as being in user + # directory due to being in the room and then check if + # need to remove those users or not + user_ids = await self.store.get_users_in_dir_due_to_room(room_id) + + for user_id in user_ids: + await self._handle_remove_user(room_id, user_id) + else: + logger.debug("Server is still in room: %r", room_id) + await self._handle_remove_user(room_id, state_key) + elif joined is MatchChange.no_change: + # Handle any profile changes for remote users. + # (For local users the rest of the application calls + # `handle_local_profile_change`.) + if is_remote: + await self._handle_possible_remote_profile_change( + state_key, room_id, prev_event_id, event_id + ) + elif joined is MatchChange.now_true: # The user joined + # This may be the first time we've seen a remote user. If + # so, ensure we have a directory entry for them. (For local users, + # the rest of the application calls `handle_local_profile_change`.)
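Membership handling above branches on `MatchChange`, the tri-state returned by `_get_key_change` in place of the old True/False/None. A sketch of the underlying idea (enum spelling assumed from the identifiers used here):

    import enum
    from typing import Optional

    class MatchChange(enum.Enum):
        no_change = enum.auto()
        now_true = enum.auto()
        now_false = enum.auto()

    def key_change(prev: Optional[str], new: Optional[str], public_value: str) -> MatchChange:
        # Compare whether the watched key matched the "public" value
        # before and after the state change.
        was, now = prev == public_value, new == public_value
        if was == now:
            return MatchChange.no_change
        return MatchChange.now_true if now else MatchChange.now_false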
+        if is_remote:
+            await self._upsert_directory_entry_for_remote_user(state_key, event_id)
+            await self._track_user_joined_room(room_id, state_key)
+
+    async def _upsert_directory_entry_for_remote_user(
+        self, user_id: str, event_id: str
+    ) -> None:
+        """A remote user has just joined a room. Ensure they have an entry in
+        the user directory. The caller is responsible for making sure they're
+        remote.
         """
+        event = await self.store.get_event(event_id, allow_none=True)
+        # It isn't expected for this event to not exist, but we
+        # don't want the entire background process to break.
+        if event is None:
+            return
+
         logger.debug("Adding new user to dir, %r", user_id)
 
         await self.store.update_profile_in_user_dir(
-            user_id, profile.display_name, profile.avatar_url
+            user_id, event.content.get("displayname"), event.content.get("avatar_url")
         )
 
+    async def _track_user_joined_room(self, room_id: str, user_id: str) -> None:
+        """Someone's just joined a room. Update `users_in_public_rooms` or
+        `users_who_share_private_rooms` as appropriate.
+
+        The caller is responsible for ensuring that the given user should be
+        included in the user directory.
+        """
         is_public = await self.store.is_room_world_readable_or_publicly_joinable(
             room_id
         )
-
-        # Now we update users who share rooms with users.
-        other_users_in_room = await self.store.get_users_in_room(room_id)
-
         if is_public:
             await self.store.add_users_in_public_rooms(room_id, (user_id,))
         else:
+            users_in_room = await self.store.get_users_in_room(room_id)
+            other_users_in_room = [
+                other
+                for other in users_in_room
+                if other != user_id
+                and (
+                    not self.is_mine_id(other)
+                    or await self.store.should_include_local_user_in_dir(other)
+                )
+            ]
             to_insert = set()
 
             # First, if they're our user then we need to update for every user
             if self.is_mine_id(user_id):
-
-                is_appservice = self.store.get_if_app_services_interested_in_user(
-                    user_id
-                )
-
-                # We don't care about appservice users.
-                if not is_appservice:
-                    for other_user_id in other_users_in_room:
-                        if user_id == other_user_id:
-                            continue
-
-                        to_insert.add((user_id, other_user_id))
+                for other_user_id in other_users_in_room:
+                    to_insert.add((user_id, other_user_id))
 
             # Next we need to update for every local user in the room
             for other_user_id in other_users_in_room:
-                if user_id == other_user_id:
-                    continue
-
-                is_appservice = self.store.get_if_app_services_interested_in_user(
-                    other_user_id
-                )
-                if self.is_mine_id(other_user_id) and not is_appservice:
+                if self.is_mine_id(other_user_id):
                     to_insert.add((other_user_id, user_id))
 
             if to_insert:
                 await self.store.add_users_who_share_private_room(room_id, to_insert)
 
     async def _handle_remove_user(self, room_id: str, user_id: str) -> None:
-        """Called when we might need to remove user from directory
+        """Called when someone leaves a room. The user may be local or remote.
+
+        (If the person who left was the last local user in this room, the server
+        is no longer in the room. We call this function to forget that the remaining
+        remote users are in the room, even though they haven't left. So the name is
+        a little misleading!)
 
         Args:
             room_id: The room ID that user left or stopped being public that
             user_id
         """
-        logger.debug("Removing user %r", user_id)
+        logger.debug("Removing user %r from room %r", user_id, room_id)
 
         # Remove user from sharing tables
         await self.store.remove_user_who_share_room(user_id, room_id)
 
-        # Are they still in any rooms? If not, remove them entirely.
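A toy model of the private-room pair bookkeeping in `_track_user_joined_room` above, assuming `users_who_share_private_rooms` holds ordered `(local_user, other_user)` rows:

    def pairs_to_insert(joiner, other_members, is_local):
        """Rows a single join adds: (joiner, other) for every other member
        when the joiner is local, plus (other, joiner) for each local other."""
        to_insert = set()
        if is_local(joiner):
            to_insert.update((joiner, other) for other in other_members)
        to_insert.update(
            (other, joiner) for other in other_members if is_local(other)
        )
        return to_insert

    # Local @a:hs joining a private room with local @b:hs and remote @c:other:
    # -> {("@a:hs", "@b:hs"), ("@a:hs", "@c:other"), ("@b:hs", "@a:hs")}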
- rooms_user_is_in = await self.store.get_user_dir_rooms_user_is_in(user_id) + # Additionally, if they're a remote user and we're no longer joined + # to any rooms they're in, remove them from the user directory. + if not self.is_mine_id(user_id): + rooms_user_is_in = await self.store.get_user_dir_rooms_user_is_in(user_id) - if len(rooms_user_is_in) == 0: - await self.store.remove_from_user_dir(user_id) + if len(rooms_user_is_in) == 0: + logger.debug("Removing user %r from directory", user_id) + await self.store.remove_from_user_dir(user_id) - async def _handle_profile_change( + async def _handle_possible_remote_profile_change( self, user_id: str, room_id: str, @@ -399,7 +435,8 @@ async def _handle_profile_change( event_id: Optional[str], ) -> None: """Check member event changes for any profile changes and update the - database if there are. + database if there are. This is intended for remote users only. The caller + is responsible for checking that the given user is remote. """ if not prev_event_id or not event_id: return diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py index 55ea97a07f29..9a2684aca432 100644 --- a/synapse/http/additional_resource.py +++ b/synapse/http/additional_resource.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import TYPE_CHECKING + +from twisted.web.server import Request + from synapse.http.server import DirectServeJsonResource +if TYPE_CHECKING: + from synapse.server import HomeServer + class AdditionalResource(DirectServeJsonResource): """Resource wrapper for additional_resources @@ -25,7 +32,7 @@ class AdditionalResource(DirectServeJsonResource): and exception handling. """ - def __init__(self, hs, handler): + def __init__(self, hs: "HomeServer", handler): """Initialise AdditionalResource The ``handler`` should return a deferred which completes when it has @@ -33,14 +40,14 @@ def __init__(self, hs, handler): ``request.write()``, and call ``request.finish()``. Args: - hs (synapse.server.HomeServer): homeserver + hs: homeserver handler ((twisted.web.server.Request) -> twisted.internet.defer.Deferred): function to be called to handle the request. """ super().__init__() self._handler = handler - def _async_render(self, request): + def _async_render(self, request: Request): # Cheekily pass the result straight through, so we don't need to worry # if its an awaitable or not. return self._handler(request) diff --git a/synapse/http/client.py b/synapse/http/client.py index c2ea51ee162b..b5a2d333a6ce 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -321,8 +321,11 @@ def __init__( self.user_agent = hs.version_string self.clock = hs.get_clock() - if hs.config.user_agent_suffix: - self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix) + if hs.config.server.user_agent_suffix: + self.user_agent = "%s %s" % ( + self.user_agent, + hs.config.server.user_agent_suffix, + ) # We use this for our body producers to ensure that they use the correct # reactor. 
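Background for the `set_verify` change just below: pyOpenSSL calls the verify callback as `callback(connection, x509, errnum, errdepth, ok)` and treats the return value as a boolean verdict, and returning `None` (as the old lambda did) can trip stricter versions. A sketch of the equivalent spelled out, assuming pyOpenSSL's documented callback contract:

    from OpenSSL import SSL

    def _no_op_verify(connection, x509, errnum, errdepth, ok):
        # Under VERIFY_NONE the verdict does not abort the handshake, but it
        # should still be a bool rather than None.
        return False

    ctx = SSL.Context(SSL.SSLv23_METHOD)
    ctx.set_verify(SSL.VERIFY_NONE, _no_op_verify)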
@@ -909,7 +912,7 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory):
 
     def __init__(self):
         self._context = SSL.Context(SSL.SSLv23_METHOD)
-        self._context.set_verify(VERIFY_NONE, lambda *_: None)
+        self._context.set_verify(VERIFY_NONE, lambda *_: False)
 
     def getContext(self, hostname=None, port=None):
         return self._context
diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py
index 17e1c5abb13d..fbafffd69bd6 100644
--- a/synapse/http/connectproxyclient.py
+++ b/synapse/http/connectproxyclient.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import base64
 import logging
+from typing import Optional
 
+import attr
 from zope.interface import implementer
 
 from twisted.internet import defer, protocol
@@ -21,7 +24,6 @@
 from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
 from twisted.internet.protocol import ClientFactory, Protocol, connectionDone
 from twisted.web import http
-from twisted.web.http_headers import Headers
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +32,22 @@ class ProxyConnectError(ConnectError):
     pass
 
 
+@attr.s
+class ProxyCredentials:
+    username_password = attr.ib(type=bytes)
+
+    def as_proxy_authorization_value(self) -> bytes:
+        """
+        Return the value for a Proxy-Authorization header (i.e. 'Basic abdef==').
+
+        Returns:
+            The encoded value for a Proxy-Authorization header: the
+            authorization type followed by the base64-encoded credentials.
+        """
+        # Encode as base64 and prepend the authorization type
+        return b"Basic " + base64.encodebytes(self.username_password)
+
+
 @implementer(IStreamClientEndpoint)
 class HTTPConnectProxyEndpoint:
     """An Endpoint implementation which will send a CONNECT request to an http proxy
@@ -46,7 +64,7 @@ class HTTPConnectProxyEndpoint:
         proxy_endpoint: the endpoint to use to connect to the proxy
         host: hostname that we want to CONNECT to
         port: port that we want to connect to
-        headers: Extra HTTP headers to include in the CONNECT request
+        proxy_creds: credentials to authenticate at proxy
     """
 
     def __init__(
@@ -55,20 +73,24 @@ def __init__(
         proxy_endpoint: IStreamClientEndpoint,
         host: bytes,
         port: int,
-        headers: Headers,
+        proxy_creds: Optional[ProxyCredentials],
     ):
         self._reactor = reactor
         self._proxy_endpoint = proxy_endpoint
         self._host = host
         self._port = port
-        self._headers = headers
+        self._proxy_creds = proxy_creds
 
     def __repr__(self):
         return "<HTTPConnectProxyEndpoint %s>" % (self._proxy_endpoint,)
 
-    def connect(self, protocolFactory: ClientFactory):
+    # Mypy encounters a false positive here: it complains that ClientFactory
+    # is incompatible with IProtocolFactory. But ClientFactory inherits from
+    # Factory, which implements IProtocolFactory. So I think this is a bug
+    # in mypy-zope.
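A quick illustration of `ProxyCredentials` above, with made-up credentials. Note that `base64.encodebytes` (unlike `base64.b64encode`) appends a trailing newline to its output:

    creds = ProxyCredentials(b"user:secret")
    creds.as_proxy_authorization_value()
    # == b"Basic " + base64.encodebytes(b"user:secret")
    # == b"Basic dXNlcjpzZWNyZXQ=\n"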
+ def connect(self, protocolFactory: ClientFactory): # type: ignore[override] f = HTTPProxiedClientFactory( - self._host, self._port, protocolFactory, self._headers + self._host, self._port, protocolFactory, self._proxy_creds ) d = self._proxy_endpoint.connect(f) # once the tcp socket connects successfully, we need to wait for the @@ -87,7 +109,7 @@ class HTTPProxiedClientFactory(protocol.ClientFactory): dst_host: hostname that we want to CONNECT to dst_port: port that we want to connect to wrapped_factory: The original Factory - headers: Extra HTTP headers to include in the CONNECT request + proxy_creds: credentials to authenticate at proxy """ def __init__( @@ -95,26 +117,28 @@ def __init__( dst_host: bytes, dst_port: int, wrapped_factory: ClientFactory, - headers: Headers, + proxy_creds: Optional[ProxyCredentials], ): self.dst_host = dst_host self.dst_port = dst_port self.wrapped_factory = wrapped_factory - self.headers = headers - self.on_connection = defer.Deferred() + self.proxy_creds = proxy_creds + self.on_connection: "defer.Deferred[None]" = defer.Deferred() def startedConnecting(self, connector): return self.wrapped_factory.startedConnecting(connector) def buildProtocol(self, addr): wrapped_protocol = self.wrapped_factory.buildProtocol(addr) + if wrapped_protocol is None: + raise TypeError("buildProtocol produced None instead of a Protocol") return HTTPConnectProtocol( self.dst_host, self.dst_port, wrapped_protocol, self.on_connection, - self.headers, + self.proxy_creds, ) def clientConnectionFailed(self, connector, reason): @@ -145,7 +169,7 @@ class HTTPConnectProtocol(protocol.Protocol): connected_deferred: a Deferred which will be callbacked with wrapped_protocol when the CONNECT completes - headers: Extra HTTP headers to include in the CONNECT request + proxy_creds: credentials to authenticate at proxy """ def __init__( @@ -154,16 +178,16 @@ def __init__( port: int, wrapped_protocol: Protocol, connected_deferred: defer.Deferred, - headers: Headers, + proxy_creds: Optional[ProxyCredentials], ): self.host = host self.port = port self.wrapped_protocol = wrapped_protocol self.connected_deferred = connected_deferred - self.headers = headers + self.proxy_creds = proxy_creds self.http_setup_client = HTTPConnectSetupClient( - self.host, self.port, self.headers + self.host, self.port, self.proxy_creds ) self.http_setup_client.on_connected.addCallback(self.proxyConnected) @@ -205,30 +229,38 @@ class HTTPConnectSetupClient(http.HTTPClient): Args: host: The hostname to send in the CONNECT message port: The port to send in the CONNECT message - headers: Extra headers to send with the CONNECT message + proxy_creds: credentials to authenticate at proxy """ - def __init__(self, host: bytes, port: int, headers: Headers): + def __init__( + self, + host: bytes, + port: int, + proxy_creds: Optional[ProxyCredentials], + ): self.host = host self.port = port - self.headers = headers - self.on_connected = defer.Deferred() + self.proxy_creds = proxy_creds + self.on_connected: "defer.Deferred[None]" = defer.Deferred() def connectionMade(self): logger.debug("Connected to proxy, sending CONNECT") self.sendCommand(b"CONNECT", b"%s:%d" % (self.host, self.port)) - # Send any additional specified headers - for name, values in self.headers.getAllRawHeaders(): - for value in values: - self.sendHeader(name, value) + # Determine whether we need to set Proxy-Authorization headers + if self.proxy_creds: + # Set a Proxy-Authorization header + self.sendHeader( + b"Proxy-Authorization", + 
self.proxy_creds.as_proxy_authorization_value(), + ) self.endHeaders() def handleStatus(self, version: bytes, status: bytes, message: bytes): logger.debug("Got Status: %s %s %s", status, message, version) if status != b"200": - raise ProxyConnectError("Unexpected status on CONNECT: %s" % status) + raise ProxyConnectError(f"Unexpected status on CONNECT: {status!s}") def handleEndHeaders(self): logger.debug("End Headers") diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index c16b7f10e645..1238bfd28726 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -14,6 +14,10 @@ import logging import urllib.parse from typing import Any, Generator, List, Optional +from urllib.request import ( # type: ignore[attr-defined] + getproxies_environment, + proxy_bypass_environment, +) from netaddr import AddrFormatError, IPAddress, IPSet from zope.interface import implementer @@ -30,9 +34,12 @@ from twisted.web.iweb import IAgent, IAgentEndpointFactory, IBodyProducer, IResponse from synapse.crypto.context_factory import FederationPolicyForHTTPS -from synapse.http.client import BlacklistingAgentWrapper +from synapse.http import proxyagent +from synapse.http.client import BlacklistingAgentWrapper, BlacklistingReactorWrapper +from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.http.federation.well_known_resolver import WellKnownResolver +from synapse.http.proxyagent import ProxyAgent from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.types import ISynapseReactor from synapse.util import Clock @@ -57,6 +64,14 @@ class MatrixFederationAgent: user_agent: The user agent header to use for federation requests. + ip_whitelist: Allowed IP addresses. + + ip_blacklist: Disallowed IP addresses. + + proxy_reactor: twisted reactor to use for connections to the proxy server + reactor might have some blacklisting applied (i.e. for DNS queries), + but we need unblocked access to the proxy. + _srv_resolver: SrvResolver implementation to use for looking up SRV records. None to use a default implementation. @@ -71,11 +86,18 @@ def __init__( reactor: ISynapseReactor, tls_client_options_factory: Optional[FederationPolicyForHTTPS], user_agent: bytes, + ip_whitelist: IPSet, ip_blacklist: IPSet, _srv_resolver: Optional[SrvResolver] = None, _well_known_resolver: Optional[WellKnownResolver] = None, ): - self._reactor = reactor + # proxy_reactor is not blacklisted + proxy_reactor = reactor + + # We need to use a DNS resolver which filters out blacklisted IP + # addresses, to prevent DNS rebinding. + reactor = BlacklistingReactorWrapper(reactor, ip_whitelist, ip_blacklist) + self._clock = Clock(reactor) self._pool = HTTPConnectionPool(reactor) self._pool.retryAutomatically = False @@ -83,24 +105,27 @@ def __init__( self._pool.cachedConnectionTimeout = 2 * 60 self._agent = Agent.usingEndpointFactory( - self._reactor, + reactor, MatrixHostnameEndpointFactory( - reactor, tls_client_options_factory, _srv_resolver + reactor, + proxy_reactor, + tls_client_options_factory, + _srv_resolver, ), pool=self._pool, ) self.user_agent = user_agent if _well_known_resolver is None: - # Note that the name resolver has already been wrapped in a - # IPBlacklistingResolver by MatrixFederationHttpClient. 
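The two stdlib helpers imported at the top of `matrix_federation_agent.py` do the environment-based proxy discovery; a standalone sketch of their behaviour, assuming `https_proxy=http://proxy.example.com:8888` and `no_proxy=internal.example.com` are set:

    from urllib.request import getproxies_environment, proxy_bypass_environment

    proxies = getproxies_environment()
    # {"https": "http://proxy.example.com:8888", "no": "internal.example.com"}

    https_proxy = proxies.get("https")  # the proxy to CONNECT through, if any

    # True here: the host matches no_proxy and should be dialled directly.
    proxy_bypass_environment("internal.example.com", proxies={"no": proxies["no"]})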
_well_known_resolver = WellKnownResolver( - self._reactor, + reactor, agent=BlacklistingAgentWrapper( - Agent( - self._reactor, + ProxyAgent( + reactor, + proxy_reactor, pool=self._pool, contextFactory=tls_client_options_factory, + use_proxy=True, ), ip_blacklist=ip_blacklist, ), @@ -200,10 +225,12 @@ class MatrixHostnameEndpointFactory: def __init__( self, reactor: IReactorCore, + proxy_reactor: IReactorCore, tls_client_options_factory: Optional[FederationPolicyForHTTPS], srv_resolver: Optional[SrvResolver], ): self._reactor = reactor + self._proxy_reactor = proxy_reactor self._tls_client_options_factory = tls_client_options_factory if srv_resolver is None: @@ -211,9 +238,10 @@ def __init__( self._srv_resolver = srv_resolver - def endpointForURI(self, parsed_uri): + def endpointForURI(self, parsed_uri: URI): return MatrixHostnameEndpoint( self._reactor, + self._proxy_reactor, self._tls_client_options_factory, self._srv_resolver, parsed_uri, @@ -227,23 +255,45 @@ class MatrixHostnameEndpoint: Args: reactor: twisted reactor to use for underlying requests + proxy_reactor: twisted reactor to use for connections to the proxy server. + 'reactor' might have some blacklisting applied (i.e. for DNS queries), + but we need unblocked access to the proxy. tls_client_options_factory: factory to use for fetching client tls options, or none to disable TLS. srv_resolver: The SRV resolver to use parsed_uri: The parsed URI that we're wanting to connect to. + + Raises: + ValueError if the environment variables contain an invalid proxy specification. + RuntimeError if no tls_options_factory is given for a https connection """ def __init__( self, reactor: IReactorCore, + proxy_reactor: IReactorCore, tls_client_options_factory: Optional[FederationPolicyForHTTPS], srv_resolver: SrvResolver, parsed_uri: URI, ): self._reactor = reactor - self._parsed_uri = parsed_uri + # http_proxy is not needed because federation is always over TLS + proxies = getproxies_environment() + https_proxy = proxies["https"].encode() if "https" in proxies else None + self.no_proxy = proxies["no"] if "no" in proxies else None + + # endpoint and credentials to use to connect to the outbound https proxy, if any. 
+ ( + self._https_proxy_endpoint, + self._https_proxy_creds, + ) = proxyagent.http_proxy_endpoint( + https_proxy, + proxy_reactor, + tls_client_options_factory, + ) + # set up the TLS connection params # # XXX disabling TLS is really only supported here for the benefit of the @@ -273,9 +323,33 @@ async def _do_connect(self, protocol_factory: IProtocolFactory) -> None: host = server.host port = server.port + should_skip_proxy = False + if self.no_proxy is not None: + should_skip_proxy = proxy_bypass_environment( + host.decode(), + proxies={"no": self.no_proxy}, + ) + + endpoint: IStreamClientEndpoint try: - logger.debug("Connecting to %s:%i", host.decode("ascii"), port) - endpoint = HostnameEndpoint(self._reactor, host, port) + if self._https_proxy_endpoint and not should_skip_proxy: + logger.debug( + "Connecting to %s:%i via %s", + host.decode("ascii"), + port, + self._https_proxy_endpoint, + ) + endpoint = HTTPConnectProxyEndpoint( + self._reactor, + self._https_proxy_endpoint, + host, + port, + proxy_creds=self._https_proxy_creds, + ) + else: + logger.debug("Connecting to %s:%i", host.decode("ascii"), port) + # not using a proxy + endpoint = HostnameEndpoint(self._reactor, host, port) if self._tls_options: endpoint = wrapClientTLS(self._tls_options, endpoint) result = await make_deferred_yieldable( diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index b8ed4ec905d4..f68646fd0dd4 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -16,7 +16,7 @@ import logging import random import time -from typing import List +from typing import Callable, Dict, List import attr @@ -28,35 +28,35 @@ logger = logging.getLogger(__name__) -SERVER_CACHE = {} +SERVER_CACHE: Dict[bytes, List["Server"]] = {} -@attr.s(slots=True, frozen=True) +@attr.s(auto_attribs=True, slots=True, frozen=True) class Server: """ Our record of an individual server which can be tried to reach a destination. Attributes: - host (bytes): target hostname - port (int): - priority (int): - weight (int): - expires (int): when the cache should expire this record - in *seconds* since + host: target hostname + port: + priority: + weight: + expires: when the cache should expire this record - in *seconds* since the epoch """ - host = attr.ib() - port = attr.ib() - priority = attr.ib(default=0) - weight = attr.ib(default=0) - expires = attr.ib(default=0) + host: bytes + port: int + priority: int = 0 + weight: int = 0 + expires: int = 0 -def _sort_server_list(server_list): +def _sort_server_list(server_list: List[Server]) -> List[Server]: """Given a list of SRV records sort them into priority order and shuffle each priority with the given weight. """ - priority_map = {} + priority_map: Dict[int, List[Server]] = {} for server in server_list: priority_map.setdefault(server.priority, []).append(server) @@ -103,11 +103,16 @@ class SrvResolver: Args: dns_client (twisted.internet.interfaces.IResolver): twisted resolver impl - cache (dict): cache object - get_time (callable): clock implementation. Should return seconds since the epoch + cache: cache object + get_time: clock implementation. 
Should return seconds since the epoch """ - def __init__(self, dns_client=client, cache=SERVER_CACHE, get_time=time.time): + def __init__( + self, + dns_client=client, + cache: Dict[bytes, List[Server]] = SERVER_CACHE, + get_time: Callable[[], float] = time.time, + ): self._dns_client = dns_client self._cache = cache self._get_time = get_time @@ -116,7 +121,7 @@ async def resolve_service(self, service_name: bytes) -> List[Server]: """Look up a SRV record Args: - service_name (bytes): record to look up + service_name: record to look up Returns: a list of the SRV records, or an empty list if none found @@ -158,7 +163,7 @@ async def resolve_service(self, service_name: bytes) -> List[Server]: and answers[0].payload and answers[0].payload.target == dns.Name(b".") ): - raise ConnectError("Service %s unavailable" % service_name) + raise ConnectError(f"Service {service_name!r} unavailable") servers = [] diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 2efa15bf0470..203d723d4120 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -21,6 +21,7 @@ import urllib.parse from io import BytesIO, StringIO from typing import ( + TYPE_CHECKING, Callable, Dict, Generic, @@ -59,7 +60,6 @@ from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, - BlacklistingReactorWrapper, BodyExceededMaxSize, ByteWriteable, encode_query_args, @@ -67,13 +67,16 @@ ) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging import opentracing -from synapse.logging.context import make_deferred_yieldable +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.types import ISynapseReactor, JsonDict +from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) outgoing_requests_counter = Counter( @@ -320,36 +323,31 @@ class MatrixFederationHttpClient: requests. """ - def __init__(self, hs, tls_client_options_factory): + def __init__(self, hs: "HomeServer", tls_client_options_factory): self.hs = hs self.signing_key = hs.signing_key self.server_name = hs.hostname - # We need to use a DNS resolver which filters out blacklisted IP - # addresses, to prevent DNS rebinding. 
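For background on `_sort_server_list` above: RFC 2782 orders SRV targets by ascending priority and then picks within each priority with probability proportional to weight. A toy version of that selection (not necessarily the exact algorithm used here):

    import random

    def sort_servers(servers):
        by_priority = {}
        for s in servers:
            by_priority.setdefault(s.priority, []).append(s)

        ordered = []
        for _, group in sorted(by_priority.items()):
            while group:
                # weight + 1 so zero-weight servers are still picked occasionally
                pick = random.choices(group, [s.weight + 1 for s in group])[0]
                ordered.append(pick)
                group.remove(pick)
        return ordered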
- self.reactor: ISynapseReactor = BlacklistingReactorWrapper( - hs.get_reactor(), - hs.config.federation_ip_range_whitelist, - hs.config.federation_ip_range_blacklist, - ) + self.reactor = hs.get_reactor() user_agent = hs.version_string - if hs.config.user_agent_suffix: - user_agent = "%s %s" % (user_agent, hs.config.user_agent_suffix) + if hs.config.server.user_agent_suffix: + user_agent = "%s %s" % (user_agent, hs.config.server.user_agent_suffix) user_agent = user_agent.encode("ascii") federation_agent = MatrixFederationAgent( self.reactor, tls_client_options_factory, user_agent, - hs.config.federation_ip_range_blacklist, + hs.config.server.federation_ip_range_whitelist, + hs.config.server.federation_ip_range_blacklist, ) # Use a BlacklistingAgentWrapper to prevent circumventing the IP # blacklist via IP literals in server names self.agent = BlacklistingAgentWrapper( federation_agent, - ip_blacklist=hs.config.federation_ip_range_blacklist, + ip_blacklist=hs.config.server.federation_ip_range_blacklist, ) self.clock = hs.get_clock() @@ -471,8 +469,9 @@ async def _send_request( _sec_timeout = self.default_timeout if ( - self.hs.config.federation_domain_whitelist is not None - and request.destination not in self.hs.config.federation_domain_whitelist + self.hs.config.federation.federation_domain_whitelist is not None + and request.destination + not in self.hs.config.federation.federation_domain_whitelist ): raise FederationDeniedError(request.destination) @@ -559,20 +558,29 @@ async def _send_request( with Measure(self.clock, "outbound_request"): # we don't want all the fancy cookie and redirect handling # that treq.request gives: just use the raw Agent. - request_deferred = self.agent.request( + + # To preserve the logging context, the timeout is treated + # in a similar way to `defer.gatherResults`: + # * Each logging context-preserving fork is wrapped in + # `run_in_background`. In this case there is only one, + # since the timeout fork is not logging-context aware. + # * The `Deferred` that joins the forks back together is + # wrapped in `make_deferred_yieldable` to restore the + # logging context regardless of the path taken. + request_deferred = run_in_background( + self.agent.request, method_bytes, url_bytes, headers=Headers(headers_dict), bodyProducer=producer, ) - request_deferred = timeout_deferred( request_deferred, timeout=_sec_timeout, reactor=self.reactor, ) - response = await request_deferred + response = await make_deferred_yieldable(request_deferred) except DNSLookupError as e: raise RequestSendFailed(e, can_retry=retry_on_dns_fail) from e except Exception as e: @@ -707,7 +715,7 @@ def build_auth_headers( Returns: A list of headers to be added as "Authorization:" headers """ - request = { + request: JsonDict = { "method": method.decode("ascii"), "uri": url_bytes.decode("ascii"), "origin": self.server_name, @@ -1183,7 +1191,7 @@ async def get_file( request.method, request.uri.decode("ascii"), ) - return (length, headers) + return length, headers def _flatten_response_never_received(e): diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index 19e987f11877..6fd88bde204c 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
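The comment block in `_send_request` above describes a reusable pattern: apply a timeout to a Deferred without losing the logging context. Distilled into a sketch (helper names as used in this patch; the wrapper function itself is hypothetical):

    from synapse.logging.context import make_deferred_yieldable, run_in_background
    from synapse.util.async_helpers import timeout_deferred

    async def call_with_timeout(func, _sec_timeout, reactor):
        # Fork the work off with logcontext rules applied ...
        d = run_in_background(func)
        # ... race it against a timeout (which is not logcontext-aware) ...
        d = timeout_deferred(d, timeout=_sec_timeout, reactor=reactor)
        # ... and restore the logcontext whichever path completes first.
        return await make_deferred_yieldable(d)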
-import base64 import logging import re from typing import Any, Dict, Optional, Tuple @@ -21,7 +20,6 @@ proxy_bypass_environment, ) -import attr from zope.interface import implementer from twisted.internet import defer @@ -38,7 +36,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS -from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint +from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials from synapse.types import ISynapseReactor logger = logging.getLogger(__name__) @@ -46,22 +44,6 @@ _VALID_URI = re.compile(br"\A[\x21-\x7e]+\Z") -@attr.s -class ProxyCredentials: - username_password = attr.ib(type=bytes) - - def as_proxy_authorization_value(self) -> bytes: - """ - Return the value for a Proxy-Authorization header (i.e. 'Basic abdef=='). - - Returns: - A transformation of the authentication string the encoded value for - a Proxy-Authorization header. - """ - # Encode as base64 and prepend the authorization type - return b"Basic " + base64.encodebytes(self.username_password) - - @implementer(IAgent) class ProxyAgent(_AgentBase): """An Agent implementation which will use an HTTP proxy if one was requested @@ -95,6 +77,7 @@ class ProxyAgent(_AgentBase): Raises: ValueError if use_proxy is set and the environment variables contain an invalid proxy specification. + RuntimeError if no tls_options_factory is given for a https connection """ def __init__( @@ -131,11 +114,11 @@ def __init__( https_proxy = proxies["https"].encode() if "https" in proxies else None no_proxy = proxies["no"] if "no" in proxies else None - self.http_proxy_endpoint, self.http_proxy_creds = _http_proxy_endpoint( + self.http_proxy_endpoint, self.http_proxy_creds = http_proxy_endpoint( http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs ) - self.https_proxy_endpoint, self.https_proxy_creds = _http_proxy_endpoint( + self.https_proxy_endpoint, self.https_proxy_creds = http_proxy_endpoint( https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs ) @@ -190,7 +173,7 @@ def request( raise ValueError(f"Invalid URI {uri!r}") parsed_uri = URI.fromBytes(uri) - pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port) + pool_key = f"{parsed_uri.scheme!r}{parsed_uri.host!r}{parsed_uri.port}" request_path = parsed_uri.originForm should_skip_proxy = False @@ -216,7 +199,7 @@ def request( ) # Cache *all* connections under the same key, since we are only # connecting to a single destination, the proxy: - pool_key = ("http-proxy", self.http_proxy_endpoint) + pool_key = "http-proxy" endpoint = self.http_proxy_endpoint request_path = uri elif ( @@ -224,22 +207,12 @@ def request( and self.https_proxy_endpoint and not should_skip_proxy ): - connect_headers = Headers() - - # Determine whether we need to set Proxy-Authorization headers - if self.https_proxy_creds: - # Set a Proxy-Authorization header - connect_headers.addRawHeader( - b"Proxy-Authorization", - self.https_proxy_creds.as_proxy_authorization_value(), - ) - endpoint = HTTPConnectProxyEndpoint( self.proxy_reactor, self.https_proxy_endpoint, parsed_uri.host, parsed_uri.port, - headers=connect_headers, + self.https_proxy_creds, ) else: # not using a proxy @@ -268,10 +241,10 @@ def request( ) -def _http_proxy_endpoint( +def http_proxy_endpoint( proxy: Optional[bytes], reactor: IReactorCore, - tls_options_factory: IPolicyForHTTPS, + tls_options_factory: Optional[IPolicyForHTTPS], **kwargs, ) -> Tuple[Optional[IStreamClientEndpoint], 
Optional[ProxyCredentials]]:
     """Parses an http proxy setting and returns an endpoint for the proxy
 
@@ -294,6 +267,7 @@ def _http_proxy_endpoint(
 
     Raise:
         ValueError if proxy has no hostname or unsupported scheme.
+        RuntimeError if no tls_options_factory is given for a https connection
     """
     if proxy is None:
         return None, None
@@ -305,8 +279,13 @@ def _http_proxy_endpoint(
     proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)
 
     if scheme == b"https":
-        tls_options = tls_options_factory.creatorForNetloc(host, port)
-        proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)
+        if tls_options_factory:
+            tls_options = tls_options_factory.creatorForNetloc(host, port)
+            proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)
+        else:
+            raise RuntimeError(
+                f"No TLS options for a https connection via proxy {proxy!s}"
+            )
 
     return proxy_endpoint, credentials
 
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 602f93c49710..4886626d5074 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -15,6 +15,8 @@
 
 import logging
 import threading
+import traceback
+from typing import Dict, Mapping, Set, Tuple
 
 from prometheus_client.core import Counter, Histogram
 
@@ -105,19 +107,14 @@
     ["method", "servlet"],
)
 
-# The set of all in flight requests, set[RequestMetrics]
-_in_flight_requests = set()
+_in_flight_requests: Set["RequestMetrics"] = set()
 
 # Protects the _in_flight_requests set from concurrent access
 _in_flight_requests_lock = threading.Lock()
 
 
-def _get_in_flight_counts():
-    """Returns a count of all in flight requests by (method, server_name)
-
-    Returns:
-        dict[tuple[str, str], int]
-    """
+def _get_in_flight_counts() -> Mapping[Tuple[str, ...], int]:
+    """Returns a count of all in flight requests by (method, server_name)"""
     # Cast to a list to prevent it changing while the Prometheus
     # thread is collecting metrics
     with _in_flight_requests_lock:
@@ -127,8 +124,9 @@ def _get_in_flight_counts():
         rm.update_metrics()
 
     # Map from (method, name) -> int, the number of in flight requests of that
-    # type
-    counts = {}
+    # type. The key type is Tuple[str, str], but we leave the length unspecified
+    # for compatibility with LaterGauge's annotations.
+    counts: Dict[Tuple[str, ...], int] = {}
     for rm in reqs:
         key = (rm.method, rm.name)
         counts[key] = counts.get(key, 0) + 1
@@ -145,15 +143,21 @@ def _get_in_flight_counts():
 
 
 class RequestMetrics:
-    def start(self, time_sec, name, method):
-        self.start = time_sec
+    def start(self, time_sec: float, name: str, method: str) -> None:
+        self.start_ts = time_sec
         self.start_context = current_context()
         self.name = name
         self.method = method
 
-        # _request_stats records resource usage that we have already added
-        # to the "in flight" metrics.
-        self._request_stats = self.start_context.get_resource_usage()
+        if self.start_context:
+            # _request_stats records resource usage that we have already added
+            # to the "in flight" metrics.
+ self._request_stats = self.start_context.get_resource_usage() + else: + logger.error( + "Tried to start a RequestMetric from the sentinel context.\n%s", + "".join(traceback.format_stack()), + ) with _in_flight_requests_lock: _in_flight_requests.add(self) @@ -169,12 +173,18 @@ def stop(self, time_sec, response_code, sent_bytes): tag = context.tag if context != self.start_context: - logger.warning( + logger.error( "Context have unexpectedly changed %r, %r", context, self.start_context, ) return + else: + logger.error( + "Trying to stop RequestMetrics in the sentinel context.\n%s", + "".join(traceback.format_stack()), + ) + return response_code = str(response_code) @@ -183,7 +193,7 @@ def stop(self, time_sec, response_code, sent_bytes): response_count.labels(self.method, self.name, tag).inc() response_timer.labels(self.method, self.name, tag, response_code).observe( - time_sec - self.start + time_sec - self.start_ts ) resource_usage = context.get_resource_usage() @@ -213,6 +223,12 @@ def stop(self, time_sec, response_code, sent_bytes): def update_metrics(self): """Updates the in flight metrics with values from this request.""" + if not self.start_context: + logger.error( + "Tried to update a RequestMetric from the sentinel context.\n%s", + "".join(traceback.format_stack()), + ) + return new_stats = self.start_context.get_resource_usage() diff = new_stats - self._request_stats diff --git a/synapse/http/server.py b/synapse/http/server.py index b79fa722e931..91badb0b0ac9 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -21,8 +21,8 @@ import urllib from http import HTTPStatus from inspect import isawaitable -from io import BytesIO from typing import ( + TYPE_CHECKING, Any, Awaitable, Callable, @@ -37,7 +37,7 @@ ) import jinja2 -from canonicaljson import iterencode_canonical_json +from canonicaljson import encode_canonical_json from typing_extensions import Protocol from zope.interface import implementer @@ -45,7 +45,7 @@ from twisted.python import failure from twisted.web import resource from twisted.web.server import NOT_DONE_YET, Request -from twisted.web.static import File, NoRangeStaticProducer +from twisted.web.static import File from twisted.web.util import redirectTo from synapse.api.errors import ( @@ -56,10 +56,14 @@ UnrecognizedRequestError, ) from synapse.http.site import SynapseRequest -from synapse.logging.context import preserve_fn +from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background from synapse.logging.opentracing import trace_servlet from synapse.util import json_encoder from synapse.util.caches import intern_dict +from synapse.util.iterutils import chunk_seq + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -94,7 +98,7 @@ def return_json_error(f: failure.Failure, request: SynapseRequest) -> None: "Failed handle request via %r: %r", request.request_metrics.name, request, - exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore + exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore[arg-type] ) # Only respond with an error response if we haven't already started writing, @@ -146,7 +150,7 @@ def return_html_error( logger.error( "Failed handle request %r", request, - exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore + exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore[arg-type] ) else: code = HTTPStatus.INTERNAL_SERVER_ERROR @@ -155,7 +159,7 @@ def return_html_error( logger.error( "Failed handle request %r", request, - 
exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore + exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore[arg-type] ) if isinstance(error_template, str): @@ -320,7 +324,7 @@ def __init__(self, canonical_json=False, extract_context=False): def _send_response( self, - request: Request, + request: SynapseRequest, code: int, response_object: Any, ): @@ -343,6 +347,11 @@ def _send_error_response( return_json_error(f, request) +_PathEntry = collections.namedtuple( + "_PathEntry", ["pattern", "callback", "servlet_classname"] +) + + class JsonResource(DirectServeJsonResource): """This implements the HttpServer interface and provides JSON support for Resources. @@ -359,14 +368,10 @@ class JsonResource(DirectServeJsonResource): isLeaf = True - _PathEntry = collections.namedtuple( - "_PathEntry", ["pattern", "callback", "servlet_classname"] - ) - - def __init__(self, hs, canonical_json=True, extract_context=False): + def __init__(self, hs: "HomeServer", canonical_json=True, extract_context=False): super().__init__(canonical_json, extract_context) self.clock = hs.get_clock() - self.path_regexs = {} + self.path_regexs: Dict[bytes, List[_PathEntry]] = {} self.hs = hs def register_paths(self, method, path_patterns, callback, servlet_classname): @@ -391,7 +396,7 @@ def register_paths(self, method, path_patterns, callback, servlet_classname): for path_pattern in path_patterns: logger.debug("Registering for %s %s", method, path_pattern.pattern) self.path_regexs.setdefault(method, []).append( - self._PathEntry(path_pattern, callback, servlet_classname) + _PathEntry(path_pattern, callback, servlet_classname) ) def _get_handler_for_request( @@ -561,9 +566,20 @@ def __init__( self._iterator = iterator self._paused = False - # Register the producer and start producing data. - self._request.registerProducer(self, True) - self.resumeProducing() + try: + self._request.registerProducer(self, True) + except AttributeError as e: + # Calling self._request.registerProducer might raise an AttributeError since + # the underlying Twisted code calls self._request.channel.registerProducer, + # however self._request.channel will be None if the connection was lost. + logger.info("Connection disconnected before response was written: %r", e) + + # We drop our references to data we'll not use. + self._request = None + self._iterator = iter(()) + else: + # Start producing if `registerProducer` was successful + self.resumeProducing() def _send_data(self, data: List[bytes]) -> None: """ @@ -620,16 +636,15 @@ def stopProducing(self) -> None: self._request = None -def _encode_json_bytes(json_object: Any) -> Iterator[bytes]: +def _encode_json_bytes(json_object: Any) -> bytes: """ Encode an object into JSON. Returns an iterator of bytes. 
""" - for chunk in json_encoder.iterencode(json_object): - yield chunk.encode("utf-8") + return json_encoder.encode(json_object).encode("utf-8") def respond_with_json( - request: Request, + request: SynapseRequest, code: int, json_object: Any, send_cors: bool = False, @@ -659,7 +674,7 @@ def respond_with_json( return None if canonical_json: - encoder = iterencode_canonical_json + encoder = encode_canonical_json else: encoder = _encode_json_bytes @@ -670,7 +685,9 @@ def respond_with_json( if send_cors: set_cors_headers(request) - _ByteProducer(request, encoder(json_object)) + run_in_background( + _async_write_json_to_request_in_thread, request, encoder, json_object + ) return NOT_DONE_YET @@ -706,15 +723,56 @@ def respond_with_json_bytes( if send_cors: set_cors_headers(request) - # note that this is zero-copy (the bytesio shares a copy-on-write buffer with - # the original `bytes`). - bytes_io = BytesIO(json_bytes) - - producer = NoRangeStaticProducer(request, bytes_io) - producer.start() + _write_bytes_to_request(request, json_bytes) return NOT_DONE_YET +async def _async_write_json_to_request_in_thread( + request: SynapseRequest, + json_encoder: Callable[[Any], bytes], + json_object: Any, +): + """Encodes the given JSON object on a thread and then writes it to the + request. + + This is done so that encoding large JSON objects doesn't block the reactor + thread. + + Note: We don't use JsonEncoder.iterencode here as that falls back to the + Python implementation (rather than the C backend), which is *much* more + expensive. + """ + + json_str = await defer_to_thread(request.reactor, json_encoder, json_object) + + _write_bytes_to_request(request, json_str) + + +def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None: + """Writes the bytes to the request using an appropriate producer. + + Note: This should be used instead of `Request.write` to correctly handle + large response bodies. + """ + + # The problem with dumping all of the response into the `Request` object at + # once (via `Request.write`) is that doing so starts the timeout for the + # next request to be received: so if it takes longer than 60s to stream back + # the response to the client, the client never gets it. + # + # The correct solution is to use a Producer; then the timeout is only + # started once all of the content is sent over the TCP connection. + + # To make sure we don't write all of the bytes at once we split it up into + # chunks. + chunk_size = 4096 + bytes_generator = chunk_seq(bytes_to_write, chunk_size) + + # We use a `_ByteProducer` here rather than `NoRangeStaticProducer` as the + # unit tests can't cope with being given a pull producer. + _ByteProducer(request, bytes_generator) + + def set_cors_headers(request: Request): """Set the CORS headers so that javascript running in a web browsers can use this API diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 732a1e6aeb88..6dd9b9ad0358 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -14,16 +14,28 @@ """ This module contains base REST classes for constructing REST servlets. 
""" import logging -from typing import Iterable, List, Mapping, Optional, Sequence, overload +from typing import ( + TYPE_CHECKING, + Iterable, + List, + Mapping, + Optional, + Sequence, + Tuple, + overload, +) from typing_extensions import Literal from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError -from synapse.types import JsonDict +from synapse.types import JsonDict, RoomAlias, RoomID from synapse.util import json_decoder +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -67,6 +79,35 @@ def parse_integer( return parse_integer_from_args(args, name, default, required) +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + default: Optional[int] = None, +) -> Optional[int]: + ... + + +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + *, + required: Literal[True], +) -> int: + ... + + +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + default: Optional[int] = None, + required: bool = False, +) -> Optional[int]: + ... + + def parse_integer_from_args( args: Mapping[bytes, Sequence[bytes]], name: str, @@ -560,6 +601,25 @@ def parse_string_from_args( return strings[0] +@overload +def parse_json_value_from_request(request: Request) -> JsonDict: + ... + + +@overload +def parse_json_value_from_request( + request: Request, allow_empty_body: Literal[False] +) -> JsonDict: + ... + + +@overload +def parse_json_value_from_request( + request: Request, allow_empty_body: bool = False +) -> Optional[JsonDict]: + ... + + def parse_json_value_from_request( request: Request, allow_empty_body: bool = False ) -> Optional[JsonDict]: @@ -663,3 +723,45 @@ def register(self, http_server): else: raise NotImplementedError("RestServlet must register something.") + + +class ResolveRoomIdMixin: + def __init__(self, hs: "HomeServer"): + self.room_member_handler = hs.get_room_member_handler() + + async def resolve_room_id( + self, room_identifier: str, remote_room_hosts: Optional[List[str]] = None + ) -> Tuple[str, Optional[List[str]]]: + """ + Resolve a room identifier to a room ID, if necessary. + + This also performanes checks to ensure the room ID is of the proper form. + + Args: + room_identifier: The room ID or alias. + remote_room_hosts: The potential remote room hosts to use. + + Returns: + The resolved room ID. + + Raises: + SynapseError if the room ID is of the wrong form. 
+ """ + if RoomID.is_valid(room_identifier): + resolved_room_id = room_identifier + elif RoomAlias.is_valid(room_identifier): + room_alias = RoomAlias.from_string(room_identifier) + ( + room_id, + remote_room_hosts, + ) = await self.room_member_handler.lookup_room_alias(room_alias) + resolved_room_id = room_id.to_string() + else: + raise SynapseError( + 400, "%s was not legal room ID or room alias" % (room_identifier,) + ) + if not resolved_room_id: + raise SynapseError( + 400, "Unknown room ID or room alias %s" % room_identifier + ) + return resolved_room_id, remote_room_hosts diff --git a/synapse/http/site.py b/synapse/http/site.py index 190084e8aa68..755ad56637da 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,14 +14,15 @@ import contextlib import logging import time -from typing import Optional, Tuple, Union +from typing import Generator, Optional, Tuple, Union import attr from zope.interface import implementer from twisted.internet.interfaces import IAddress, IReactorTime from twisted.python.failure import Failure -from twisted.web.resource import IResource +from twisted.web.http import HTTPChannel +from twisted.web.resource import IResource, Resource from twisted.web.server import Request, Site from synapse.config.server import ListenerConfig @@ -61,10 +62,18 @@ class SynapseRequest(Request): logcontext: the log context for this request """ - def __init__(self, channel, *args, max_request_body_size=1024, **kw): - Request.__init__(self, channel, *args, **kw) + def __init__( + self, + channel: HTTPChannel, + site: "SynapseSite", + *args, + max_request_body_size: int = 1024, + **kw, + ): + super().__init__(channel, *args, **kw) self._max_request_body_size = max_request_body_size - self.site: SynapseSite = channel.site + self.synapse_site = site + self.reactor = site.reactor self._channel = channel # this is used by the tests self.start_time = 0.0 @@ -83,13 +92,13 @@ def __init__(self, channel, *args, max_request_body_size=1024, **kw): self._is_processing = False # the time when the asynchronous request handler completed its processing - self._processing_finished_time = None + self._processing_finished_time: Optional[float] = None # what time we finished sending the response to the client (or the connection # dropped) - self.finish_time = None + self.finish_time: Optional[float] = None - def __repr__(self): + def __repr__(self) -> str: # We overwrite this so that we don't log ``access_token`` return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % ( self.__class__.__name__, @@ -97,10 +106,10 @@ def __repr__(self): self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), - self.site.site_tag, + self.synapse_site.site_tag, ) - def handleContentChunk(self, data): + def handleContentChunk(self, data: bytes) -> None: # we should have a `content` by now. assert self.content, "handleContentChunk() called before gotLength()" if self.content.tell() + len(data) > self._max_request_body_size: @@ -139,7 +148,7 @@ def requester(self, value: Union[Requester, str]) -> None: # If there's no authenticated entity, it was the requester. 
self.logcontext.request.authenticated_entity = authenticated_entity or requester - def get_request_id(self): + def get_request_id(self) -> str: return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self) -> str: @@ -205,7 +214,7 @@ def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]: return None, None - def render(self, resrc): + def render(self, resrc: Resource) -> None: # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. @@ -216,7 +225,7 @@ def render(self, resrc): request=ContextRequest( request_id=request_id, ip_address=self.getClientIP(), - site_tag=self.site.site_tag, + site_tag=self.synapse_site.site_tag, # The requester is going to be unknown at this point. requester=None, authenticated_entity=None, @@ -228,7 +237,7 @@ def render(self, resrc): ) # override the Server header which is set by twisted - self.setHeader("Server", self.site.server_version_string) + self.setHeader("Server", self.synapse_site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab @@ -247,7 +256,7 @@ def render(self, resrc): requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager - def processing(self): + def processing(self) -> Generator[None, None, None]: """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @@ -282,7 +291,7 @@ async def handle_request(request): if self.finish_time is not None: self._finished_processing() - def finish(self): + def finish(self) -> None: """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do @@ -295,7 +304,7 @@ def finish(self): with PreserveLoggingContext(self.logcontext): self._finished_processing() - def connectionLost(self, reason): + def connectionLost(self, reason: Union[Failure, Exception]) -> None: """Called when the client connection is closed before the response is written. Overrides twisted.web.server.Request.connectionLost to record the finish time and @@ -327,7 +336,7 @@ def connectionLost(self, reason): if not self._is_processing: self._finished_processing() - def _started_processing(self, servlet_name): + def _started_processing(self, servlet_name: str) -> None: """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, @@ -346,17 +355,19 @@ def _started_processing(self, servlet_name): self.start_time, name=servlet_name, method=self.get_method() ) - self.site.access_logger.debug( + self.synapse_site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientIP(), - self.site.site_tag, + self.synapse_site.site_tag, self.get_method(), self.get_redacted_uri(), ) - def _finished_processing(self): + def _finished_processing(self) -> None: """Log the completion of this request and update the metrics""" assert self.logcontext is not None + assert self.finish_time is not None + usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: @@ -384,15 +395,15 @@ def _finished_processing(self): # authenticated (e.g. and admin is puppetting a user) then we log both. 
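The `processing()` context manager above is intended to bracket the asynchronous part of request handling, roughly like so (handler name hypothetical):

    async def handle_request(request):
        with request.processing():
            await really_handle_the_request()

If the response has already been fully sent when the block exits, completion is logged there; otherwise it is logged later from `finish()` or `connectionLost()`.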
requester, authenticated_entity = self.get_authenticated_entity() if authenticated_entity: - requester = f"{authenticated_entity}.{requester}" + requester = f"{authenticated_entity}|{requester}" - self.site.access_logger.log( + self.synapse_site.access_logger.log( log_level, "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientIP(), - self.site.site_tag, + self.synapse_site.site_tag, requester, processing_time, response_send_time, @@ -437,7 +448,7 @@ class XForwardedForRequest(SynapseRequest): _forwarded_for: "Optional[_XForwardedForAddress]" = None _forwarded_https: bool = False - def requestReceived(self, command, path, version): + def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None: # this method is called by the Channel once the full request has been # received, to dispatch the request to a resource. # We can use it to set the IP address and protocol according to the @@ -445,7 +456,7 @@ def requestReceived(self, command, path, version): self._process_forwarded_headers() return super().requestReceived(command, path, version) - def _process_forwarded_headers(self): + def _process_forwarded_headers(self) -> None: headers = self.requestHeaders.getRawHeaders(b"x-forwarded-for") if not headers: return @@ -470,7 +481,7 @@ def _process_forwarded_headers(self): ) self._forwarded_https = True - def isSecure(self): + def isSecure(self) -> bool: if self._forwarded_https: return True return super().isSecure() @@ -520,7 +531,7 @@ def __init__( site_tag: str, config: ListenerConfig, resource: IResource, - server_version_string, + server_version_string: str, max_request_body_size: int, reactor: IReactorTime, ): @@ -540,19 +551,23 @@ def __init__( Site.__init__(self, resource, reactor=reactor) self.site_tag = site_tag + self.reactor = reactor assert config.http_options is not None proxied = config.http_options.x_forwarded request_class = XForwardedForRequest if proxied else SynapseRequest - def request_factory(channel, queued) -> Request: + def request_factory(channel, queued: bool) -> Request: return request_class( - channel, max_request_body_size=max_request_body_size, queued=queued + channel, + self, + max_request_body_size=max_request_body_size, + queued=queued, ) self.requestFactory = request_factory # type: ignore self.access_logger = logging.getLogger(logger_name) self.server_version_string = server_version_string.encode("ascii") - def log(self, request): + def log(self, request: SynapseRequest) -> None: pass diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 6e82f7c7f17a..b78d6e17c93c 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -65,6 +65,12 @@ def _format(self, record: logging.LogRecord, event: dict) -> str: if key not in _IGNORED_LOG_RECORD_ATTRIBUTES: event[key] = value + if record.exc_info: + exc_type, exc_value, _ = record.exc_info + if exc_type: + event["exc_type"] = f"{exc_type.__name__}" + event["exc_value"] = f"{exc_value}" + return _encoder.encode(event) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 02e5ddd2ef2a..d8ae3188b7da 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -52,7 +52,7 @@ is_thread_resource_usage_supported = True - def get_thread_resource_usage() -> "Optional[resource._RUsage]": + def get_thread_resource_usage() -> "Optional[resource.struct_rusage]": return resource.getrusage(RUSAGE_THREAD) @@ -61,7 +61,7 @@ def 
get_thread_resource_usage() -> "Optional[resource._RUsage]": # won't track resource usage. is_thread_resource_usage_supported = False - def get_thread_resource_usage() -> "Optional[resource._RUsage]": + def get_thread_resource_usage() -> "Optional[resource.struct_rusage]": return None @@ -220,16 +220,16 @@ def __init__(self) -> None: self.scope = None self.tag = None - def __str__(self): + def __str__(self) -> str: return "sentinel" def copy_to(self, record): pass - def start(self, rusage: "Optional[resource._RUsage]"): + def start(self, rusage: "Optional[resource.struct_rusage]"): pass - def stop(self, rusage: "Optional[resource._RUsage]"): + def stop(self, rusage: "Optional[resource.struct_rusage]"): pass def add_database_transaction(self, duration_sec): @@ -241,7 +241,7 @@ def add_database_scheduled(self, sched_sec): def record_event_fetch(self, event_count): pass - def __bool__(self): + def __bool__(self) -> Literal[False]: return False @@ -289,7 +289,7 @@ def __init__( # The thread resource usage when the logcontext became active. None # if the context is not currently active. - self.usage_start: Optional[resource._RUsage] = None + self.usage_start: Optional[resource.struct_rusage] = None self.main_thread = get_thread_id() self.request = None @@ -410,7 +410,7 @@ def copy_to(self, record) -> None: # we also track the current scope: record.scope = self.scope - def start(self, rusage: "Optional[resource._RUsage]") -> None: + def start(self, rusage: "Optional[resource.struct_rusage]") -> None: """ Record that this logcontext is currently running. @@ -435,7 +435,7 @@ def start(self, rusage: "Optional[resource._RUsage]") -> None: else: self.usage_start = rusage - def stop(self, rusage: "Optional[resource._RUsage]") -> None: + def stop(self, rusage: "Optional[resource.struct_rusage]") -> None: """ Record that this logcontext is no longer running. @@ -490,7 +490,7 @@ def get_resource_usage(self) -> ContextResourceUsage: return res - def _get_cputime(self, current: "resource._RUsage") -> Tuple[float, float]: + def _get_cputime(self, current: "resource.struct_rusage") -> Tuple[float, float]: """Get the cpu usage time between start() and the given rusage Args: diff --git a/synapse/logging/handlers.py b/synapse/logging/handlers.py index af5fc407a8e5..478b5274942b 100644 --- a/synapse/logging/handlers.py +++ b/synapse/logging/handlers.py @@ -3,7 +3,7 @@ from logging import Handler, LogRecord from logging.handlers import MemoryHandler from threading import Thread -from typing import Optional +from typing import Optional, cast from twisted.internet.interfaces import IReactorCore @@ -56,7 +56,7 @@ def on_reactor_running(): if reactor is None: from twisted.internet import reactor as global_reactor - reactor_to_use = global_reactor # type: ignore[assignment] + reactor_to_use = cast(IReactorCore, global_reactor) else: reactor_to_use = reactor diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index ecd51f1b4a26..20d23a426064 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -236,8 +236,17 @@ class _DummyTagNames: try: from rust_python_jaeger_reporter import Reporter + # jaeger-client 4.7.0 requires that reporters inherit from BaseReporter, which + # didn't exist before that version. 
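The sentinel's `__bool__` returning `False` (now typed `Literal[False]`) is what lets plain truthiness tests detect "no real logcontext", the same guard style the `RequestMetrics` changes earlier in this patch rely on:

    from synapse.logging.context import current_context

    context = current_context()
    if not context:
        # We got the sentinel: nothing to attribute resource usage to.
        ...
    else:
        usage = context.get_resource_usage()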
+ try: + from jaeger_client.reporter import BaseReporter + except ImportError: + + class BaseReporter: # type: ignore[no-redef] + pass + @attr.s(slots=True, frozen=True) - class _WrappedRustReporter: + class _WrappedRustReporter(BaseReporter): """Wrap the reporter to ensure `report_span` never throws.""" _reporter = attr.ib(type=Reporter, default=attr.Factory(Reporter)) @@ -330,6 +339,7 @@ def ensure_active_span_inner_2(*args, **kwargs): "There was no active span when trying to %s." " Did you forget to start one or did a context slip?", message, + stack_info=True, ) return ret @@ -354,7 +364,7 @@ def noop_context_manager(*args, **kwargs): def init_tracer(hs: "HomeServer"): """Set the whitelists and initialise the JaegerClient tracer""" global opentracing - if not hs.config.opentracer_enabled: + if not hs.config.tracing.opentracer_enabled: # We don't have a tracer opentracing = None return @@ -368,13 +378,13 @@ def init_tracer(hs: "HomeServer"): # Pull out the jaeger config if it was given. Otherwise set it to something sensible. # See https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/config.py - set_homeserver_whitelist(hs.config.opentracer_whitelist) + set_homeserver_whitelist(hs.config.tracing.opentracer_whitelist) from jaeger_client.metrics.prometheus import PrometheusMetricsFactory config = JaegerConfig( - config=hs.config.jaeger_config, - service_name=f"{hs.config.server_name} {hs.get_instance_name()}", + config=hs.config.tracing.jaeger_config, + service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", scope_manager=LogContextScopeManager(hs.config), metrics_factory=PrometheusMetricsFactory(), ) @@ -382,6 +392,7 @@ def init_tracer(hs: "HomeServer"): # If we have the rust jaeger reporter available let's use that. if RustReporter: logger.info("Using rust_python_jaeger_reporter library") + assert config.sampler is not None tracer = config.create_tracer(RustReporter(), config.sampler) opentracing.set_global_tracer(tracer) else: @@ -796,6 +807,14 @@ def err_back(result): result.addCallbacks(call_back, err_back) else: + if inspect.isawaitable(result): + logger.error( + "@trace may not have wrapped %s correctly! 
" + "The function is not async but returned a %s.", + func.__qualname__, + type(result).__name__, + ) + scope.__exit__(None, None, None) return result diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py index 08895e72eedd..4a01b902c255 100644 --- a/synapse/logging/utils.py +++ b/synapse/logging/utils.py @@ -16,6 +16,7 @@ import logging from functools import wraps from inspect import getcallargs +from typing import Callable, TypeVar, cast _TIME_FUNC_ID = 0 @@ -41,7 +42,10 @@ def _log_debug_as_f(f, msg, msg_args): logger.handle(record) -def log_function(f): +F = TypeVar("F", bound=Callable) + + +def log_function(f: F) -> F: """Function decorator that logs every call to that function.""" func_name = f.__name__ @@ -69,4 +73,4 @@ def format(value): return f(*args, **kwargs) wrapped.__name__ = func_name - return wrapped + return cast(F, wrapped) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index f237b8a2369e..ceef57ad883f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -20,10 +20,25 @@ import platform import threading import time -from typing import Callable, Dict, Iterable, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Generic, + Iterable, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Type, + TypeVar, + Union, + cast, +) import attr -from prometheus_client import Counter, Gauge, Histogram +from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram, Metric from prometheus_client.core import ( REGISTRY, CounterMetricFamily, @@ -32,6 +47,8 @@ ) from twisted.internet import reactor +from twisted.internet.base import ReactorBase +from twisted.python.threadpool import ThreadPool import synapse from synapse.metrics._exposition import ( @@ -53,7 +70,7 @@ class RegistryProxy: @staticmethod - def collect(): + def collect() -> Iterable[Metric]: for metric in REGISTRY.collect(): if not metric.name.startswith("__"): yield metric @@ -67,9 +84,13 @@ class LaterGauge: labels = attr.ib(hash=False, type=Optional[Iterable[str]]) # callback: should either return a value (if there are no labels for this metric), # or dict mapping from a label tuple to a value - caller = attr.ib(type=Callable[[], Union[Dict[Tuple[str, ...], float], float]]) + caller = attr.ib( + type=Callable[ + [], Union[Mapping[Tuple[str, ...], Union[int, float]], Union[int, float]] + ] + ) - def collect(self): + def collect(self) -> Iterable[Metric]: g = GaugeMetricFamily(self.name, self.desc, labels=self.labels) @@ -80,18 +101,18 @@ def collect(self): yield g return - if isinstance(calls, dict): + if isinstance(calls, (int, float)): + g.add_metric([], calls) + else: for k, v in calls.items(): g.add_metric(k, v) - else: - g.add_metric([], calls) yield g - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: self._register() - def _register(self): + def _register(self) -> None: if self.name in all_gauges.keys(): logger.warning("%s already registered, reregistering" % (self.name,)) REGISTRY.unregister(all_gauges.pop(self.name)) @@ -100,7 +121,12 @@ def _register(self): all_gauges[self.name] = self -class InFlightGauge: +# `MetricsEntry` only makes sense when it is a `Protocol`, +# but `Protocol` can't be used as a `TypeVar` bound. +MetricsEntry = TypeVar("MetricsEntry") + + +class InFlightGauge(Generic[MetricsEntry]): """Tracks number of things (e.g. requests, Measure blocks, etc) in flight at any given time. @@ -110,14 +136,19 @@ class InFlightGauge: callbacks. 
Args: - name (str) - desc (str) - labels (list[str]) - sub_metrics (list[str]): A list of sub metrics that the callbacks - will update. + name + desc + labels + sub_metrics: A list of sub metrics that the callbacks will update. """ - def __init__(self, name, desc, labels, sub_metrics): + def __init__( + self, + name: str, + desc: str, + labels: Sequence[str], + sub_metrics: Sequence[str], + ): self.name = name self.desc = desc self.labels = labels @@ -125,19 +156,25 @@ def __init__(self, name, desc, labels, sub_metrics): # Create a class which have the sub_metrics values as attributes, which # default to 0 on initialization. Used to pass to registered callbacks. - self._metrics_class = attr.make_class( + self._metrics_class: Type[MetricsEntry] = attr.make_class( "_MetricsEntry", attrs={x: attr.ib(0) for x in sub_metrics}, slots=True ) # Counts number of in flight blocks for a given set of label values - self._registrations: Dict = {} + self._registrations: Dict[ + Tuple[str, ...], Set[Callable[[MetricsEntry], None]] + ] = {} # Protects access to _registrations self._lock = threading.Lock() self._register_with_collector() - def register(self, key, callback): + def register( + self, + key: Tuple[str, ...], + callback: Callable[[MetricsEntry], None], + ) -> None: """Registers that we've entered a new block with labels `key`. `callback` gets called each time the metrics are collected. The same @@ -153,13 +190,17 @@ def register(self, key, callback): with self._lock: self._registrations.setdefault(key, set()).add(callback) - def unregister(self, key, callback): + def unregister( + self, + key: Tuple[str, ...], + callback: Callable[[MetricsEntry], None], + ) -> None: """Registers that we've exited a block with labels `key`.""" with self._lock: self._registrations.setdefault(key, set()).discard(callback) - def collect(self): + def collect(self) -> Iterable[Metric]: """Called by prometheus client when it reads metrics. Note: may be called by a separate thread. 
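A sketch of how an `InFlightGauge` like the one above is typically driven: the label values form the registration key, and the callback mutates the per-key metrics entry at scrape time (the gauge name, label, and values here are illustrative, assuming the class above is available as exported):

from synapse.metrics import InFlightGauge

in_flight = InFlightGauge(
    name="myapp_block_in_flight",
    desc="Blocks currently executing",
    labels=["block_name"],
    sub_metrics=["real_time_sum"],
)

def _on_scrape(metrics) -> None:
    # Called on each scrape while registered; update the sub-metric entry.
    metrics.real_time_sum += 0.1  # illustrative increment

in_flight.register(("my_block",), _on_scrape)
# ... the tracked block runs ...
in_flight.unregister(("my_block",), _on_scrape)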
@@ -195,7 +236,7 @@ def collect(self): gauge.add_metric(key, getattr(metrics, name)) yield gauge - def _register_with_collector(self): + def _register_with_collector(self) -> None: if self.name in all_gauges.keys(): logger.warning("%s already registered, reregistering" % (self.name,)) REGISTRY.unregister(all_gauges.pop(self.name)) @@ -225,7 +266,7 @@ def __init__( name: str, documentation: str, buckets: Iterable[float], - registry=REGISTRY, + registry: CollectorRegistry = REGISTRY, ): """ Args: @@ -252,12 +293,12 @@ def __init__( registry.register(self) - def collect(self): + def collect(self) -> Iterable[Metric]: # Don't report metrics unless we've already collected some data if self._metric is not None: yield self._metric - def update_data(self, values: Iterable[float]): + def update_data(self, values: Iterable[float]) -> None: """Update the data to be reported by the metric The existing data is cleared, and each measurement in the input is assigned @@ -299,7 +340,7 @@ def _values_to_metric(self, values: Iterable[float]) -> GaugeHistogramMetricFami class CPUMetrics: - def __init__(self): + def __init__(self) -> None: ticks_per_sec = 100 try: # Try and get the system config @@ -309,7 +350,7 @@ def __init__(self): self.ticks_per_sec = ticks_per_sec - def collect(self): + def collect(self) -> Iterable[Metric]: if not HAVE_PROC_SELF_STAT: return @@ -359,7 +400,7 @@ def collect(self): class GCCounts: - def collect(self): + def collect(self) -> Iterable[Metric]: cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): cm.add_metric([str(n)], m) @@ -377,7 +418,7 @@ def collect(self): class PyPyGCStats: - def collect(self): + def collect(self) -> Iterable[Metric]: # @stats is a pretty-printer object with __str__() returning a nice table, # plus some fields that contain data from that table. 
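All of the `collect() -> Iterable[Metric]` signatures above follow `prometheus_client`'s duck-typed custom-collector protocol: any object with a `collect` method that yields metric families can be registered. A minimal standalone example, modelled on the `GCCounts` collector above (the metric name is illustrative):

import gc
from typing import Iterable

from prometheus_client import Metric
from prometheus_client.core import REGISTRY, GaugeMetricFamily

class GCCountCollector:
    """Yield one gauge sample per GC generation at scrape time."""

    def collect(self) -> Iterable[Metric]:
        g = GaugeMetricFamily("myapp_gc_counts", "GC object counts", labels=["gen"])
        for gen, count in enumerate(gc.get_count()):
            g.add_metric([str(gen)], count)
        yield g

REGISTRY.register(GCCountCollector())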
@@ -522,9 +563,45 @@ def collect(self): labelnames=("type", "reason"), ) +threadpool_total_threads = Gauge( + "synapse_threadpool_total_threads", + "Total number of threads currently in the threadpool", + ["name"], +) + +threadpool_total_working_threads = Gauge( + "synapse_threadpool_working_threads", + "Number of threads currently working in the threadpool", + ["name"], +) + +threadpool_total_min_threads = Gauge( + "synapse_threadpool_min_threads", + "Minimum number of threads configured in the threadpool", + ["name"], +) + +threadpool_total_max_threads = Gauge( + "synapse_threadpool_max_threads", + "Maximum number of threads configured in the threadpool", + ["name"], +) + + +def register_threadpool(name: str, threadpool: ThreadPool) -> None: + """Add metrics for the threadpool.""" + + threadpool_total_min_threads.labels(name).set(threadpool.min) + threadpool_total_max_threads.labels(name).set(threadpool.max) + + threadpool_total_threads.labels(name).set_function(lambda: len(threadpool.threads)) + threadpool_total_working_threads.labels(name).set_function( + lambda: len(threadpool.working) + ) + class ReactorLastSeenMetric: - def collect(self): + def collect(self) -> Iterable[Metric]: cm = GaugeMetricFamily( "python_twisted_reactor_last_seen", "Seconds since the Twisted reactor was last seen", @@ -543,9 +620,12 @@ def collect(self): _last_gc = [0.0, 0.0, 0.0] -def runUntilCurrentTimer(reactor, func): +F = TypeVar("F", bound=Callable[..., Any]) + + +def runUntilCurrentTimer(reactor: ReactorBase, func: F) -> F: @functools.wraps(func) - def f(*args, **kwargs): + def f(*args: Any, **kwargs: Any) -> Any: now = reactor.seconds() num_pending = 0 @@ -608,7 +688,7 @@ def f(*args, **kwargs): return ret - return f + return cast(F, f) try: @@ -636,5 +716,5 @@ def f(*args, **kwargs): "start_http_server", "LaterGauge", "InFlightGauge", - "BucketCollector", + "GaugeBucketCollector", ] diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index bb9bcb5592ed..353d0a63b618 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -25,27 +25,25 @@ import threading from http.server import BaseHTTPRequestHandler, HTTPServer from socketserver import ThreadingMixIn -from typing import Dict, List +from typing import Any, Dict, List, Type, Union from urllib.parse import parse_qs, urlparse -from prometheus_client import REGISTRY +from prometheus_client import REGISTRY, CollectorRegistry +from prometheus_client.core import Sample from twisted.web.resource import Resource +from twisted.web.server import Request from synapse.util import caches CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" -INF = float("inf") -MINUS_INF = float("-inf") - - -def floatToGoString(d): +def floatToGoString(d: Union[int, float]) -> str: d = float(d) - if d == INF: + if d == math.inf: return "+Inf" - elif d == MINUS_INF: + elif d == -math.inf: return "-Inf" elif math.isnan(d): return "NaN" @@ -60,7 +58,7 @@ def floatToGoString(d): return s -def sample_line(line, name): +def sample_line(line: Sample, name: str) -> str: if line.labels: labelstr = "{{{0}}}".format( ",".join( @@ -82,7 +80,7 @@ def sample_line(line, name): return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) -def generate_latest(registry, emit_help=False): +def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: # Trigger the cache metrics to be rescraped, which updates the common # metrics but do not produce metrics themselves @@ -187,7 +185,7 @@ class 
MetricsHandler(BaseHTTPRequestHandler): registry = REGISTRY - def do_GET(self): + def do_GET(self) -> None: registry = self.registry params = parse_qs(urlparse(self.path).query) @@ -207,11 +205,11 @@ def do_GET(self): self.end_headers() self.wfile.write(output) - def log_message(self, format, *args): + def log_message(self, format: str, *args: Any) -> None: """Log nothing.""" @classmethod - def factory(cls, registry): + def factory(cls, registry: CollectorRegistry) -> Type: """Returns a dynamic MetricsHandler class tied to the passed registry. """ @@ -236,7 +234,9 @@ class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): daemon_threads = True -def start_http_server(port, addr="", registry=REGISTRY): +def start_http_server( + port: int, addr: str = "", registry: CollectorRegistry = REGISTRY +) -> None: """Starts an HTTP server for prometheus metrics as a daemon thread""" CustomMetricsHandler = MetricsHandler.factory(registry) httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) @@ -252,10 +252,10 @@ class MetricsResource(Resource): isLeaf = True - def __init__(self, registry=REGISTRY): + def __init__(self, registry: CollectorRegistry = REGISTRY): self.registry = registry - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) response = generate_latest(self.registry) request.setHeader(b"Content-Length", str(len(response))) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 3a14260752ed..53c508af9170 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,19 +15,37 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set, Union +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + Iterable, + Optional, + Set, + Type, + TypeVar, + Union, + cast, +) +from prometheus_client import Metric from prometheus_client.core import REGISTRY, Counter, Gauge from twisted.internet import defer -from synapse.logging.context import LoggingContext, PreserveLoggingContext +from synapse.logging.context import ( + ContextResourceUsage, + LoggingContext, + PreserveLoggingContext, +) from synapse.logging.opentracing import ( SynapseTags, noop_context_manager, start_active_span, ) -from synapse.util.async_helpers import maybe_awaitable if TYPE_CHECKING: import resource @@ -116,7 +134,7 @@ class _Collector: before they are returned. 
""" - def collect(self): + def collect(self) -> Iterable[Metric]: global _background_processes_active_since_last_scrape # We swap out the _background_processes set with an empty one so that @@ -144,12 +162,12 @@ def collect(self): class _BackgroundProcess: - def __init__(self, desc, ctx): + def __init__(self, desc: str, ctx: LoggingContext): self.desc = desc self._context = ctx - self._reported_stats = None + self._reported_stats: Optional[ContextResourceUsage] = None - def update_metrics(self): + def update_metrics(self) -> None: """Updates the metrics with values from this process.""" new_stats = self._context.get_resource_usage() if self._reported_stats is None: @@ -169,7 +187,16 @@ def update_metrics(self): ) -def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwargs): +R = TypeVar("R") + + +def run_as_background_process( + desc: str, + func: Callable[..., Awaitable[Optional[R]]], + *args: Any, + bg_start_span: bool = True, + **kwargs: Any, +) -> "defer.Deferred[Optional[R]]": """Run the given function in its own logcontext, with resource metrics This should be used to wrap processes which are fired off to run in the @@ -189,11 +216,13 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar args: positional args for func kwargs: keyword args for func - Returns: Deferred which returns the result of func, but note that it does not - follow the synapse logcontext rules. + Returns: + Deferred which returns the result of func, or `None` if func raises. + Note that the returned Deferred does not follow the synapse logcontext + rules. """ - async def run(): + async def run() -> Optional[R]: with _bg_metrics_lock: count = _background_process_counts.get(desc, 0) _background_process_counts[desc] = count + 1 @@ -210,12 +239,13 @@ async def run(): else: ctx = noop_context_manager() with ctx: - return await maybe_awaitable(func(*args, **kwargs)) + return await func(*args, **kwargs) except Exception: logger.exception( "Background process '%s' threw an exception", desc, ) + return None finally: _background_process_in_flight_count.labels(desc).dec() @@ -225,19 +255,24 @@ async def run(): return defer.ensureDeferred(run()) -def wrap_as_background_process(desc): +F = TypeVar("F", bound=Callable[..., Awaitable[Optional[Any]]]) + + +def wrap_as_background_process(desc: str) -> Callable[[F], F]: """Decorator that wraps a function that gets called as a background process. 
- Equivalent of calling the function with `run_as_background_process` + Equivalent to calling the function with `run_as_background_process` """ - def wrap_as_background_process_inner(func): + def wrap_as_background_process_inner(func: F) -> F: @wraps(func) - def wrap_as_background_process_inner_2(*args, **kwargs): + def wrap_as_background_process_inner_2( + *args: Any, **kwargs: Any + ) -> "defer.Deferred[Optional[R]]": return run_as_background_process(desc, func, *args, **kwargs) - return wrap_as_background_process_inner_2 + return cast(F, wrap_as_background_process_inner_2) return wrap_as_background_process_inner @@ -265,7 +300,7 @@ def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None): super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) - def start(self, rusage: "Optional[resource._RUsage]"): + def start(self, rusage: "Optional[resource.struct_rusage]") -> None: """Log context has started running (again).""" super().start(rusage) @@ -276,7 +311,12 @@ def start(self, rusage: "Optional[resource._RUsage]"): with _bg_metrics_lock: _background_processes_active_since_last_scrape.add(self._proc) - def __exit__(self, type, value, traceback) -> None: + def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: """Log context has finished.""" super().__exit__(type, value, traceback) diff --git a/synapse/metrics/jemalloc.py b/synapse/metrics/jemalloc.py index 29ab6c0229df..98ed9c0829e8 100644 --- a/synapse/metrics/jemalloc.py +++ b/synapse/metrics/jemalloc.py @@ -16,14 +16,16 @@ import logging import os import re -from typing import Optional +from typing import Iterable, Optional + +from prometheus_client import Metric from synapse.metrics import REGISTRY, GaugeMetricFamily logger = logging.getLogger(__name__) -def _setup_jemalloc_stats(): +def _setup_jemalloc_stats() -> None: """Checks to see if jemalloc is loaded, and hooks up a collector to record statistics exposed by jemalloc. 
""" @@ -135,7 +137,7 @@ def _jemalloc_refresh_stats() -> None: class JemallocCollector: """Metrics for internal jemalloc stats.""" - def collect(self): + def collect(self) -> Iterable[Metric]: _jemalloc_refresh_stats() g = GaugeMetricFamily( @@ -185,7 +187,7 @@ def collect(self): logger.debug("Added jemalloc stats") -def setup_jemalloc_stats(): +def setup_jemalloc_stats() -> None: """Try to setup jemalloc stats, if jemalloc is loaded.""" try: diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 473812b8e295..662e60bc3394 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -24,14 +24,56 @@ List, Optional, Tuple, + TypeVar, + Union, ) +import attr import jinja2 from twisted.internet import defer -from twisted.web.resource import IResource +from twisted.web.resource import Resource +from synapse.api.errors import SynapseError from synapse.events import EventBase +from synapse.events.presence_router import ( + GET_INTERESTED_USERS_CALLBACK, + GET_USERS_FOR_STATES_CALLBACK, + PresenceRouter, +) +from synapse.events.spamcheck import ( + CHECK_EVENT_FOR_SPAM_CALLBACK, + CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK, + CHECK_REGISTRATION_FOR_SPAM_CALLBACK, + CHECK_USERNAME_FOR_SPAM_CALLBACK, + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK, + USER_MAY_CREATE_ROOM_CALLBACK, + USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK, + USER_MAY_INVITE_CALLBACK, + USER_MAY_JOIN_ROOM_CALLBACK, + USER_MAY_PUBLISH_ROOM_CALLBACK, + USER_MAY_SEND_3PID_INVITE_CALLBACK, +) +from synapse.events.third_party_rules import ( + CHECK_EVENT_ALLOWED_CALLBACK, + CHECK_THREEPID_CAN_BE_INVITED_CALLBACK, + CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK, + ON_CREATE_ROOM_CALLBACK, + ON_NEW_EVENT_CALLBACK, +) +from synapse.handlers.account_validity import ( + IS_USER_EXPIRED_CALLBACK, + ON_LEGACY_ADMIN_REQUEST, + ON_LEGACY_RENEW_CALLBACK, + ON_LEGACY_SEND_MAIL_CALLBACK, + ON_USER_REGISTRATION_CALLBACK, +) +from synapse.handlers.auth import ( + CHECK_3PID_AUTH_CALLBACK, + CHECK_AUTH_CALLBACK, + ON_LOGGED_OUT_CALLBACK, + AuthHandler, +) from synapse.http.client import SimpleHttpClient from synapse.http.server import ( DirectServeHtmlResource, @@ -40,23 +82,49 @@ ) from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.context import ( + defer_to_thread, + make_deferred_yieldable, + run_in_background, +) from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.client.login import LoginResponse +from synapse.storage import DataStore +from synapse.storage.background_updates import ( + DEFAULT_BATCH_SIZE_CALLBACK, + MIN_BATCH_SIZE_CALLBACK, + ON_UPDATE_CALLBACK, +) from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter -from synapse.types import JsonDict, Requester, UserID, create_requester +from synapse.types import ( + DomainSpecificString, + JsonDict, + Requester, + StateMap, + UserID, + UserInfo, + create_requester, +) from synapse.util import Clock +from synapse.util.async_helpers import maybe_awaitable from synapse.util.caches.descriptors import cached if TYPE_CHECKING: + from synapse.app.generic_worker import GenericWorkerSlavedStore from synapse.server import HomeServer + +T = TypeVar("T") + """ This package defines the 'stable' API which can be used by extension modules which 
are loaded into Synapse. """ +PRESENCE_ALL_USERS = PresenceRouter.ALL_USERS + __all__ = [ "errors", "make_deferred_yieldable", @@ -70,37 +138,59 @@ "DirectServeHtmlResource", "DirectServeJsonResource", "ModuleApi", + "PRESENCE_ALL_USERS", + "LoginResponse", + "JsonDict", + "EventBase", + "StateMap", ] logger = logging.getLogger(__name__) +@attr.s(auto_attribs=True) +class UserIpAndAgent: + """ + An IP address and user agent used by a user to connect to this homeserver. + """ + + ip: str + user_agent: str + # The time at which this user agent/ip was last seen. + last_seen: int + + class ModuleApi: """A proxy object that gets passed to various plugin modules so they can register new users etc if necessary. """ - def __init__(self, hs: "HomeServer", auth_handler): + def __init__(self, hs: "HomeServer", auth_handler: AuthHandler) -> None: self._hs = hs - self._store = hs.get_datastore() + # TODO: Fix this type hint once the types for the data stores have been ironed + # out. + self._store: Union[DataStore, "GenericWorkerSlavedStore"] = hs.get_datastore() self._auth = hs.get_auth() self._auth_handler = auth_handler self._server_name = hs.hostname - self._presence_stream = hs.get_event_sources().sources["presence"] + self._presence_stream = hs.get_event_sources().sources.presence self._state = hs.get_state_handler() self._clock: Clock = hs.get_clock() self._send_email_handler = hs.get_send_email_handler() + self.custom_template_dir = hs.config.server.custom_template_directory try: - app_name = self._hs.config.email_app_name + app_name = self._hs.config.email.email_app_name - self._from_string = self._hs.config.email_notif_from % {"app": app_name} + self._from_string = self._hs.config.email.email_notif_from % { + "app": app_name + } except (KeyError, TypeError): # If substitution failed (which can happen if the string contains # placeholders other than just "app", or if the type of the placeholder is # not a string), fall back to the bare strings. - self._from_string = self._hs.config.email_notif_from + self._from_string = self._hs.config.email.email_notif_from self._raw_from = email.utils.parseaddr(self._from_string)[1] @@ -111,26 +201,145 @@ def __init__(self, hs: "HomeServer", auth_handler): self._spam_checker = hs.get_spam_checker() self._account_validity_handler = hs.get_account_validity_handler() self._third_party_event_rules = hs.get_third_party_event_rules() + self._password_auth_provider = hs.get_password_auth_provider() + self._presence_router = hs.get_presence_router() ################################################################################# # The following methods should only be called during the module's initialisation. 
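For orientation, the registration methods converted below are invoked from a module's constructor, roughly as in this sketch (`MyModule`, its resource, and the path are illustrative, not part of the patch):

from twisted.web.resource import Resource

class PingResource(Resource):
    isLeaf = True

    def render_GET(self, request) -> bytes:
        return b"pong"

class MyModule:
    """Illustrative module skeleton; Synapse instantiates it with (config, api)."""

    def __init__(self, config: dict, api):
        self._api = api
        # Registration methods such as this may only be called here, at init time:
        api.register_web_resource("/_synapse/client/mymodule/ping", PingResource())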
- @property - def register_spam_checker_callbacks(self): - """Registers callbacks for spam checking capabilities.""" - return self._spam_checker.register_callbacks + def register_spam_checker_callbacks( + self, + check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, + user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, + user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, + user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None, + user_may_create_room_with_invites: Optional[ + USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK + ] = None, + user_may_create_room_alias: Optional[ + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK + ] = None, + user_may_publish_room: Optional[USER_MAY_PUBLISH_ROOM_CALLBACK] = None, + check_username_for_spam: Optional[CHECK_USERNAME_FOR_SPAM_CALLBACK] = None, + check_registration_for_spam: Optional[ + CHECK_REGISTRATION_FOR_SPAM_CALLBACK + ] = None, + check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None, + ) -> None: + """Registers callbacks for spam checking capabilities. + + Added in Synapse v1.37.0. + """ + return self._spam_checker.register_callbacks( + check_event_for_spam=check_event_for_spam, + user_may_join_room=user_may_join_room, + user_may_invite=user_may_invite, + user_may_send_3pid_invite=user_may_send_3pid_invite, + user_may_create_room=user_may_create_room, + user_may_create_room_with_invites=user_may_create_room_with_invites, + user_may_create_room_alias=user_may_create_room_alias, + user_may_publish_room=user_may_publish_room, + check_username_for_spam=check_username_for_spam, + check_registration_for_spam=check_registration_for_spam, + check_media_file_for_spam=check_media_file_for_spam, + ) - @property - def register_account_validity_callbacks(self): - """Registers callbacks for account validity capabilities.""" - return self._account_validity_handler.register_account_validity_callbacks + def register_account_validity_callbacks( + self, + is_user_expired: Optional[IS_USER_EXPIRED_CALLBACK] = None, + on_user_registration: Optional[ON_USER_REGISTRATION_CALLBACK] = None, + on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None, + on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None, + on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None, + ) -> None: + """Registers callbacks for account validity capabilities. + + Added in Synapse v1.39.0. + """ + return self._account_validity_handler.register_account_validity_callbacks( + is_user_expired=is_user_expired, + on_user_registration=on_user_registration, + on_legacy_send_mail=on_legacy_send_mail, + on_legacy_renew=on_legacy_renew, + on_legacy_admin_request=on_legacy_admin_request, + ) - @property - def register_third_party_rules_callbacks(self): - """Registers callbacks for third party event rules capabilities.""" - return self._third_party_event_rules.register_third_party_rules_callbacks + def register_third_party_rules_callbacks( + self, + check_event_allowed: Optional[CHECK_EVENT_ALLOWED_CALLBACK] = None, + on_create_room: Optional[ON_CREATE_ROOM_CALLBACK] = None, + check_threepid_can_be_invited: Optional[ + CHECK_THREEPID_CAN_BE_INVITED_CALLBACK + ] = None, + check_visibility_can_be_modified: Optional[ + CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK + ] = None, + on_new_event: Optional[ON_NEW_EVENT_CALLBACK] = None, + ) -> None: + """Registers callbacks for third party event rules capabilities. + + Added in Synapse v1.39.0. 
+ """ + return self._third_party_event_rules.register_third_party_rules_callbacks( + check_event_allowed=check_event_allowed, + on_create_room=on_create_room, + check_threepid_can_be_invited=check_threepid_can_be_invited, + check_visibility_can_be_modified=check_visibility_can_be_modified, + on_new_event=on_new_event, + ) + + def register_presence_router_callbacks( + self, + get_users_for_states: Optional[GET_USERS_FOR_STATES_CALLBACK] = None, + get_interested_users: Optional[GET_INTERESTED_USERS_CALLBACK] = None, + ) -> None: + """Registers callbacks for presence router capabilities. - def register_web_resource(self, path: str, resource: IResource): + Added in Synapse v1.42.0. + """ + return self._presence_router.register_presence_router_callbacks( + get_users_for_states=get_users_for_states, + get_interested_users=get_interested_users, + ) + + def register_password_auth_provider_callbacks( + self, + check_3pid_auth: Optional[CHECK_3PID_AUTH_CALLBACK] = None, + on_logged_out: Optional[ON_LOGGED_OUT_CALLBACK] = None, + auth_checkers: Optional[ + Dict[Tuple[str, Tuple[str, ...]], CHECK_AUTH_CALLBACK] + ] = None, + ) -> None: + """Registers callbacks for password auth provider capabilities. + + Added in Synapse v1.46.0. + """ + return self._password_auth_provider.register_password_auth_provider_callbacks( + check_3pid_auth=check_3pid_auth, + on_logged_out=on_logged_out, + auth_checkers=auth_checkers, + ) + + def register_background_update_controller_callbacks( + self, + on_update: ON_UPDATE_CALLBACK, + default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + min_batch_size: Optional[MIN_BATCH_SIZE_CALLBACK] = None, + ) -> None: + """Registers background update controller callbacks. + + Added in Synapse v1.49.0. + """ + + for db in self._hs.get_datastores().databases: + db.updates.register_update_controller_callbacks( + on_update=on_update, + default_batch_size=default_batch_size, + min_batch_size=min_batch_size, + ) + + def register_web_resource(self, path: str, resource: Resource) -> None: """Registers a web resource to be served at the given path. This function should be called during initialisation of the module. @@ -138,6 +347,8 @@ def register_web_resource(self, path: str, resource: IResource): If multiple modules register a resource for the same path, the module that appears the highest in the configuration file takes priority. + Added in Synapse v1.37.0. + Args: path: The path to register the resource for. resource: The resource to attach to this path. @@ -148,32 +359,54 @@ def register_web_resource(self, path: str, resource: IResource): # The following methods can be called by the module at any point in time. @property - def http_client(self): + def http_client(self) -> SimpleHttpClient: """Allows making outbound HTTP requests to remote resources. An instance of synapse.http.client.SimpleHttpClient + + Added in Synapse v1.22.0. """ return self._http_client @property - def public_room_list_manager(self): + def public_room_list_manager(self) -> "PublicRoomListManager": """Allows adding to, removing from and checking the status of rooms in the public room list. An instance of synapse.module_api.PublicRoomListManager + + Added in Synapse v1.22.0. """ return self._public_room_list_manager @property def public_baseurl(self) -> str: - """The configured public base URL for this homeserver.""" - return self._hs.config.public_baseurl + """The configured public base URL for this homeserver. + + Added in Synapse v1.39.0. 
+ """ + return self._hs.config.server.public_baseurl @property def email_app_name(self) -> str: - """The application name configured in the homeserver's configuration.""" + """The application name configured in the homeserver's configuration. + + Added in Synapse v1.39.0. + """ return self._hs.config.email.email_app_name + async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]: + """Get user info by user_id + + Added in Synapse v1.41.0. + + Args: + user_id: Fully qualified user id. + Returns: + UserInfo object if a user was found, otherwise None + """ + return await self._store.get_userinfo_by_id(user_id) + async def get_user_by_req( self, req: SynapseRequest, @@ -182,6 +415,8 @@ async def get_user_by_req( ) -> Requester: """Check the access_token provided for a request + Added in Synapse v1.39.0. + Args: req: Incoming HTTP request allow_guest: True if guest users should be allowed. If this @@ -207,6 +442,8 @@ async def get_user_by_req( async def is_user_admin(self, user_id: str) -> bool: """Checks if a user is a server admin. + Added in Synapse v1.39.0. + Args: user_id: The Matrix ID of the user to check. @@ -215,17 +452,19 @@ async def is_user_admin(self, user_id: str) -> bool: """ return await self._store.is_server_admin(UserID.from_string(user_id)) - def get_qualified_user_id(self, username): + def get_qualified_user_id(self, username: str) -> str: """Qualify a user id, if necessary Takes a user id provided by the user and adds the @ and :domain to qualify it, if necessary + Added in Synapse v0.25.0. + Args: - username (str): provided user id + username: provided user id Returns: - str: qualified @user:id + qualified @user:id """ if username.startswith("@"): return username @@ -234,6 +473,8 @@ def get_qualified_user_id(self, username): async def get_profile_for_user(self, localpart: str) -> ProfileInfo: """Look up the profile info for the user with the given localpart. + Added in Synapse v1.39.0. + Args: localpart: The localpart to look up profile information for. @@ -246,6 +487,8 @@ async def get_threepids_for_user(self, user_id: str) -> List[Dict[str, str]]: """Look up the threepids (email addresses and phone numbers) associated with the given Matrix user ID. + Added in Synapse v1.39.0. + Args: user_id: The Matrix user ID to look up threepids for. @@ -257,20 +500,27 @@ async def get_threepids_for_user(self, user_id: str) -> List[Dict[str, str]]: """ return await self._store.user_get_threepids(user_id) - def check_user_exists(self, user_id): + def check_user_exists(self, user_id: str) -> "defer.Deferred[Optional[str]]": """Check if user exists. + Added in Synapse v0.25.0. + Args: - user_id (str): Complete @user:id + user_id: Complete @user:id Returns: - Deferred[str|None]: Canonical (case-corrected) user_id, or None + Canonical (case-corrected) user_id, or None if the user is not registered. """ return defer.ensureDeferred(self._auth_handler.check_user_exists(user_id)) @defer.inlineCallbacks - def register(self, localpart, displayname=None, emails: Optional[List[str]] = None): + def register( + self, + localpart: str, + displayname: Optional[str] = None, + emails: Optional[List[str]] = None, + ) -> Generator["defer.Deferred[Any]", Any, Tuple[str, str]]: """Registers a new user with given localpart and optional displayname, emails. Also returns an access token for the new user. @@ -279,13 +529,15 @@ def register(self, localpart, displayname=None, emails: Optional[List[str]] = No return that device to the user. Prefer separate calls to register_user and register_device. 
+ Added in Synapse v0.25.0. + Args: - localpart (str): The localpart of the new user. - displayname (str|None): The displayname of the new user. - emails (List[str]): Emails to bind to the new user. + localpart: The localpart of the new user. + displayname: The displayname of the new user. + emails: Emails to bind to the new user. Returns: - Deferred[tuple[str, str]]: a 2-tuple of (user_id, access_token) + a 2-tuple of (user_id, access_token) """ logger.warning( "Using deprecated ModuleApi.register which creates a dummy user device." @@ -295,21 +547,26 @@ def register(self, localpart, displayname=None, emails: Optional[List[str]] = No return user_id, access_token def register_user( - self, localpart, displayname=None, emails: Optional[List[str]] = None - ): + self, + localpart: str, + displayname: Optional[str] = None, + emails: Optional[List[str]] = None, + ) -> "defer.Deferred[str]": """Registers a new user with given localpart and optional displayname, emails. + Added in Synapse v1.2.0. + Args: - localpart (str): The localpart of the new user. - displayname (str|None): The displayname of the new user. - emails (List[str]): Emails to bind to the new user. + localpart: The localpart of the new user. + displayname: The displayname of the new user. + emails: Emails to bind to the new user. Raises: SynapseError if there is an error performing the registration. Check the 'errcode' property for more information on the reason for failure Returns: - defer.Deferred[str]: user_id + user_id """ return defer.ensureDeferred( self._hs.get_registration_handler().register_user( @@ -319,18 +576,25 @@ def register_user( ) ) - def register_device(self, user_id, device_id=None, initial_display_name=None): + def register_device( + self, + user_id: str, + device_id: Optional[str] = None, + initial_display_name: Optional[str] = None, + ) -> "defer.Deferred[Tuple[str, str, Optional[int], Optional[str]]]": """Register a device for a user and generate an access token. + Added in Synapse v1.2.0. + Args: - user_id (str): full canonical @user:id - device_id (str|None): The device ID to check, or None to generate + user_id: full canonical @user:id + device_id: The device ID to check, or None to generate a new one. - initial_display_name (str|None): An optional display name for the + initial_display_name: An optional display name for the device. Returns: - defer.Deferred[tuple[str, str]]: Tuple of device ID and access token + Tuple of device ID, access token, access token expiration time and refresh token """ return defer.ensureDeferred( self._hs.get_registration_handler().register_device( @@ -345,6 +609,8 @@ def record_user_external_id( ) -> defer.Deferred: """Record a mapping from an external user id to a mxid + Added in Synapse v1.9.0. + Args: auth_provider: identifier for the remote auth provider external_id: id on that system @@ -361,9 +627,12 @@ def generate_short_term_login_token( user_id: str, duration_in_ms: int = (2 * 60 * 1000), auth_provider_id: str = "", + auth_provider_session_id: Optional[str] = None, ) -> str: """Generate a login token suitable for m.login.token authentication + Added in Synapse v1.9.0. 
+ Args: user_id: gives the ID of the user that the token is for @@ -376,13 +645,18 @@ def generate_short_term_login_token( return self._hs.get_macaroon_generator().generate_short_term_login_token( user_id, auth_provider_id, + auth_provider_session_id, duration_in_ms, ) @defer.inlineCallbacks - def invalidate_access_token(self, access_token): + def invalidate_access_token( + self, access_token: str + ) -> Generator["defer.Deferred[Any]", Any, None]: """Invalidate an access token for a user + Added in Synapse v0.25.0. + Args: access_token(str): access token @@ -410,12 +684,20 @@ def invalidate_access_token(self, access_token): self._auth_handler.delete_access_token(access_token) ) - def run_db_interaction(self, desc, func, *args, **kwargs): + def run_db_interaction( + self, + desc: str, + func: Callable[..., T], + *args: Any, + **kwargs: Any, + ) -> "defer.Deferred[T]": """Run a function with a database connection + Added in Synapse v0.25.0. + Args: - desc (str): description for the transaction, for metrics etc - func (func): function to be run. Passed a database cursor object + desc: description for the transaction, for metrics etc + func: function to be run. Passed a database cursor object as well as *args and **kwargs *args: positional args to be passed to func **kwargs: named args to be passed to func @@ -429,13 +711,15 @@ def run_db_interaction(self, desc, func, *args, **kwargs): def complete_sso_login( self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str - ): + ) -> None: """Complete a SSO login by redirecting the user to a page to confirm whether they want their access token sent to `client_redirect_url`, or redirect them to that URL with a token directly if the URL matches with one of the whitelisted clients. This is deprecated in favor of complete_sso_login_async. + Added in Synapse v1.11.1. + Args: registered_user_id: The MXID that has been registered as a previous step of of this SSO login. @@ -457,11 +741,13 @@ async def complete_sso_login_async( client_redirect_url: str, new_user: bool = False, auth_provider_id: str = "", - ): + ) -> None: """Complete a SSO login by redirecting the user to a page to confirm whether they want their access token sent to `client_redirect_url`, or redirect them to that URL with a token directly if the URL matches with one of the whitelisted clients. + Added in Synapse v1.13.0. + Args: registered_user_id: The MXID that has been registered as a previous step of of this SSO login. @@ -490,6 +776,8 @@ def get_state_events_in_room( (This is exposed for compatibility with the old SpamCheckerApi. We should probably deprecate it and replace it with an async method in a subclass.) + Added in Synapse v1.22.0. + Args: room_id: The room ID to get state events in. types: The event type and state key (using None @@ -507,8 +795,107 @@ def get_state_events_in_room( state = yield defer.ensureDeferred(self._store.get_events(state_ids.values())) return state.values() + async def update_room_membership( + self, + sender: str, + target: str, + room_id: str, + new_membership: str, + content: Optional[JsonDict] = None, + ) -> EventBase: + """Updates the membership of a user to the given value. + + Added in Synapse v1.46.0. + + Args: + sender: The user performing the membership change. Must be a user local to + this homeserver. + target: The user whose membership is changing. This is often the same value + as `sender`, but it might differ in some cases (e.g. 
when kicking a user, + the `sender` is the user performing the kick and the `target` is the user + being kicked). + room_id: The room in which to change the membership. + new_membership: The new membership state of `target` after this operation. See + https://spec.matrix.org/unstable/client-server-api/#mroommember for the + list of allowed values. + content: Additional values to include in the resulting event's content. + + Returns: + The newly created membership event. + + Raises: + RuntimeError if the `sender` isn't a local user. + ShadowBanError if a shadow-banned requester attempts to send an invite. + SynapseError if the module attempts to send a membership event that isn't + allowed, either by the server's configuration (e.g. trying to set a + per-room display name that's too long) or by the validation rules around + membership updates (e.g. the `membership` value is invalid). + """ + if not self.is_mine(sender): + raise RuntimeError( + "Tried to send an event as a user that isn't local to this homeserver", + ) + + requester = create_requester(sender) + target_user_id = UserID.from_string(target) + + if content is None: + content = {} + + # Set the profile if not already done by the module. + if "avatar_url" not in content or "displayname" not in content: + try: + # Try to fetch the user's profile. + profile = await self._hs.get_profile_handler().get_profile( + target_user_id.to_string(), + ) + except SynapseError as e: + # If the profile couldn't be found, use default values. + profile = { + "displayname": target_user_id.localpart, + "avatar_url": None, + } + + if e.code != 404: + # If the error isn't 404, it means we tried to fetch the profile over + # federation but the remote server responded with a non-standard + # status code. + logger.error( + "Got non-404 error status when fetching profile for %s", + target_user_id.to_string(), + ) + + # Set the profile where it needs to be set. + if "avatar_url" not in content: + content["avatar_url"] = profile["avatar_url"] + + if "displayname" not in content: + content["displayname"] = profile["displayname"] + + event_id, _ = await self._hs.get_room_member_handler().update_membership( + requester=requester, + target=target_user_id, + room_id=room_id, + action=new_membership, + content=content, + ) + + # Try to retrieve the resulting event. + event = await self._hs.get_datastore().get_event(event_id) + + # update_membership is supposed to always return after the event has been + # successfully persisted. + assert event is not None + + return event + async def create_and_send_event_into_room(self, event_dict: JsonDict) -> EventBase: - """Create and send an event into a room. Membership events are currently not supported. + """Create and send an event into a room. + + Membership events are not supported by this method. To update a user's membership + in a room, please use the `update_room_membership` method instead. + + Added in Synapse v1.22.0. Args: event_dict: A dictionary representing the event to send. @@ -550,6 +937,8 @@ async def send_local_online_presence_to(self, users: Iterable[str]) -> None: Note that this method can only be run on the process that is configured to write to the presence stream. By default this is the main process. + + Added in Synapse v1.32.0. 
""" if self._hs._instance_name not in self._hs.config.worker.writers.presence: raise Exception( @@ -591,14 +980,21 @@ def looping_background_call( self, f: Callable, msec: float, - *args, + *args: object, desc: Optional[str] = None, - **kwargs, - ): + run_on_all_instances: bool = False, + **kwargs: object, + ) -> None: """Wraps a function as a background process and calls it repeatedly. + NOTE: Will only run on the instance that is configured to run + background processes (which is the main process by default), unless + `run_on_all_workers` is set. + Waits `msec` initially before calling `f` for the first time. + Added in Synapse v1.39.0. + Args: f: The function to call repeatedly. f can be either synchronous or asynchronous, and must follow Synapse's logcontext rules. @@ -607,19 +1003,19 @@ def looping_background_call( msec: How long to wait between calls in milliseconds. *args: Positional arguments to pass to function. desc: The background task's description. Default to the function's name. + run_on_all_instances: Whether to run this on all instances, rather + than just the instance configured to run background tasks. **kwargs: Key arguments to pass to function. """ if desc is None: desc = f.__name__ - if self._hs.config.run_background_tasks: + if self._hs.config.worker.run_background_tasks or run_on_all_instances: self._clock.looping_call( run_as_background_process, msec, desc, - f, - *args, - **kwargs, + lambda: maybe_awaitable(f(*args, **kwargs)), ) else: logger.warning( @@ -627,15 +1023,22 @@ def looping_background_call( f, ) + async def sleep(self, seconds: float) -> None: + """Sleeps for the given number of seconds.""" + + await self._clock.sleep(seconds) + async def send_mail( self, recipient: str, subject: str, html: str, text: str, - ): + ) -> None: """Send an email on behalf of the homeserver. + Added in Synapse v1.39.0. + Args: recipient: The email address for the recipient. subject: The email's subject. @@ -659,6 +1062,8 @@ def read_templates( By default, Synapse will look for these templates in its configured template directory, but another directory to search in can be provided. + Added in Synapse v1.39.0. + Args: filenames: The name of the template files to look for. custom_template_directory: An additional directory to look for the files in. @@ -667,7 +1072,135 @@ def read_templates( A list containing the loaded templates, with the orders matching the one of the filenames parameter. """ - return self._hs.config.read_templates(filenames, custom_template_directory) + return self._hs.config.server.read_templates( + filenames, + (td for td in (self.custom_template_dir, custom_template_directory) if td), + ) + + def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: + """ + Checks whether an ID (user id, room, ...) comes from this homeserver. + + Added in Synapse v1.44.0. + + Args: + id: any Matrix id (e.g. user id, room id, ...), either as a raw id, + e.g. string "@user:example.com" or as a parsed UserID, RoomID, ... + Returns: + True if id comes from this homeserver, False otherwise. + """ + if isinstance(id, DomainSpecificString): + return self._hs.is_mine(id) + else: + return self._hs.is_mine_id(id) + + async def get_user_ip_and_agents( + self, user_id: str, since_ts: int = 0 + ) -> List[UserIpAndAgent]: + """ + Return the list of user IPs and agents for a user. + + Added in Synapse v1.44.0. + + Args: + user_id: the id of a user, local or remote + since_ts: a timestamp in seconds since the epoch, + or the epoch itself if not specified. 
+ Returns: + The list of all UserIpAndAgent that the user has + used to connect to this homeserver since `since_ts`. + If the user is remote, this list is empty. + """ + # Don't hit the db if this is not a local user. + is_mine = False + try: + # Let's be defensive against ill-formed strings. + if self.is_mine(user_id): + is_mine = True + except Exception: + pass + + if is_mine: + raw_data = await self._store.get_user_ip_and_agents( + UserID.from_string(user_id), since_ts + ) + # Sanitize some of the data. We don't want to return tokens. + return [ + UserIpAndAgent( + ip=data["ip"], + user_agent=data["user_agent"], + last_seen=data["last_seen"], + ) + for data in raw_data + ] + else: + return [] + + async def get_room_state( + self, + room_id: str, + event_filter: Optional[Iterable[Tuple[str, Optional[str]]]] = None, + ) -> StateMap[EventBase]: + """Returns the current state of the given room. + + The events are returned as a mapping, in which the key for each event is a tuple + whose first element is the event's type and whose second is its state key. + + Added in Synapse v1.47.0. + + Args: + room_id: The ID of the room to get state from. + event_filter: A filter to apply when retrieving events. None if no filter + should be applied. If provided, must be an iterable of tuples. A tuple's + first element is the event type and the second is the state key, or is + None if the state key should not be filtered on. + An example of a filter is: + [ + ("m.room.member", "@alice:example.com"), # Member event for @alice:example.com + ("org.matrix.some_event", ""), # State event of type "org.matrix.some_event" + # with an empty string as its state key + ("org.matrix.some_other_event", None), # State events of type "org.matrix.some_other_event" + # regardless of their state key + ] + """ + if event_filter: + # If a filter was provided, turn it into a StateFilter and retrieve a filtered + # view of the state. + state_filter = StateFilter.from_types(event_filter) + state_ids = await self._store.get_filtered_current_state_ids( + room_id, + state_filter, + ) + else: + # If no filter was provided, get the whole state. We could also reuse the call + # to get_filtered_current_state_ids above, with `state_filter = StateFilter.all()`, + # but get_filtered_current_state_ids isn't cached and `get_current_state_ids` + # is, so using the latter when we can is better for perf. + state_ids = await self._store.get_current_state_ids(room_id) + + state_events = await self._store.get_events(state_ids.values()) + + return {key: state_events[event_id] for key, event_id in state_ids.items()} + + async def defer_to_thread( + self, + f: Callable[..., T], + *args: Any, + **kwargs: Any, + ) -> T: + """Runs the given function in a separate thread from Synapse's thread pool. + + Added in Synapse v1.49.0. + + Args: + f: The function to run. + args: The function's arguments. + kwargs: The function's keyword arguments. + + Returns: + The return value of the function once run in a thread. + """ + return await defer_to_thread(self._hs.get_reactor(), f, *args, **kwargs) class PublicRoomListManager: @@ -681,6 +1214,8 @@ def __init__(self, hs: "HomeServer"): async def room_is_in_public_room_list(self, room_id: str) -> bool: """Checks whether a room is in the public room list.
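A sketch of using `get_room_state` above with a filter (room and user IDs are illustrative; `module_api` is assumed to be the `ModuleApi` instance). Note that events matched with a `None` state key come back keyed on their actual state key:

state = await module_api.get_room_state(
    "!someroom:example.com",
    [
        ("m.room.member", "@alice:example.com"),
        ("m.room.name", None),
    ],
)
name_event = state.get(("m.room.name", ""))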
+ Added in Synapse v1.22.0. + Args: room_id: The ID of the room. """ @@ -705,6 +1242,8 @@ async def add_room_to_public_room_list(self, room_id: str) -> None: async def remove_room_from_public_room_list(self, room_id: str) -> None: """Removes a room from the public room list. + Added in Synapse v1.22.0. + Args: room_id: The ID of the room. """ diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index 98ea911a8195..1db900e41f64 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -14,9 +14,16 @@ """Exception types which are exposed as part of the stable module API""" -from synapse.api.errors import ( # noqa: F401 +from synapse.api.errors import ( InvalidClientCredentialsError, RedirectException, SynapseError, ) -from synapse.config._base import ConfigError # noqa: F401 +from synapse.config._base import ConfigError + +__all__ = [ + "InvalidClientCredentialsError", + "RedirectException", + "SynapseError", + "ConfigError", +] diff --git a/synapse/notifier.py b/synapse/notifier.py index bbe337949ac5..60e5409895e6 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -220,6 +220,8 @@ def __init__(self, hs: "synapse.server.HomeServer"): # down. self.remote_server_up_callbacks: List[Callable[[str], None]] = [] + self._third_party_rules = hs.get_third_party_event_rules() + self.clock = hs.get_clock() self.appservice_handler = hs.get_application_service_handler() self._pusher_pool = hs.get_pusherpool() @@ -267,7 +269,7 @@ def add_replication_callback(self, cb: Callable[[], None]): """ self.replication_callbacks.append(cb) - def on_new_room_event( + async def on_new_room_event( self, event: EventBase, event_pos: PersistedEventPosition, @@ -275,9 +277,10 @@ def on_new_room_event( extra_users: Optional[Collection[UserID]] = None, ): """Unwraps event and calls `on_new_room_event_args`.""" - self.on_new_room_event_args( + await self.on_new_room_event_args( event_pos=event_pos, room_id=event.room_id, + event_id=event.event_id, event_type=event.type, state_key=event.get("state_key"), membership=event.content.get("membership"), @@ -285,9 +288,10 @@ def on_new_room_event( extra_users=extra_users or [], ) - def on_new_room_event_args( + async def on_new_room_event_args( self, room_id: str, + event_id: str, event_type: str, state_key: Optional[str], membership: Optional[str], @@ -302,7 +306,10 @@ def on_new_room_event_args( listening to the room, and any listeners for the users in the `extra_users` param. - The events can be peristed out of order. The notifier will wait + This also notifies modules listening on new events via the + `on_new_event` callback. + + The events can be persisted out of order. The notifier will wait until all previous events have been persisted before notifying the client streams. 
""" @@ -318,6 +325,8 @@ def on_new_room_event_args( ) self._notify_pending_new_room_events(max_room_stream_token) + await self._third_party_rules.on_new_event(event_id) + self.notify_replication() def _notify_pending_new_room_events(self, max_room_stream_token: RoomStreamToken): @@ -374,22 +383,6 @@ def _notify_app_services(self, max_room_stream_token: RoomStreamToken): except Exception: logger.exception("Error notifying application services of event") - def _notify_app_services_ephemeral( - self, - stream_key: str, - new_token: Union[int, RoomStreamToken], - users: Optional[Collection[Union[str, UserID]]] = None, - ): - try: - stream_token = None - if isinstance(new_token, int): - stream_token = new_token - self.appservice_handler.notify_interested_services_ephemeral( - stream_key, stream_token, users or [] - ) - except Exception: - logger.exception("Error notifying application services of event") - def _notify_pusher_pool(self, max_room_stream_token: RoomStreamToken): try: self._pusher_pool.on_new_notifications(max_room_stream_token) @@ -402,10 +395,17 @@ def on_new_event( new_token: Union[int, RoomStreamToken], users: Optional[Collection[Union[str, UserID]]] = None, rooms: Optional[Collection[str]] = None, - ): + ) -> None: """Used to inform listeners that something has happened event wise. Will wake up all listeners for the given users and rooms. + + Args: + stream_key: The stream the event came from. + new_token: The value of the new stream token. + users: The users that should be informed of the new event. + rooms: A collection of room IDs for which each joined member will be + informed of the new event. """ users = users or [] rooms = rooms or [] @@ -444,12 +444,15 @@ def on_new_event( self.notify_replication() - # Notify appservices - self._notify_app_services_ephemeral( - stream_key, - new_token, - users, - ) + # Notify appservices. + try: + self.appservice_handler.notify_interested_services_ephemeral( + stream_key, + new_token, + users, + ) + except Exception: + logger.exception("Error notifying application services of event") def on_new_replication_data(self) -> None: """Used to inform replication listeners that something has happened @@ -584,7 +587,7 @@ async def check_for_updates( events: List[EventBase] = [] end_token = from_token - for name, source in self.event_sources.sources.items(): + for name, source in self.event_sources.sources.get_sources(): keyname = "%s_key" % name before_id = getattr(before_token, keyname) after_id = getattr(after_token, keyname) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 2c23afe8e3ab..820f6f3f7ec0 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -94,7 +94,7 @@ def on_new_notifications(self, max_token: RoomStreamToken) -> None: self._start_processing() @abc.abstractmethod - def _start_processing(self): + def _start_processing(self) -> None: """Start processing push notifications.""" raise NotImplementedError() diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index c337e530d3cf..009d8e77b05b 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -232,6 +232,8 @@ async def action_for_event_by_user( # that user, as they might not be already joined. 
if event.type == EventTypes.Member and event.state_key == uid: display_name = event.content.get("displayname", None) + if not isinstance(display_name, str): + display_name = None if count_as_unread: # Add an element for the current user if the event needs to be marked as @@ -268,7 +270,7 @@ def _condition_checker( evaluator: PushRuleEvaluatorForEvent, conditions: List[dict], uid: str, - display_name: str, + display_name: Optional[str], cache: Dict[str, bool], ) -> bool: for cond in conditions: @@ -290,6 +292,12 @@ def _condition_checker( return True +MemberMap = Dict[str, Tuple[str, str]] +Rule = Dict[str, dict] +RulesByUser = Dict[str, List[Rule]] +StateGroup = Union[object, int] + + @attr.s(slots=True) class RulesForRoomData: """The data stored in the cache by `RulesForRoom`. @@ -299,16 +307,16 @@ class RulesForRoomData: """ # event_id -> (user_id, state) - member_map = attr.ib(type=Dict[str, Tuple[str, str]], factory=dict) + member_map = attr.ib(type=MemberMap, factory=dict) # user_id -> rules - rules_by_user = attr.ib(type=Dict[str, List[Dict[str, dict]]], factory=dict) + rules_by_user = attr.ib(type=RulesByUser, factory=dict) # The last state group we updated the caches for. If the state_group of # a new event comes along, we know that we can just return the cached # result. # On invalidation of the rules themselves (if the user changes them), # we invalidate everything and set state_group to `object()` - state_group = attr.ib(type=Union[object, int], factory=object) + state_group = attr.ib(type=StateGroup, factory=object) # A sequence number to keep track of when we're allowed to update the # cache. We bump the sequence number when we invalidate the cache. If @@ -532,7 +540,13 @@ async def _update_rules_with_member_event_ids( self.update_cache(sequence, members, ret_rules_by_user, state_group) - def update_cache(self, sequence, members, rules_by_user, state_group) -> None: + def update_cache( + self, + sequence: int, + members: MemberMap, + rules_by_user: RulesByUser, + state_group: StateGroup, + ) -> None: if sequence == self.data.sequence: self.data.member_map.update(members) self.data.rules_by_user = rules_by_user diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index 1fc9716a3422..c5708cd8885b 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -19,7 +19,9 @@ from synapse.types import UserID -def format_push_rules_for_user(user: UserID, ruleslist) -> Dict[str, Dict[str, list]]: +def format_push_rules_for_user( + user: UserID, ruleslist: List +) -> Dict[str, Dict[str, list]]: """Converts a list of rawrules and a enabled map into nested dictionaries to match the Matrix client-server format for push rules""" diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index e08e125cb8a5..4f13c0418ab9 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -21,6 +21,8 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException, ThrottleParams from synapse.push.mailer import Mailer +from synapse.push.push_types import EmailReason +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.util.threepids import validate_email if TYPE_CHECKING: @@ -184,13 +186,13 @@ async def _unsafe_process(self) -> None: should_notify_at = max(notif_ready_at, room_ready_at) - if should_notify_at < self.clock.time_msec(): + if should_notify_at <= self.clock.time_msec(): # one of our notifications 
is ready for sending, so we send # *one* email updating the user on their notifications, # we then consider all previously outstanding notifications # to be delivered. - reason = { + reason: EmailReason = { "room_id": push_action["room_id"], "now": self.clock.time_msec(), "received_at": received_at, @@ -275,7 +277,7 @@ def room_ready_to_notify_at(self, room_id: str) -> int: return may_send_at async def sent_notif_update_throttle( - self, room_id: str, notified_push_action: dict + self, room_id: str, notified_push_action: EmailPushAction ) -> None: # We have sent a notification, so update the throttle accordingly. # If the event that triggered the notif happened more than @@ -315,7 +317,9 @@ async def sent_notif_update_throttle( self.pusher_id, room_id, self.throttle_params[room_id] ) - async def send_notification(self, push_actions: List[dict], reason: dict) -> None: + async def send_notification( + self, push_actions: List[EmailPushAction], reason: EmailReason + ) -> None: logger.info("Sending notif email for user %r", self.user_id) await self.mailer.send_notification_mail( diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 36aabd84220b..3fa603ccb7f7 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -26,6 +26,7 @@ from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException +from synapse.storage.databases.main.event_push_actions import HttpPushAction from . import push_rule_evaluator, push_tools @@ -73,7 +74,9 @@ def __init__(self, hs: "HomeServer", pusher_config: PusherConfig): self.failing_since = pusher_config.failing_since self.timed_call: Optional[IDelayedCall] = None self._is_processing = False - self._group_unread_count_by_room = hs.config.push_group_unread_count_by_room + self._group_unread_count_by_room = ( + hs.config.push.push_group_unread_count_by_room + ) self._pusherpool = hs.get_pusherpool() self.data = pusher_config.data @@ -271,7 +274,7 @@ async def _unsafe_process(self) -> None: ) break - async def _process_one(self, push_action: dict) -> bool: + async def _process_one(self, push_action: HttpPushAction) -> bool: if "notify" not in push_action["actions"]: return True @@ -365,7 +368,7 @@ async def _build_notification_dict( if event.type == "m.room.member" and event.is_state(): d["notification"]["membership"] = event.content["membership"] d["notification"]["user_is_target"] = event.state_key == self.user_id - if self.hs.config.push_include_content and event.content: + if self.hs.config.push.push_include_content and event.content: d["notification"]["content"] = event.content # We no longer send aliases separately, instead, we send the human @@ -401,10 +404,10 @@ async def dispatch_push( rejected = resp["rejected"] return rejected - async def _send_badge(self, badge): + async def _send_badge(self, badge: int) -> None: """ Args: - badge (int): number of unread messages + badge: number of unread messages """ logger.debug("Sending updated badge count %d to %s", badge, self.name) d = { diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 941fb238b798..ba4f866487ec 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -14,7 +14,7 @@ import logging import urllib.parse -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, TypeVar import bleach import jinja2 @@ -28,6 +28,14 @@ 
descriptor_from_member_events, name_from_member_event, ) +from synapse.push.push_types import ( + EmailReason, + MessageVars, + NotifVars, + RoomVars, + TemplateVars, +) +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.storage.state import StateFilter from synapse.types import StateMap, UserID from synapse.util.async_helpers import concurrently_execute @@ -110,7 +118,7 @@ def __init__( self.state_handler = self.hs.get_state_handler() self.storage = hs.get_storage() self.app_name = app_name - self.email_subjects: EmailSubjectConfig = hs.config.email_subjects + self.email_subjects: EmailSubjectConfig = hs.config.email.email_subjects logger.info("Created Mailer for app_name %s" % app_name) @@ -130,17 +138,17 @@ async def send_password_reset_mail( """ params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( - self.hs.config.public_baseurl + self.hs.config.server.public_baseurl + "_synapse/client/password_reset/email/submit_token?%s" % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, self.email_subjects.password_reset - % {"server_name": self.hs.config.server_name}, + % {"server_name": self.hs.config.server.server_name}, template_vars, ) @@ -160,17 +168,17 @@ async def send_registration_mail( """ params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( - self.hs.config.public_baseurl + self.hs.config.server.public_baseurl + "_matrix/client/unstable/registration/email/submit_token?%s" % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, self.email_subjects.email_validation - % {"server_name": self.hs.config.server_name}, + % {"server_name": self.hs.config.server.server_name}, template_vars, ) @@ -191,17 +199,17 @@ async def send_add_threepid_mail( """ params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( - self.hs.config.public_baseurl + self.hs.config.server.public_baseurl + "_matrix/client/unstable/add_threepid/email/submit_token?%s" % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, self.email_subjects.email_validation - % {"server_name": self.hs.config.server_name}, + % {"server_name": self.hs.config.server.server_name}, template_vars, ) @@ -210,8 +218,8 @@ async def send_notification_mail( app_id: str, user_id: str, email_address: str, - push_actions: Iterable[Dict[str, Any]], - reason: Dict[str, Any], + push_actions: Iterable[EmailPushAction], + reason: EmailReason, ) -> None: """ Send email regarding a user's room notifications @@ -230,7 +238,7 @@ async def send_notification_mail( [pa["event_id"] for pa in push_actions] ) - notifs_by_room: Dict[str, List[Dict[str, Any]]] = {} + notifs_by_room: Dict[str, List[EmailPushAction]] = {} for pa in push_actions: notifs_by_room.setdefault(pa["room_id"], []).append(pa) @@ -258,7 +266,7 @@ async def _fetch_room_state(room_id: str) -> None: # actually sort our so-called rooms_in_order list, most recent room first rooms_in_order.sort(key=lambda r: -(notifs_by_room[r][-1]["received_ts"] or 0)) - rooms = [] + rooms: List[RoomVars] = [] for r in rooms_in_order: roomvars = await self._get_room_vars( @@ -289,7 +297,7 @@ async def _fetch_room_state(room_id: str) -> None: notifs_by_room, state_by_room, notif_events, reason ) - template_vars = { + 
template_vars: TemplateVars = { "user_display_name": user_display_name, "unsubscribe_link": self._make_unsubscribe_link( user_id, app_id, email_address @@ -302,10 +310,10 @@ async def _fetch_room_state(room_id: str) -> None: await self.send_email(email_address, summary_text, template_vars) async def send_email( - self, email_address: str, subject: str, extra_template_vars: Dict[str, Any] + self, email_address: str, subject: str, extra_template_vars: TemplateVars ) -> None: """Send an email with the given information and template text""" - template_vars = { + template_vars: TemplateVars = { "app_name": self.app_name, "server_name": self.hs.config.server.server_name, } @@ -327,10 +335,10 @@ async def _get_room_vars( self, room_id: str, user_id: str, - notifs: Iterable[Dict[str, Any]], + notifs: Iterable[EmailPushAction], notif_events: Dict[str, EventBase], room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> RoomVars: """ Generate the variables for notifications on a per-room basis. @@ -356,12 +364,13 @@ async def _get_room_vars( room_name = await calculate_room_name(self.store, room_state_ids, user_id) - room_vars: Dict[str, Any] = { + room_vars: RoomVars = { "title": room_name, "hash": string_ordinal_total(room_id), # See sender avatar hash "notifs": [], "invite": is_invite, "link": self._make_room_link(room_id), + "avatar_url": await self._get_room_avatar(room_state_ids), } if not is_invite: @@ -393,13 +402,34 @@ async def _get_room_vars( return room_vars + async def _get_room_avatar( + self, + room_state_ids: StateMap[str], + ) -> Optional[str]: + """ + Retrieve the avatar url for this room---if it exists. + + Args: + room_state_ids: The event IDs of the current room state. + + Returns: + room's avatar url if it's present and a string; otherwise None. + """ + event_id = room_state_ids.get((EventTypes.RoomAvatar, "")) + if event_id: + ev = await self.store.get_event(event_id) + url = ev.content.get("url") + if isinstance(url, str): + return url + return None + async def _get_notif_vars( self, - notif: Dict[str, Any], + notif: EmailPushAction, user_id: str, notif_event: EventBase, room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> NotifVars: """ Generate the variables for a single notification. @@ -420,7 +450,7 @@ async def _get_notif_vars( after_limit=CONTEXT_AFTER, ) - ret = { + ret: NotifVars = { "link": self._make_notif_link(notif), "ts": notif["received_ts"], "messages": [], @@ -439,8 +469,8 @@ async def _get_notif_vars( return ret async def _get_message_vars( - self, notif: Dict[str, Any], event: EventBase, room_state_ids: StateMap[str] - ) -> Optional[Dict[str, Any]]: + self, notif: EmailPushAction, event: EventBase, room_state_ids: StateMap[str] + ) -> Optional[MessageVars]: """ Generate the variables for a single event, if possible. @@ -472,7 +502,9 @@ async def _get_message_vars( if sender_state_event: sender_name = name_from_member_event(sender_state_event) - sender_avatar_url = sender_state_event.content.get("avatar_url") + sender_avatar_url: Optional[str] = sender_state_event.content.get( + "avatar_url" + ) else: # No state could be found, fallback to the MXID. 
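The new `_get_room_avatar` helper shows the defensive style used throughout this patch: event content is client-supplied JSON, so a value is type-checked before it reaches a template. The guard on its own (a sketch; the bare dict stands in for `ev.content`):

from typing import Optional


def avatar_url_from_content(content: dict) -> Optional[str]:
    # "url" may be absent or of the wrong type in client-supplied
    # content; only pass it on when it really is a string.
    url = content.get("url")
    if isinstance(url, str):
        return url
    return None


assert avatar_url_from_content({"url": "mxc://example.org/abc"}) == "mxc://example.org/abc"
assert avatar_url_from_content({"url": 42}) is None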
sender_name = event.sender @@ -482,7 +514,7 @@ async def _get_message_vars( # sender_hash % the number of default images to choose from sender_hash = string_ordinal_total(event.sender) - ret = { + ret: MessageVars = { "event_type": event.type, "is_historical": event.event_id != notif["event_id"], "id": event.event_id, @@ -497,6 +529,8 @@ async def _get_message_vars( return ret msgtype = event.content.get("msgtype") + if not isinstance(msgtype, str): + msgtype = None ret["msgtype"] = msgtype @@ -511,7 +545,7 @@ async def _get_message_vars( return ret def _add_text_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add a sanitised message body to the message variables. @@ -521,8 +555,8 @@ def _add_text_message_vars( event: The event under consideration. """ msgformat = event.content.get("format") - - messagevars["format"] = msgformat + if not isinstance(msgformat, str): + msgformat = None formatted_body = event.content.get("formatted_body") body = event.content.get("body") @@ -533,7 +567,7 @@ def _add_text_message_vars( messagevars["body_text_html"] = safe_text(body) def _add_image_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add an image URL to the message variables. @@ -548,7 +582,7 @@ def _add_image_message_vars( async def _make_summary_text_single_room( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], user_id: str, @@ -663,10 +697,10 @@ async def _make_summary_text_single_room( async def _make_summary_text( self, - notifs_by_room: Dict[str, List[Dict[str, Any]]], + notifs_by_room: Dict[str, List[EmailPushAction]], room_state_ids: Dict[str, StateMap[str]], notif_events: Dict[str, EventBase], - reason: Dict[str, Any], + reason: EmailReason, ) -> str: """ Make a summary text for the email when multiple rooms have notifications. @@ -696,7 +730,7 @@ async def _make_summary_text( async def _make_summary_text_from_member_events( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], ) -> str: @@ -774,8 +808,8 @@ def _make_room_link(self, room_id: str) -> str: Returns: A link to open a room in the web client. """ - if self.hs.config.email_riot_base_url: - base_url = "%s/#/room" % (self.hs.config.email_riot_base_url) + if self.hs.config.email.email_riot_base_url: + base_url = "%s/#/room" % (self.hs.config.email.email_riot_base_url) elif self.app_name == "Vector": # need /beta for Universal Links to work on iOS base_url = "https://vector.im/beta/#/room" @@ -783,7 +817,7 @@ def _make_room_link(self, room_id: str) -> str: base_url = "https://matrix.to/#" return "%s/%s" % (base_url, room_id) - def _make_notif_link(self, notif: Dict[str, str]) -> str: + def _make_notif_link(self, notif: EmailPushAction) -> str: """ Generate a link to open an event in the web client. @@ -793,9 +827,9 @@ def _make_notif_link(self, notif: Dict[str, str]) -> str: Returns: A link to open the notification in the web client. 
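`_make_room_link` is pure string composition over the configured base URL, with app-specific fallbacks. The same logic as a free function (a sketch; the config value is passed in rather than read from `hs.config`):

from typing import Optional


def make_room_link(room_id: str, riot_base_url: Optional[str], app_name: str) -> str:
    # Prefer an explicitly configured web client, then the app-specific
    # default, then a matrix.to permalink as the generic fallback.
    if riot_base_url:
        base_url = "%s/#/room" % (riot_base_url,)
    elif app_name == "Vector":
        # need /beta for Universal Links to work on iOS
        base_url = "https://vector.im/beta/#/room"
    else:
        base_url = "https://matrix.to/#"
    return "%s/%s" % (base_url, room_id)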
""" - if self.hs.config.email_riot_base_url: + if self.hs.config.email.email_riot_base_url: return "%s/#/room/%s/%s" % ( - self.hs.config.email_riot_base_url, + self.hs.config.email.email_riot_base_url, notif["room_id"], notif["event_id"], ) @@ -830,7 +864,7 @@ def _make_unsubscribe_link( # XXX: make r0 once API is stable return "%s_matrix/client/unstable/pushers/remove?%s" % ( - self.hs.config.public_baseurl, + self.hs.config.server.public_baseurl, urllib.parse.urlencode(params), ) @@ -870,7 +904,7 @@ def safe_text(raw_text: str) -> jinja2.Markup: A Markup object ready to safely use in a Jinja template. """ return jinja2.Markup( - bleach.linkify(bleach.clean(raw_text, tags=[], attributes={}, strip=False)) + bleach.linkify(bleach.clean(raw_text, tags=[], attributes=[], strip=False)) ) diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 7a8dc63976e2..7f68092ec5e5 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -18,7 +18,7 @@ from typing import Any, Dict, List, Optional, Pattern, Tuple, Union from synapse.events import EventBase -from synapse.types import UserID +from synapse.types import JsonDict, UserID from synapse.util import glob_to_regex, re_word_boundary from synapse.util.caches.lrucache import LruCache @@ -129,7 +129,7 @@ def __init__( self._value_cache = _flatten_dict(event) def matches( - self, condition: Dict[str, Any], user_id: str, display_name: str + self, condition: Dict[str, Any], user_id: str, display_name: Optional[str] ) -> bool: if condition["kind"] == "event_match": return self._event_match(condition, user_id) @@ -172,7 +172,7 @@ def _event_match(self, condition: dict, user_id: str) -> bool: return _glob_matches(pattern, haystack) - def _contains_display_name(self, display_name: str) -> bool: + def _contains_display_name(self, display_name: Optional[str]) -> bool: if not display_name: return False @@ -222,7 +222,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: def _flatten_dict( - d: Union[EventBase, dict], + d: Union[EventBase, JsonDict], prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, ) -> Dict[str, str]: @@ -233,7 +233,7 @@ def _flatten_dict( for key, value in d.items(): if isinstance(value, str): result[".".join(prefix + [key])] = value.lower() - elif hasattr(value, "items"): + elif isinstance(value, dict): _flatten_dict(value, prefix=(prefix + [key]), result=result) return result diff --git a/synapse/push/push_types.py b/synapse/push/push_types.py new file mode 100644 index 000000000000..8d16ab62cef6 --- /dev/null +++ b/synapse/push/push_types.py @@ -0,0 +1,136 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List, Optional + +from typing_extensions import TypedDict + + +class EmailReason(TypedDict, total=False): + """ + Information on the event that triggered the email to be sent + + room_id: the ID of the room the event was sent in + now: timestamp in ms when the email is being sent out + room_name: a human-readable name for the room the event was sent in + received_at: the time in milliseconds at which the event was received + delay_before_mail_ms: the amount of time in milliseconds Synapse always waits + before ever emailing about a notification (to give the user a chance to respond + to other push or notice the window) + last_sent_ts: the time in milliseconds at which a notification was last sent + for an event in this room + throttle_ms: the minimum amount of time in milliseconds between two + notifications being sent for this room + """ + + room_id: str + now: int + room_name: Optional[str] + received_at: int + delay_before_mail_ms: int + last_sent_ts: int + throttle_ms: int + + +class MessageVars(TypedDict, total=False): + """ + Details about a specific message to include in a notification + + event_type: the type of the event + is_historical: a boolean, which is `False` if the message is the one + that triggered the notification, `True` otherwise + id: the ID of the event + ts: the time in milliseconds at which the event was sent + sender_name: the display name for the event's sender + sender_avatar_url: the avatar URL (as a `mxc://` URL) for the event's + sender + sender_hash: a hash of the user ID of the sender + msgtype: the type of the message + body_text_html: html representation of the message + body_text_plain: plaintext representation of the message + image_url: mxc url of an image, when "msgtype" is "m.image" + """ + + event_type: str + is_historical: bool + id: str + ts: int + sender_name: str + sender_avatar_url: Optional[str] + sender_hash: int + msgtype: Optional[str] + body_text_html: str + body_text_plain: str + image_url: str + + +class NotifVars(TypedDict): + """ + Details about an event we are about to include in a notification + + link: a `matrix.to` link to the event + ts: the time in milliseconds at which the event was received + messages: a list of messages containing one message before the event, the + message in the event, and one message after the event. + """ + + link: str + ts: Optional[int] + messages: List[MessageVars] + + +class RoomVars(TypedDict): + """ + Represents a room containing events to include in the email. + + title: a human-readable name for the room + hash: a hash of the ID of the room + invite: a boolean, which is `True` if the room is an invite the user hasn't + accepted yet, `False` otherwise + notifs: a list of events, or an empty list if `invite` is `True`. + link: a `matrix.to` link to the room + avatar_url: url to the room's avatar + """ + + title: Optional[str] + hash: int + invite: bool + notifs: List[NotifVars] + link: str + avatar_url: Optional[str] + + +class TemplateVars(TypedDict, total=False): + """ + Generic structure for passing to the email sender; it can hold all the fields used in email templates. + + app_name: name of the app/service this homeserver is associated with + server_name: name of our own homeserver + link: a link to include into the email to be sent + user_display_name: the display name for the user receiving the notification + unsubscribe_link: the link users can click to unsubscribe from email notifications + summary_text: a summary of the notification(s).
The text used can be customised + by configuring the various settings in the `email.subjects` section of the + configuration file. + rooms: a list of rooms containing events to include in the email + reason: information on the event that triggered the email to be sent + """ + + app_name: str + server_name: str + link: str + user_display_name: str + unsubscribe_link: str + summary_text: str + rooms: List[RoomVars] + reason: EmailReason diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 021275437c5a..b57e09409159 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -35,12 +35,12 @@ def __init__(self, hs: "HomeServer"): "http": HttpPusher } - logger.info("email enable notifs: %r", hs.config.email_enable_notifs) - if hs.config.email_enable_notifs: + logger.info("email enable notifs: %r", hs.config.email.email_enable_notifs) + if hs.config.email.email_enable_notifs: self.mailers: Dict[str, Mailer] = {} - self._notif_template_html = hs.config.email_notif_template_html - self._notif_template_text = hs.config.email_notif_template_text + self._notif_template_html = hs.config.email.email_notif_template_html + self._notif_template_text = hs.config.email.email_notif_template_text self.pusher_types["email"] = self._create_email_pusher @@ -77,4 +77,4 @@ def _app_name_from_pusherdict(self, pusher_config: PusherConfig) -> str: if isinstance(brand, str): return brand - return self.config.email_app_name + return self.config.email.email_app_name diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index a1436f39305d..26735447a6f1 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -62,7 +62,7 @@ def __init__(self, hs: "HomeServer"): self.clock = self.hs.get_clock() # We shard the handling of push notifications by user ID. - self._pusher_shard_config = hs.config.push.pusher_shard_config + self._pusher_shard_config = hs.config.worker.pusher_shard_config self._instance_name = hs.get_instance_name() self._should_start_pushers = ( self._instance_name in self._pusher_shard_config.instances diff --git a/tests/rest/client/v2_alpha/__init__.py b/synapse/py.typed similarity index 100% rename from tests/rest/client/v2_alpha/__init__.py rename to synapse/py.typed diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index cdcbdd772b14..13fb69460ea9 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -48,8 +48,10 @@ # [1] https://pip.pypa.io/en/stable/reference/pip_install/#requirement-specifiers. REQUIREMENTS = [ - "jsonschema>=2.5.1", - "frozendict>=1", + # we use the TYPE_CHECKER.redefine method added in jsonschema 3.0.0 + "jsonschema>=3.0.0", + # frozendict 2.1.2 is broken on Debian 10: https://github.com/Marco-Sulla/python-frozendict/issues/41 + "frozendict>=1,<2.1.2", "unpaddedbase64>=1.1.0", "canonicaljson>=1.4.0", # we use the type definitions added in signedjson 1.1. @@ -85,7 +87,7 @@ # We enforce that we have a `cryptography` version that bundles an `openssl` # with the latest security patches. "cryptography>=3.4.7", - "ijson>=3.0", + "ijson>=3.1", ] CONDITIONAL_REQUIREMENTS = { diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index ba8114ac9e13..1457d9d59b1f 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
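The TypedDicts introduced in push_types.py above use `total=False` so that every key is optional, matching how the mailer fills the structures in piecemeal, while still letting a type checker reject misspelt keys or wrongly typed values. A trimmed-down usage sketch (the class here is a stand-in, not the real `TemplateVars`):

from typing_extensions import TypedDict


class TemplateVarsSketch(TypedDict, total=False):
    app_name: str
    server_name: str
    link: str


# Only the keys actually needed must be present; assigning to an
# unknown key, or with the wrong value type, is a type-check error.
template_vars: TemplateVarsSketch = {"app_name": "Matrix"}
template_vars["server_name"] = "example.org"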
+from typing import TYPE_CHECKING + from synapse.http.server import JsonResource from synapse.replication.http import ( account_data, @@ -26,16 +28,19 @@ streams, ) +if TYPE_CHECKING: + from synapse.server import HomeServer + REPLICATION_PREFIX = "/_synapse/replication" class ReplicationRestResource(JsonResource): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): # We enable extracting jaeger contexts here as these are internal APIs. super().__init__(hs, canonical_json=False, extract_context=True) self.register_servlets(hs) - def register_servlets(self, hs): + def register_servlets(self, hs: "HomeServer"): send_event.register_servlets(hs, self) federation.register_servlets(hs, self) presence.register_servlets(hs, self) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 25589b0042a0..585332b244a4 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -17,7 +17,7 @@ import re import urllib from inspect import signature -from typing import TYPE_CHECKING, Dict, List, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Tuple from prometheus_client import Counter, Gauge @@ -156,7 +156,7 @@ async def _handle_request(self, request, **kwargs): pass @classmethod - def make_client(cls, hs): + def make_client(cls, hs: "HomeServer"): """Create a client that makes requests. Returns a callable that accepts the same parameters as @@ -168,8 +168,8 @@ def make_client(cls, hs): client = hs.get_simple_http_client() local_instance_name = hs.get_instance_name() - master_host = hs.config.worker_replication_host - master_port = hs.config.worker_replication_http_port + master_host = hs.config.worker.worker_replication_host + master_port = hs.config.worker.worker_replication_http_port instance_map = hs.config.worker.instance_map @@ -182,85 +182,89 @@ def make_client(cls, hs): ) @trace(opname="outgoing_replication_request") - @outgoing_gauge.track_inprogress() async def send_request(*, instance_name="master", **kwargs): - if instance_name == local_instance_name: - raise Exception("Trying to send HTTP request to self") - if instance_name == "master": - host = master_host - port = master_port - elif instance_name in instance_map: - host = instance_map[instance_name].host - port = instance_map[instance_name].port - else: - raise Exception( - "Instance %r not in 'instance_map' config" % (instance_name,) + with outgoing_gauge.track_inprogress(): + if instance_name == local_instance_name: + raise Exception("Trying to send HTTP request to self") + if instance_name == "master": + host = master_host + port = master_port + elif instance_name in instance_map: + host = instance_map[instance_name].host + port = instance_map[instance_name].port + else: + raise Exception( + "Instance %r not in 'instance_map' config" % (instance_name,) + ) + + data = await cls._serialize_payload(**kwargs) + + url_args = [ + urllib.parse.quote(kwargs[name], safe="") for name in cls.PATH_ARGS + ] + + if cls.CACHE: + txn_id = random_string(10) + url_args.append(txn_id) + + if cls.METHOD == "POST": + request_func: Callable[ + ..., Awaitable[Any] + ] = client.post_json_get_json + elif cls.METHOD == "PUT": + request_func = client.put_json + elif cls.METHOD == "GET": + request_func = client.get_json + else: + # We have already asserted in the constructor that a + # compatible was picked, but lets be paranoid. 
+ raise Exception( + "Unknown METHOD on %s replication endpoint" % (cls.NAME,) + ) + + uri = "http://%s:%s/_synapse/replication/%s/%s" % ( + host, + port, + cls.NAME, + "/".join(url_args), ) - data = await cls._serialize_payload(**kwargs) - - url_args = [ - urllib.parse.quote(kwargs[name], safe="") for name in cls.PATH_ARGS - ] - - if cls.CACHE: - txn_id = random_string(10) - url_args.append(txn_id) - - if cls.METHOD == "POST": - request_func = client.post_json_get_json - elif cls.METHOD == "PUT": - request_func = client.put_json - elif cls.METHOD == "GET": - request_func = client.get_json - else: - # We have already asserted in the constructor that a - # compatible was picked, but lets be paranoid. - raise Exception( - "Unknown METHOD on %s replication endpoint" % (cls.NAME,) - ) - - uri = "http://%s:%s/_synapse/replication/%s/%s" % ( - host, - port, - cls.NAME, - "/".join(url_args), - ) - - try: - # We keep retrying the same request for timeouts. This is so that we - # have a good idea that the request has either succeeded or failed on - # the master, and so whether we should clean up or not. - while True: - headers: Dict[bytes, List[bytes]] = {} - # Add an authorization header, if configured. - if replication_secret: - headers[b"Authorization"] = [b"Bearer " + replication_secret] - opentracing.inject_header_dict(headers, check_destination=False) - try: - result = await request_func(uri, data, headers=headers) - break - except RequestTimedOutError: - if not cls.RETRY_ON_TIMEOUT: - raise - - logger.warning("%s request timed out; retrying", cls.NAME) - - # If we timed out we probably don't need to worry about backing - # off too much, but lets just wait a little anyway. - await clock.sleep(1) - except HttpResponseException as e: - # We convert to SynapseError as we know that it was a SynapseError - # on the main process that we should send to the client. (And - # importantly, not stack traces everywhere) - _outgoing_request_counter.labels(cls.NAME, e.code).inc() - raise e.to_synapse_error() - except Exception as e: - _outgoing_request_counter.labels(cls.NAME, "ERR").inc() - raise SynapseError(502, "Failed to talk to main process") from e - - _outgoing_request_counter.labels(cls.NAME, 200).inc() - return result + try: + # We keep retrying the same request for timeouts. This is so that we + # have a good idea that the request has either succeeded or failed + # on the master, and so whether we should clean up or not. + while True: + headers: Dict[bytes, List[bytes]] = {} + # Add an authorization header, if configured. + if replication_secret: + headers[b"Authorization"] = [ + b"Bearer " + replication_secret + ] + opentracing.inject_header_dict(headers, check_destination=False) + try: + result = await request_func(uri, data, headers=headers) + break + except RequestTimedOutError: + if not cls.RETRY_ON_TIMEOUT: + raise + + logger.warning("%s request timed out; retrying", cls.NAME) + + # If we timed out we probably don't need to worry about backing + # off too much, but lets just wait a little anyway. + await clock.sleep(1) + except HttpResponseException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the main process that we should send to the client. 
(And + # importantly, not stack traces everywhere) + _outgoing_request_counter.labels(cls.NAME, e.code).inc() + raise e.to_synapse_error() + except Exception as e: + _outgoing_request_counter.labels(cls.NAME, "ERR").inc() + raise SynapseError(502, "Failed to talk to main process") from e + + _outgoing_request_counter.labels(cls.NAME, 200).inc() + return result return send_request diff --git a/synapse/replication/http/account_data.py b/synapse/replication/http/account_data.py index 70e951af6376..5f0f225aa953 100644 --- a/synapse/replication/http/account_data.py +++ b/synapse/replication/http/account_data.py @@ -13,10 +13,14 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -37,7 +41,7 @@ class ReplicationUserAccountDataRestServlet(ReplicationEndpoint): PATH_ARGS = ("user_id", "account_data_type") CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.handler = hs.get_account_data_handler() @@ -78,7 +82,7 @@ class ReplicationRoomAccountDataRestServlet(ReplicationEndpoint): PATH_ARGS = ("user_id", "room_id", "account_data_type") CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.handler = hs.get_account_data_handler() @@ -119,7 +123,7 @@ class ReplicationAddTagRestServlet(ReplicationEndpoint): PATH_ARGS = ("user_id", "room_id", "tag") CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.handler = hs.get_account_data_handler() @@ -162,7 +166,7 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint): ) CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.handler = hs.get_account_data_handler() @@ -183,7 +187,7 @@ async def _handle_request(self, request, user_id, room_id, tag): return 200, {"max_stream_id": max_stream_id} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationUserAccountDataRestServlet(hs).register(http_server) ReplicationRoomAccountDataRestServlet(hs).register(http_server) ReplicationAddTagRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py index 5a5818ef61e2..42dffb39cbef 100644 --- a/synapse/replication/http/devices.py +++ b/synapse/replication/http/devices.py @@ -13,9 +13,13 @@ # limitations under the License. 
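The retry behaviour preserved by the `make_client` refactor above deserves a standalone look: timeouts are retried with the identical request so the caller eventually learns whether the main process handled it, while other failures surface immediately. A generic sketch (the transport function and error type are illustrative stand-ins):

import asyncio
import logging

logger = logging.getLogger(__name__)


class RequestTimedOutError(Exception):
    """Illustrative stand-in for the transport's timeout error."""


async def send_with_retry(request_func, uri, data, retry_on_timeout=True):
    # Keep retrying the identical request on timeout, so we eventually
    # know whether the remote end actually processed it.
    while True:
        try:
            return await request_func(uri, data)
        except RequestTimedOutError:
            if not retry_on_timeout:
                raise
            logger.warning("request to %s timed out; retrying", uri)
            # A timeout already implies a long wait, so back off only briefly.
            await asyncio.sleep(1)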
import logging +from typing import TYPE_CHECKING from synapse.replication.http._base import ReplicationEndpoint +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -51,7 +55,7 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint): PATH_ARGS = ("user_id",) CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.device_list_updater = hs.get_device_handler().device_list_updater @@ -68,5 +72,5 @@ async def _handle_request(self, request, user_id): return 200, user_devices -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationUserDevicesResyncRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 79cadb7b574c..5ed535c90dea 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict @@ -21,6 +22,9 @@ from synapse.replication.http._base import ReplicationEndpoint from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -56,13 +60,13 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): NAME = "fed_send_events" PATH_ARGS = () - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() self.storage = hs.get_storage() self.clock = hs.get_clock() - self.federation_handler = hs.get_federation_handler() + self.federation_event_handler = hs.get_federation_event_handler() @staticmethod async def _serialize_payload(store, room_id, event_and_contexts, backfilled): @@ -127,7 +131,7 @@ async def _handle_request(self, request): logger.info("Got %d events from federation", len(event_and_contexts)) - max_stream_id = await self.federation_handler.persist_events_and_notify( + max_stream_id = await self.federation_event_handler.persist_events_and_notify( room_id, event_and_contexts, backfilled ) @@ -151,7 +155,7 @@ class ReplicationFederationSendEduRestServlet(ReplicationEndpoint): NAME = "fed_send_edu" PATH_ARGS = ("edu_type",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() @@ -194,7 +198,7 @@ class ReplicationGetQueryRestServlet(ReplicationEndpoint): # This is a query, so let's not bother caching CACHE = False - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() @@ -238,7 +242,7 @@ class ReplicationCleanRoomRestServlet(ReplicationEndpoint): NAME = "fed_cleanup_room" PATH_ARGS = ("room_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() @@ -273,7 +277,7 @@ class ReplicationStoreRoomOnOutlierMembershipRestServlet(ReplicationEndpoint): NAME = "store_room_on_outlier_membership" PATH_ARGS = ("room_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() @@ -289,7 +293,7 @@ async def _handle_request(self, request, room_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationFederationSendEventsRestServlet(hs).register(http_server) 
ReplicationFederationSendEduRestServlet(hs).register(http_server) ReplicationGetQueryRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/login.py b/synapse/replication/http/login.py index 550bd5c95f8d..daacc34ceac4 100644 --- a/synapse/replication/http/login.py +++ b/synapse/replication/http/login.py @@ -13,10 +13,14 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -30,7 +34,7 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint): NAME = "device_check_registered" PATH_ARGS = ("user_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.registration_handler = hs.get_registration_handler() @@ -42,6 +46,8 @@ async def _serialize_payload( is_guest, is_appservice_ghost, should_issue_refresh_token, + auth_provider_id, + auth_provider_session_id, ): """ Args: @@ -59,6 +65,8 @@ async def _serialize_payload( "is_guest": is_guest, "is_appservice_ghost": is_appservice_ghost, "should_issue_refresh_token": should_issue_refresh_token, + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, } async def _handle_request(self, request, user_id): @@ -69,6 +77,8 @@ async def _handle_request(self, request, user_id): is_guest = content["is_guest"] is_appservice_ghost = content["is_appservice_ghost"] should_issue_refresh_token = content["should_issue_refresh_token"] + auth_provider_id = content["auth_provider_id"] + auth_provider_session_id = content["auth_provider_session_id"] res = await self.registration_handler.register_device_inner( user_id, @@ -77,10 +87,12 @@ async def _handle_request(self, request, user_id): is_guest, is_appservice_ghost=is_appservice_ghost, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) return 200, res -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): RegisterDeviceReplicationServlet(hs).register(http_server) diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index 34206c506066..7371c240b274 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -45,7 +45,7 @@ class ReplicationRemoteJoinRestServlet(ReplicationEndpoint): NAME = "remote_join" PATH_ARGS = ("room_id", "user_id") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.federation_handler = hs.get_federation_handler() @@ -320,7 +320,7 @@ class ReplicationUserJoinedLeftRoomRestServlet(ReplicationEndpoint): PATH_ARGS = ("room_id", "user_id", "change") CACHE = False # No point caching as should return instantly. 
- def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.registeration_handler = hs.get_registration_handler() @@ -360,7 +360,7 @@ async def _handle_request( # type: ignore return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationRemoteJoinRestServlet(hs).register(http_server) ReplicationRemoteRejectInviteRestServlet(hs).register(http_server) ReplicationUserJoinedLeftRoomRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/presence.py b/synapse/replication/http/presence.py index bb0024795349..63143085d521 100644 --- a/synapse/replication/http/presence.py +++ b/synapse/replication/http/presence.py @@ -117,6 +117,6 @@ async def _handle_request(self, request, user_id): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationBumpPresenceActiveTime(hs).register(http_server) ReplicationPresenceSetState(hs).register(http_server) diff --git a/synapse/replication/http/push.py b/synapse/replication/http/push.py index 139427cb1f29..6c8db3061ee2 100644 --- a/synapse/replication/http/push.py +++ b/synapse/replication/http/push.py @@ -67,5 +67,5 @@ async def _handle_request(self, request, user_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationRemovePusherRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index d6dd7242eb20..7adfbb666f39 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -13,10 +13,14 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -26,7 +30,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): NAME = "register_user" PATH_ARGS = ("user_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() self.registration_handler = hs.get_registration_handler() @@ -100,7 +104,7 @@ class ReplicationPostRegisterActionsServlet(ReplicationEndpoint): NAME = "post_register" PATH_ARGS = ("user_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.store = hs.get_datastore() self.registration_handler = hs.get_registration_handler() @@ -130,6 +134,6 @@ async def _handle_request(self, request, user_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationRegisterServlet(hs).register(http_server) ReplicationPostRegisterActionsServlet(hs).register(http_server) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index fae5ffa451d3..9f6851d0592e 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -13,6 +13,7 @@ # limitations under the License. 
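Almost every servlet module in this patch gains the same guarded import: `HomeServer` is pulled in only under `TYPE_CHECKING`, so `hs` can be annotated without creating a runtime import cycle with `synapse.server`. The pattern in miniature:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported for annotations only; never executed at runtime, which
    # avoids a circular import with synapse.server.
    from synapse.server import HomeServer


class SomeServlet:
    def __init__(self, hs: "HomeServer"):
        self.clock = hs.get_clock()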
import logging +from typing import TYPE_CHECKING from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict @@ -22,6 +23,9 @@ from synapse.types import Requester, UserID from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -57,7 +61,7 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): NAME = "send_event" PATH_ARGS = ("event_id",) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() @@ -135,5 +139,5 @@ async def _handle_request(self, request, event_id): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationSendEventRestServlet(hs).register(http_server) diff --git a/synapse/replication/http/streams.py b/synapse/replication/http/streams.py index 9afa147d00c1..3223bc2432b2 100644 --- a/synapse/replication/http/streams.py +++ b/synapse/replication/http/streams.py @@ -13,11 +13,15 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.api.errors import SynapseError from synapse.http.servlet import parse_integer from synapse.replication.http._base import ReplicationEndpoint +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -46,7 +50,7 @@ class ReplicationGetStreamUpdates(ReplicationEndpoint): PATH_ARGS = ("stream_name",) METHOD = "GET" - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self._instance_name = hs.get_instance_name() @@ -74,5 +78,5 @@ async def _handle_request(self, request, stream_name): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server): ReplicationGetStreamUpdates(hs).register(http_server) diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index e460dd85cd83..7ecb446e7c78 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -13,18 +13,21 @@ # limitations under the License. import logging -from typing import Optional +from typing import TYPE_CHECKING, Optional from synapse.storage.database import DatabasePool from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class BaseSlavedStore(CacheInvalidationWorkerStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) if isinstance(self.database_engine, PostgresEngine): self._cache_id_gen: Optional[ diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py index 2cb7489047f7..fa132d10b414 100644 --- a/synapse/replication/slave/storage/_slaved_id_tracker.py +++ b/synapse/replication/slave/storage/_slaved_id_tracker.py @@ -13,14 +13,22 @@ # limitations under the License. 
from typing import List, Optional, Tuple -from synapse.storage.types import Connection -from synapse.storage.util.id_generators import _load_current_id +from synapse.storage.database import LoggingDatabaseConnection +from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id -class SlavedIdTracker: +class SlavedIdTracker(AbstractStreamIdTracker): + """Tracks the "current" stream ID of a stream with a single writer. + + See `AbstractStreamIdTracker` for more details. + + Note that this class does not work correctly when there are multiple + writers. + """ + def __init__( self, - db_conn: Connection, + db_conn: LoggingDatabaseConnection, table: str, column: str, extra_tables: Optional[List[Tuple[str, str]]] = None, @@ -36,17 +44,7 @@ def advance(self, instance_name: Optional[str], new_id: int): self._current = (max if self.step > 0 else min)(self._current, new_id) def get_current_token(self) -> int: - """ - - Returns: - int - """ return self._current def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. - """ return self.get_current_token() diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index 436d39c3203f..61cd7e522800 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import TYPE_CHECKING + from synapse.storage.database import DatabasePool from synapse.storage.databases.main.client_ips import LAST_SEEN_GRANULARITY from synapse.util.caches.lrucache import LruCache from ._base import BaseSlavedStore +if TYPE_CHECKING: + from synapse.server import HomeServer + class SlavedClientIpStore(BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.client_ip_last_seen: LruCache[tuple, int] = LruCache( diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 26bdead5651e..0a582960896d 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
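`SlavedIdTracker` above only has to follow a stream that has a single writer: `advance` folds a newly observed position into the current one, and `get_current_token` reports it. Its core logic, condensed and without the database loading (a sketch):

from typing import Optional


class SingleWriterIdTracker:
    """Illustrative stand-in for SlavedIdTracker's in-memory logic."""

    def __init__(self, current: int, step: int = 1):
        self.step = step
        self._current = current

    def advance(self, instance_name: Optional[str], new_id: int) -> None:
        # Streams that count downwards (step < 0) treat the minimum as
        # the most advanced position, hence the max/min selection.
        self._current = (max if self.step > 0 else min)(self._current, new_id)

    def get_current_token(self) -> int:
        return self._current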
+from typing import TYPE_CHECKING + from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream @@ -20,9 +22,12 @@ from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache +if TYPE_CHECKING: + from synapse.server import HomeServer + class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.hs = hs diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index d4d3f8c44876..63ed50caa5eb 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from typing import TYPE_CHECKING from synapse.storage.database import DatabasePool from synapse.storage.databases.main.event_federation import EventFederationWorkerStore @@ -30,6 +31,9 @@ from ._base import BaseSlavedStore +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -54,7 +58,7 @@ class SlavedEventStore( RelationsWorkerStore, BaseSlavedStore, ): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) events_max = self._stream_id_gen.get_current_token() diff --git a/synapse/replication/slave/storage/filtering.py b/synapse/replication/slave/storage/filtering.py index 37875bc9730f..90284c202d55 100644 --- a/synapse/replication/slave/storage/filtering.py +++ b/synapse/replication/slave/storage/filtering.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import TYPE_CHECKING + from synapse.storage.database import DatabasePool from synapse.storage.databases.main.filtering import FilteringStore from ._base import BaseSlavedStore +if TYPE_CHECKING: + from synapse.server import HomeServer + class SlavedFilteringStore(BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) # Filters are immutable so this cache doesn't need to be expired diff --git a/synapse/replication/slave/storage/groups.py b/synapse/replication/slave/storage/groups.py index e9bdc3847006..497e16c69e6a 100644 --- a/synapse/replication/slave/storage/groups.py +++ b/synapse/replication/slave/storage/groups.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
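`SlavedEventStore` above is assembled from many small `*WorkerStore` mixins stacked on `BaseSlavedStore`; each contributes one slice of read-side behaviour and chains initialisation through cooperative `super()` calls. The shape of that composition (class and attribute names are illustrative):

class BaseStore:
    def __init__(self, database, db_conn, hs):
        self.database = database


class EventsWorkerMixin(BaseStore):
    def __init__(self, database, db_conn, hs):
        # Defer to the next class in the MRO, then do mixin-local setup.
        super().__init__(database, db_conn, hs)
        self.events_ready = True


class SlavedStore(EventsWorkerMixin, BaseStore):
    def __init__(self, database, db_conn, hs):
        super().__init__(database, db_conn, hs)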
+from typing import TYPE_CHECKING + from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import GroupServerStream @@ -19,9 +21,12 @@ from synapse.storage.databases.main.group_server import GroupServerWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache +if TYPE_CHECKING: + from synapse.server import HomeServer + class SlavedGroupServerStore(GroupServerWorkerStore, BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.hs = hs diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 4d5f86286213..7541e21de9dd 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import PushRulesStream from synapse.storage.databases.main.push_rule import PushRulesWorkerStore @@ -25,9 +24,6 @@ def get_max_push_rules_stream_id(self): return self._push_rules_stream_id_gen.get_current_token() def process_replication_rows(self, stream_name, instance_name, token, rows): - # We assert this for the benefit of mypy - assert isinstance(self._push_rules_stream_id_gen, SlavedIdTracker) - if stream_name == PushRulesStream.NAME: self._push_rules_stream_id_gen.advance(instance_name, token) for row in rows: diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index 2672a2c94b15..cea90c0f1bf4 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -15,9 +15,8 @@ from typing import TYPE_CHECKING from synapse.replication.tcp.streams import PushersStream -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.pusher import PusherWorkerStore -from synapse.storage.types import Connection from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -27,7 +26,12 @@ class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self._pushers_id_gen = SlavedIdTracker( # type: ignore db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")] diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py deleted file mode 100644 index 8cc6de3f4698..000000000000 --- a/synapse/replication/slave/storage/room.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.tcp.streams import PublicRoomsStream -from synapse.storage.database import DatabasePool -from synapse.storage.databases.main.room import RoomWorkerStore - -from ._base import BaseSlavedStore -from ._slaved_id_tracker import SlavedIdTracker - - -class RoomStore(RoomWorkerStore, BaseSlavedStore): - def __init__(self, database: DatabasePool, db_conn, hs): - super().__init__(database, db_conn, hs) - self._public_room_id_gen = SlavedIdTracker( - db_conn, "public_room_list_stream", "stream_id" - ) - - def get_current_public_room_stream_id(self): - return self._public_room_id_gen.get_current_token() - - def process_replication_rows(self, stream_name, instance_name, token, rows): - if stream_name == PublicRoomsStream.NAME: - self._public_room_id_gen.advance(instance_name, token) - - return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 3fd281171308..e29ae1e375af 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -73,7 +73,7 @@ def __init__( ): self.client_name = client_name self.command_handler = command_handler - self.server_name = hs.config.server_name + self.server_name = hs.config.server.server_name self.hs = hs self._clock = hs.get_clock() # As self.clock is defined in super class @@ -117,7 +117,7 @@ def __init__(self, hs: "HomeServer"): self._instance_name = hs.get_instance_name() self._typing_handler = hs.get_typing_handler() - self._notify_pushers = hs.config.start_pushers + self._notify_pushers = hs.config.worker.start_pushers self._pusher_pool = hs.get_pusherpool() self._presence_handler = hs.get_presence_handler() @@ -207,11 +207,12 @@ async def on_rdata( max_token = self.store.get_room_max_token() event_pos = PersistedEventPosition(instance_name, token) - self.notifier.on_new_room_event_args( + await self.notifier.on_new_room_event_args( event_pos=event_pos, max_room_stream_token=max_token, extra_users=extra_users, room_id=row.data.room_id, + event_id=row.data.event_id, event_type=row.data.type, state_key=row.data.state_key, membership=row.data.membership, diff --git a/synapse/replication/tcp/external_cache.py b/synapse/replication/tcp/external_cache.py index b402f82810fa..aaf91e5e0253 100644 --- a/synapse/replication/tcp/external_cache.py +++ b/synapse/replication/tcp/external_cache.py @@ -21,6 +21,8 @@ from synapse.util import json_decoder, json_encoder if TYPE_CHECKING: + from txredisapi import RedisProtocol + from synapse.server import HomeServer set_counter = Counter( @@ -59,7 +61,12 @@ class ExternalCache: """ def __init__(self, hs: "HomeServer"): - self._redis_connection = hs.get_outbound_redis_connection() + if hs.config.redis.redis_enabled: + self._redis_connection: Optional[ + "RedisProtocol" + ] = hs.get_outbound_redis_connection() + else: + self._redis_connection = None def _get_redis_key(self, cache_name: str, key: str) -> str: return "cache_v1:%s:%s" % (cache_name, key) diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 
eae4515363f2..21293038ef84 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -138,7 +138,7 @@ def __init__(self, hs: "HomeServer"): if isinstance(stream, TypingStream): # Only add TypingStream as a source on the instance in charge of # typing. - if hs.config.worker.writers.typing == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.typing: self._streams_to_replicate.append(stream) continue @@ -168,10 +168,13 @@ def __init__(self, hs: "HomeServer"): continue # Only add any other streams if we're on master. - if hs.config.worker_app is not None: + if hs.config.worker.worker_app is not None: continue - if stream.NAME == FederationStream.NAME and hs.config.send_federation: + if ( + stream.NAME == FederationStream.NAME + and hs.config.worker.send_federation + ): # We only support federation stream if federation sending # has been disabled on the master. continue @@ -222,10 +225,10 @@ def __init__(self, hs: "HomeServer"): }, ) - self._is_master = hs.config.worker_app is None + self._is_master = hs.config.worker.worker_app is None self._federation_sender = None - if self._is_master and not hs.config.send_federation: + if self._is_master and not hs.config.worker.send_federation: self._federation_sender = hs.get_federation_sender() self._server_notices_sender = None @@ -291,7 +294,7 @@ async def _process_command( # This shouldn't be possible raise Exception("Unrecognised command %s in stream queue", cmd.NAME) - def start_replication(self, hs): + def start_replication(self, hs: "HomeServer"): """Helper method to start a replication connection to the remote server using TCP. """ @@ -315,16 +318,24 @@ def start_replication(self, hs): hs, outbound_redis_connection ) hs.get_reactor().connectTCP( - hs.config.redis.redis_host.encode(), + hs.config.redis.redis_host, # type: ignore[arg-type] hs.config.redis.redis_port, self._factory, + timeout=30, + bindAddress=None, ) else: client_name = hs.get_instance_name() self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) - host = hs.config.worker_replication_host - port = hs.config.worker_replication_port - hs.get_reactor().connectTCP(host.encode(), port, self._factory) + host = hs.config.worker.worker_replication_host + port = hs.config.worker.worker_replication_port + hs.get_reactor().connectTCP( + host, # type: ignore[arg-type] + port, + self._factory, + timeout=30, + bindAddress=None, + ) def get_streams(self) -> Dict[str, Stream]: """Get a map from stream name to all streams.""" diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 8c80153ab6e0..7bae36db169b 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -182,9 +182,13 @@ def __init__(self, clock: Clock, handler: "ReplicationCommandHandler"): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. - self._logging_context = BackgroundProcessLoggingContext( - "replication-conn", self.conn_id - ) + with PreserveLoggingContext(): + # thanks to `PreserveLoggingContext()`, the new logcontext is guaranteed to + # capture the sentinel context as its containing context and won't prevent + # GC of / unintentionally reactivate what would be the current context. 
+ self._logging_context = BackgroundProcessLoggingContext( + "replication-conn", self.conn_id + ) def connectionMade(self): logger.info("[%s] Connection established", self.id()) @@ -434,8 +438,12 @@ def on_connection_closed(self): if self.transport: self.transport.unregisterProducer() - # mark the logging context as finished - self._logging_context.__exit__(None, None, None) + # mark the logging context as finished by triggering `__exit__()` + with PreserveLoggingContext(): + with self._logging_context: + pass + # the sentinel context is now active, which may not be correct. + # PreserveLoggingContext() will restore the correct logging context. def __str__(self): addr = None diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index 8c0df627c8d0..8d28bd3f3fcc 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -100,9 +100,13 @@ def __init__(self, *args, **kwargs): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. - self._logging_context = BackgroundProcessLoggingContext( - "replication_command_handler" - ) + with PreserveLoggingContext(): + # thanks to `PreserveLoggingContext()`, the new logcontext is guaranteed to + # capture the sentinel context as its containing context and won't prevent + # GC of / unintentionally reactivate what would be the current context. + self._logging_context = BackgroundProcessLoggingContext( + "replication_command_handler" + ) def connectionMade(self): logger.info("Connected to redis") @@ -182,8 +186,12 @@ def connectionLost(self, reason): super().connectionLost(reason) self.synapse_handler.lost_connection(self) - # mark the logging context as finished - self._logging_context.__exit__(None, None, None) + # mark the logging context as finished by triggering `__exit__()` + with PreserveLoggingContext(): + with self._logging_context: + pass + # the sentinel context is now active, which may not be correct. + # PreserveLoggingContext() will restore the correct logging context. def send_command(self, cmd: Command): """Send a command if connection has been established. 
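The `protocol.py` and `redis.py` hunks above apply the same two-step logcontext discipline: create the `BackgroundProcessLoggingContext` while the sentinel context is active, and later finish it by entering and leaving it under `PreserveLoggingContext()` rather than calling `__exit__` by hand. A condensed sketch of the pattern (the helper functions are illustrative, not from this patch):

    from synapse.logging.context import PreserveLoggingContext
    from synapse.metrics.background_process_metrics import (
        BackgroundProcessLoggingContext,
    )


    def open_conn_logcontext(conn_id: str) -> BackgroundProcessLoggingContext:
        # Created under PreserveLoggingContext(), the new logcontext records
        # the sentinel as its containing context, so it cannot pin or later
        # reactivate whatever context the caller happened to be in.
        with PreserveLoggingContext():
            return BackgroundProcessLoggingContext("replication-conn", conn_id)


    def close_conn_logcontext(ctx: BackgroundProcessLoggingContext) -> None:
        # Entering and immediately leaving the context runs its __exit__()
        # cleanup; the outer PreserveLoggingContext() then restores the
        # previously active context instead of leaving the sentinel current.
        with PreserveLoggingContext():
            with ctx:
                pass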
@@ -364,6 +372,12 @@ def lazyConnection( factory.continueTrying = reconnect reactor = hs.get_reactor() - reactor.connectTCP(host.encode(), port, factory, timeout=30, bindAddress=None) + reactor.connectTCP( + host, # type: ignore[arg-type] + port, + factory, + timeout=30, + bindAddress=None, + ) return factory.handler diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index bd47d8425814..a9d85f4f6cd0 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -16,10 +16,11 @@ import logging import random +from typing import TYPE_CHECKING from prometheus_client import Counter -from twisted.internet.protocol import Factory +from twisted.internet.protocol import ServerFactory from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.tcp.commands import PositionCommand @@ -27,6 +28,9 @@ from synapse.replication.tcp.streams import EventsStream from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + stream_updates_counter = Counter( "synapse_replication_tcp_resource_stream_updates", "", ["stream_name"] ) @@ -34,13 +38,13 @@ logger = logging.getLogger(__name__) -class ReplicationStreamProtocolFactory(Factory): +class ReplicationStreamProtocolFactory(ServerFactory): """Factory for new replication connections.""" - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.command_handler = hs.get_tcp_replication() self.clock = hs.get_clock() - self.server_name = hs.config.server_name + self.server_name = hs.config.server.server_name # If we've created a `ReplicationStreamProtocolFactory` then we're # almost certainly registering a replication listener, so let's ensure @@ -65,13 +69,13 @@ class ReplicationStreamer: data is available it will propagate to all connected clients. 
""" - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.clock = hs.get_clock() self.notifier = hs.get_notifier() self._instance_name = hs.get_instance_name() - self._replication_torture_level = hs.config.replication_torture_level + self._replication_torture_level = hs.config.server.replication_torture_level self.notifier.add_replication_callback(self.on_notifier_poke) diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py index 4c0023c68aee..f41eabd85e58 100644 --- a/synapse/replication/tcp/streams/__init__.py +++ b/synapse/replication/tcp/streams/__init__.py @@ -32,7 +32,6 @@ GroupServerStream, PresenceFederationStream, PresenceStream, - PublicRoomsStream, PushersStream, PushRulesStream, ReceiptsStream, @@ -57,7 +56,6 @@ PushRulesStream, PushersStream, CachesStream, - PublicRoomsStream, DeviceListsStream, ToDeviceStream, FederationStream, @@ -79,7 +77,6 @@ "PushRulesStream", "PushersStream", "CachesStream", - "PublicRoomsStream", "DeviceListsStream", "ToDeviceStream", "TagAccountDataStream", diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 3716c41bea7b..743a01da08f0 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -241,7 +241,7 @@ class BackfillStream(Stream): NAME = "backfill" ROW_TYPE = BackfillStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -328,8 +328,7 @@ class TypingStream(Stream): ROW_TYPE = TypingStreamRow def __init__(self, hs: "HomeServer"): - writer_instance = hs.config.worker.writers.typing - if writer_instance == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.typing: # On the writer, query the typing handler typing_writer_handler = hs.get_typing_writer_handler() update_function: Callable[ @@ -363,7 +362,7 @@ class ReceiptsStream(Stream): NAME = "receipts" ROW_TYPE = ReceiptsStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -380,7 +379,7 @@ class PushRulesStream(Stream): NAME = "push_rules" ROW_TYPE = PushRulesStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() super().__init__( @@ -405,7 +404,7 @@ class PushersStream(Stream): NAME = "pushers" ROW_TYPE = PushersStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( @@ -438,7 +437,7 @@ class CachesStreamRow: NAME = "caches" ROW_TYPE = CachesStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -447,31 +446,6 @@ def __init__(self, hs): ) -class PublicRoomsStream(Stream): - """The public rooms list changed""" - - PublicRoomsStreamRow = namedtuple( - "PublicRoomsStreamRow", - ( - "room_id", # str - "visibility", # str - "appservice_id", # str, optional - "network_id", # str, optional - ), - ) - - NAME = "public_rooms" - ROW_TYPE = PublicRoomsStreamRow - - def __init__(self, hs): - store = hs.get_datastore() - super().__init__( - hs.get_instance_name(), - current_token_without_instance(store.get_current_public_room_stream_id), - store.get_all_new_public_rooms, - ) - - class DeviceListsStream(Stream): """Either a user has updated their devices or a remote server needs to be told about 
a device update. @@ -484,7 +458,7 @@ class DeviceListsStreamRow: NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -501,7 +475,7 @@ class ToDeviceStream(Stream): NAME = "to_device" ROW_TYPE = ToDeviceStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -520,7 +494,7 @@ class TagAccountDataStream(Stream): NAME = "tag_account_data" ROW_TYPE = TagAccountDataStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -607,7 +581,7 @@ class GroupServerStream(Stream): NAME = "groups" ROW_TYPE = GroupsStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), @@ -624,7 +598,7 @@ class UserSignatureStream(Stream): NAME = "user_signature" ROW_TYPE = UserSignatureStreamRow - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): store = hs.get_datastore() super().__init__( hs.get_instance_name(), diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index a030e9299edc..a390cfcb74d5 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq from collections.abc import Iterable -from typing import TYPE_CHECKING, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Optional, Tuple, Type import attr @@ -157,7 +157,7 @@ async def _update_function( # now we fetch up to that many rows from the events table - event_rows: List[Tuple] = await self._store.get_all_new_forward_event_rows( + event_rows = await self._store.get_all_new_forward_event_rows( instance_name, from_token, current_token, target_row_count ) @@ -191,7 +191,7 @@ async def _update_function( # finally, fetch the ex-outliers rows. We assume there are few enough of these # not to bother with the limit. - ex_outliers_rows: List[Tuple] = await self._store.get_ex_outlier_stream_rows( + ex_outliers_rows = await self._store.get_ex_outlier_stream_rows( instance_name, from_token, upper_limit ) diff --git a/synapse/replication/tcp/streams/federation.py b/synapse/replication/tcp/streams/federation.py index c445af9bd986..0600cdbf363d 100644 --- a/synapse/replication/tcp/streams/federation.py +++ b/synapse/replication/tcp/streams/federation.py @@ -42,7 +42,7 @@ class FederationStream(Stream): ROW_TYPE = FederationStreamRow def __init__(self, hs: "HomeServer"): - if hs.config.worker_app is None: + if hs.config.worker.worker_app is None: # master process: get updates from the FederationRemoteSendQueue. 
# (if the master is configured to send federation itself, federation_sender # will be a real FederationSender, which has stubs for current_token and diff --git a/synapse/res/providers.json b/synapse/res/providers.json new file mode 100644 index 000000000000..f1838f955901 --- /dev/null +++ b/synapse/res/providers.json @@ -0,0 +1,17 @@ +[ + { + "provider_name": "Twitter", + "provider_url": "http://www.twitter.com/", + "endpoints": [ + { + "schemes": [ + "https://twitter.com/*/status/*", + "https://*.twitter.com/*/status/*", + "https://twitter.com/*/moments/*", + "https://*.twitter.com/*/moments/*" + ], + "url": "https://publish.twitter.com/oembed" + } + ] + } +] \ No newline at end of file diff --git a/synapse/res/templates/recaptcha.html b/synapse/res/templates/recaptcha.html index 63944dc60814..b3db06ef9761 100644 --- a/synapse/res/templates/recaptcha.html +++ b/synapse/res/templates/recaptcha.html @@ -16,6 +16,9 @@
+    {% if error is defined %}
+    <p class="error"><strong>Error: {{ error }}</strong></p>
+    {% endif %}
    <p>
    Hello! We need to prevent computer programs and other automated things from creating accounts on this server.
diff --git a/synapse/res/templates/registration_token.html b/synapse/res/templates/registration_token.html
new file mode 100644
index 000000000000..4577ce17023e
--- /dev/null
+++ b/synapse/res/templates/registration_token.html
@@ -0,0 +1,23 @@
+<html>
+<head>
+<title>Authentication</title>
+<meta name='viewport' content='width=device-width, initial-scale=1,
+    user-scalable=no, minimum-scale=1.0, maximum-scale=1.0'>
+<link rel="stylesheet" href="/_matrix/static/client/register/style.css">
+</head>
+<body>
+<form id="registrationForm" method="post" action="{{ myurl }}">
+    <div>
+        {% if error is defined %}
+        <p class="error"><strong>Error: {{ error }}</strong></p>
+        {% endif %}
+        <p>
+            Please enter a registration token.
+        </p>
+        <input type="hidden" name="session" value="{{ session }}" />
+        <input type="text" name="token" />
+        <input type="submit" value="Authenticate" />
+    </div>
+</form>
+</body>
+</html>
diff --git a/synapse/res/templates/terms.html b/synapse/res/templates/terms.html
index dfef9897ee40..369ff446d230 100644
--- a/synapse/res/templates/terms.html
+++ b/synapse/res/templates/terms.html
@@ -8,6 +8,9 @@
+    {% if error is defined %}
+    <p class="error"><strong>Error: {{ error }}</strong></p>
+    {% endif %}
Please click the button below if you agree to the privacy policy of this homeserver. diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index d29f2fea5ed3..cebdeecb8127 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -12,41 +12,40 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import JsonResource +from typing import TYPE_CHECKING, Callable + +from synapse.http.server import HttpServer, JsonResource from synapse.rest import admin -from synapse.rest.client import versions -from synapse.rest.client.v1 import ( - directory, - events, - initial_sync, - login as v1_login, - logout, - presence, - profile, - push_rule, - pusher, - room, - voip, -) -from synapse.rest.client.v2_alpha import ( +from synapse.rest.client import ( account, account_data, account_validity, auth, capabilities, devices, + directory, + events, filter, groups, + initial_sync, keys, knock, + login as v1_login, + logout, notifications, openid, password_policy, + presence, + profile, + push_rule, + pusher, read_marker, receipts, register, relations, report_event, + room, + room_batch, room_keys, room_upgrade_rest_servlet, sendtodevice, @@ -56,8 +55,15 @@ thirdparty, tokenrefresh, user_directory, + versions, + voip, ) +if TYPE_CHECKING: + from synapse.server import HomeServer + +RegisterServletsFunc = Callable[["HomeServer", HttpServer], None] + class ClientRestResource(JsonResource): """Matrix Client API REST resource. @@ -69,12 +75,12 @@ class ClientRestResource(JsonResource): * etc """ - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): JsonResource.__init__(self, hs, canonical_json=False) self.register_servlets(self, hs) @staticmethod - def register_servlets(client_resource, hs): + def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None: versions.register_servlets(hs, client_resource) # Deprecated in r0 @@ -84,7 +90,6 @@ def register_servlets(client_resource, hs): # Partially deprecated in r0 events.register_servlets(hs, client_resource) - # "v1" + "r0" room.register_servlets(hs, client_resource) v1_login.register_servlets(hs, client_resource) profile.register_servlets(hs, client_resource) @@ -94,8 +99,6 @@ def register_servlets(client_resource, hs): pusher.register_servlets(hs, client_resource) push_rule.register_servlets(hs, client_resource) logout.register_servlets(hs, client_resource) - - # "v2" sync.register_servlets(hs, client_resource) filter.register_servlets(hs, client_resource) account.register_servlets(hs, client_resource) @@ -117,6 +120,7 @@ def register_servlets(client_resource, hs): user_directory.register_servlets(hs, client_resource) groups.register_servlets(hs, client_resource) room_upgrade_rest_servlet.register_servlets(hs, client_resource) + room_batch.register_servlets(hs, client_resource) capabilities.register_servlets(hs, client_resource) account_validity.register_servlets(hs, client_resource) relations.register_servlets(hs, client_resource) diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 7222c75e97c0..bcc44c6afcbc 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -17,6 +17,7 @@ import logging import platform +from http import HTTPStatus from typing import TYPE_CHECKING, Optional, Tuple import synapse @@ -25,6 +26,11 @@ from synapse.http.servlet import RestServlet, parse_json_object_from_request from 
synapse.http.site import SynapseRequest from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.rest.admin.background_updates import ( + BackgroundUpdateEnabledRestServlet, + BackgroundUpdateRestServlet, + BackgroundUpdateStartJobRestServlet, +) from synapse.rest.admin.devices import ( DeleteDevicesRestServlet, DeviceRestServlet, @@ -34,11 +40,21 @@ EventReportDetailRestServlet, EventReportsRestServlet, ) +from synapse.rest.admin.federation import ( + DestinationsRestServlet, + ListDestinationsRestServlet, +) from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo -from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet +from synapse.rest.admin.registration_tokens import ( + ListRegistrationTokensRestServlet, + NewRegistrationTokenRestServlet, + RegistrationTokenRestServlet, +) from synapse.rest.admin.rooms import ( - DeleteRoomRestServlet, + BlockRoomRestServlet, + DeleteRoomStatusByDeleteIdRestServlet, + DeleteRoomStatusByRoomIdRestServlet, ForwardExtremitiesRestServlet, JoinRoomAliasServlet, ListRoomRestServlet, @@ -46,12 +62,13 @@ RoomEventContextServlet, RoomMembersRestServlet, RoomRestServlet, + RoomRestV2Servlet, RoomStateRestServlet, - ShutdownRoomRestServlet, PowerLevelsRestServlet ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet +from synapse.rest.admin.username_available import UsernameAvailableRestServlet from synapse.rest.admin.users import ( AccountValidityRenewServlet, DeactivateAccountRestServlet, @@ -61,7 +78,6 @@ SearchUsersRestServlet, ShadowBanRestServlet, UserAdminServlet, - UserMediaRestServlet, UserMembershipRestServlet, UserRegisterServlet, UserRestServletV2, @@ -88,7 +104,7 @@ def __init__(self, hs: "HomeServer"): } def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - return 200, self.res + return HTTPStatus.OK, self.res class PurgeHistoryRestServlet(RestServlet): @@ -120,7 +136,7 @@ async def on_POST( event = await self.store.get_event(event_id) if event.room_id != room_id: - raise SynapseError(400, "Event is for wrong room.") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Event is for wrong room.") # RoomStreamToken expects [int] not Optional[int] assert event.internal_metadata.stream_ordering is not None @@ -134,7 +150,9 @@ async def on_POST( ts = body["purge_up_to_ts"] if not isinstance(ts, int): raise SynapseError( - 400, "purge_up_to_ts must be an int", errcode=Codes.BAD_JSON + HTTPStatus.BAD_REQUEST, + "purge_up_to_ts must be an int", + errcode=Codes.BAD_JSON, ) stream_ordering = await self.store.find_first_stream_ordering_after_ts(ts) @@ -150,7 +168,9 @@ async def on_POST( stream_ordering, ) raise SynapseError( - 404, "there is no event to be purged", errcode=Codes.NOT_FOUND + HTTPStatus.NOT_FOUND, + "there is no event to be purged", + errcode=Codes.NOT_FOUND, ) (stream, topo, _event_id) = r token = "t%d-%d" % (topo, stream) @@ -163,7 +183,7 @@ async def on_POST( ) else: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "must specify purge_up_to_event_id or purge_up_to_ts", errcode=Codes.BAD_JSON, ) @@ -172,7 +192,7 @@ async def on_POST( room_id, token, delete_local_events=delete_local_events ) - return 200, {"purge_id": purge_id} + return HTTPStatus.OK, {"purge_id": purge_id} class PurgeHistoryStatusRestServlet(RestServlet): @@ -191,7 +211,7 @@ async def on_GET( if purge_status is None: raise 
NotFoundError("purge id '%s' not found" % purge_id) - return 200, purge_status.asdict() + return HTTPStatus.OK, purge_status.asdict() ######################################################################################## @@ -215,17 +235,17 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: Register all the admin servlets. """ register_servlets_for_client_rest_resource(hs, http_server) + BlockRoomRestServlet(hs).register(http_server) ListRoomRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRestServlet(hs).register(http_server) + RoomRestV2Servlet(hs).register(http_server) RoomMembersRestServlet(hs).register(http_server) - DeleteRoomRestServlet(hs).register(http_server) + DeleteRoomStatusByDeleteIdRestServlet(hs).register(http_server) + DeleteRoomStatusByRoomIdRestServlet(hs).register(http_server) JoinRoomAliasServlet(hs).register(http_server) - PurgeRoomServlet(hs).register(http_server) - SendServerNoticeServlet(hs).register(http_server) VersionServlet(hs).register(http_server) UserAdminServlet(hs).register(http_server) - UserMediaRestServlet(hs).register(http_server) UserMembershipRestServlet(hs).register(http_server) UserTokenRestServlet(hs).register(http_server) UserRestServletV2(hs).register(http_server) @@ -243,6 +263,19 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RoomEventContextServlet(hs).register(http_server) RateLimitRestServlet(hs).register(http_server) PowerLevelsRestServlet(hs).register(http_server) + UsernameAvailableRestServlet(hs).register(http_server) + ListRegistrationTokensRestServlet(hs).register(http_server) + NewRegistrationTokenRestServlet(hs).register(http_server) + RegistrationTokenRestServlet(hs).register(http_server) + DestinationsRestServlet(hs).register(http_server) + ListDestinationsRestServlet(hs).register(http_server) + + # Some servlets only get registered for the main process. + if hs.config.worker.worker_app is None: + SendServerNoticeServlet(hs).register(http_server) + BackgroundUpdateEnabledRestServlet(hs).register(http_server) + BackgroundUpdateRestServlet(hs).register(http_server) + BackgroundUpdateStartJobRestServlet(hs).register(http_server) def register_servlets_for_client_rest_resource( @@ -255,14 +288,13 @@ def register_servlets_for_client_rest_resource( PurgeHistoryRestServlet(hs).register(http_server) ResetPasswordRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) - ShutdownRoomRestServlet(hs).register(http_server) UserRegisterServlet(hs).register(http_server) DeleteGroupAdminRestServlet(hs).register(http_server) AccountValidityRenewServlet(hs).register(http_server) # Load the media repo ones if we're using them. Otherwise load the servlets which # don't need a media repo (typically readonly admin APIs). - if hs.config.can_load_media_repo: + if hs.config.media.can_load_media_repo: register_servlets_for_media_repo(hs, http_server) else: ListMediaInRoom(hs).register(http_server) diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index d9a2f6ca157f..399b205aaf81 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -13,6 +13,7 @@ # limitations under the License. 
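The recurring change in these admin servlets swaps bare numeric status codes for the equivalent `http.HTTPStatus` members. Since `HTTPStatus` is an `IntEnum`, its members compare and serialise exactly like the integers they replace, so the refactor is behaviour-preserving; a quick check:

    from http import HTTPStatus

    # IntEnum members are equal to their integer values.
    assert HTTPStatus.OK == 200
    assert HTTPStatus.BAD_REQUEST == 400
    assert HTTPStatus.FORBIDDEN == 403
    assert HTTPStatus.NOT_FOUND == 404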
import re +from http import HTTPStatus from typing import Iterable, Pattern from synapse.api.auth import Auth @@ -62,4 +63,4 @@ async def assert_user_is_admin(auth: Auth, user_id: UserID) -> None: """ is_admin = await auth.is_server_admin(user_id) if not is_admin: - raise AuthError(403, "You are not a server admin") + raise AuthError(HTTPStatus.FORBIDDEN, "You are not a server admin") diff --git a/synapse/rest/admin/background_updates.py b/synapse/rest/admin/background_updates.py new file mode 100644 index 000000000000..479672d4d568 --- /dev/null +++ b/synapse/rest/admin/background_updates.py @@ -0,0 +1,176 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import SynapseError +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_user_is_admin +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class BackgroundUpdateEnabledRestServlet(RestServlet): + """Allows temporarily disabling background updates""" + + PATTERNS = admin_patterns("/background_updates/enabled$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._data_stores = hs.get_datastores() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + # We need to check that all configured databases have updates enabled. + # (They *should* all be in sync.) + enabled = all(db.updates.enabled for db in self._data_stores.databases) + + return HTTPStatus.OK, {"enabled": enabled} + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + body = parse_json_object_from_request(request) + + enabled = body.get("enabled", True) + + if not isinstance(enabled, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "'enabled' parameter must be a boolean" + ) + + for db in self._data_stores.databases: + db.updates.enabled = enabled + + # If we're re-enabling them ensure that we start the background + # process again. 
+ if enabled: + db.updates.start_doing_background_updates() + + return HTTPStatus.OK, {"enabled": enabled} + + +class BackgroundUpdateRestServlet(RestServlet): + """Fetch information about background updates""" + + PATTERNS = admin_patterns("/background_updates/status$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._data_stores = hs.get_datastores() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + # We need to check that all configured databases have updates enabled. + # (They *should* all be in sync.) + enabled = all(db.updates.enabled for db in self._data_stores.databases) + + current_updates = {} + + for db in self._data_stores.databases: + update = db.updates.get_current_update() + if not update: + continue + + current_updates[db.name()] = { + "name": update.name, + "total_item_count": update.total_item_count, + "total_duration_ms": update.total_duration_ms, + "average_items_per_ms": update.average_items_per_ms(), + } + + return HTTPStatus.OK, {"enabled": enabled, "current_updates": current_updates} + + +class BackgroundUpdateStartJobRestServlet(RestServlet): + """Allows starting specific background updates""" + + PATTERNS = admin_patterns("/background_updates/start_job") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["job_name"]) + + job_name = body["job_name"] + + if job_name == "populate_stats_process_rooms": + jobs = [ + { + "update_name": "populate_stats_process_rooms", + "progress_json": "{}", + }, + ] + elif job_name == "regenerate_directory": + jobs = [ + { + "update_name": "populate_user_directory_createtables", + "progress_json": "{}", + "depends_on": "", + }, + { + "update_name": "populate_user_directory_process_rooms", + "progress_json": "{}", + "depends_on": "populate_user_directory_createtables", + }, + { + "update_name": "populate_user_directory_process_users", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_rooms", + }, + { + "update_name": "populate_user_directory_cleanup", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_users", + }, + ] + else: + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid job_name") + + try: + await self._store.db_pool.simple_insert_many( + table="background_updates", + values=jobs, + desc=f"admin_api_run_{job_name}", + ) + except self._store.db_pool.engine.module.IntegrityError: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Job %s is already in the queue of background updates." % (job_name,), + ) + + self._store.db_pool.updates.start_doing_background_updates() + + return HTTPStatus.OK, {} diff --git a/synapse/rest/admin/devices.py b/synapse/rest/admin/devices.py index 5715190a78cc..2e5a6600d337 100644 --- a/synapse/rest/admin/devices.py +++ b/synapse/rest/admin/devices.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
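Together, the three servlets above expose `/enabled`, `/status` and `/start_job` endpoints under `/_synapse/admin/v1/background_updates/`. A rough usage sketch with `requests`; the base URL and access token are placeholders:

    import requests

    BASE = "http://localhost:8008/_synapse/admin/v1/background_updates"
    # Placeholder admin access token.
    HEADERS = {"Authorization": "Bearer <admin_access_token>"}

    # Inspect whether updates are enabled and what is currently running.
    print(requests.get(f"{BASE}/enabled", headers=HEADERS).json())
    print(requests.get(f"{BASE}/status", headers=HEADERS).json())

    # Pause background updates, then re-enable them (re-enabling also
    # restarts the background process, per the servlet above).
    requests.post(f"{BASE}/enabled", headers=HEADERS, json={"enabled": False})
    requests.post(f"{BASE}/enabled", headers=HEADERS, json={"enabled": True})

    # Queue the chained user-directory rebuild jobs.
    requests.post(
        f"{BASE}/start_job", headers=HEADERS, json={"job_name": "regenerate_directory"}
    )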
import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import NotFoundError, SynapseError @@ -47,13 +48,13 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() async def on_GET( - self, request: SynapseRequest, user_id, device_id: str + self, request: SynapseRequest, user_id: str, device_id: str ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only lookup local users") u = await self.store.get_user_by_id(target_user.to_string()) if u is None: @@ -62,7 +63,7 @@ async def on_GET( device = await self.device_handler.get_device( target_user.to_string(), device_id ) - return 200, device + return HTTPStatus.OK, device async def on_DELETE( self, request: SynapseRequest, user_id: str, device_id: str @@ -71,14 +72,14 @@ async def on_DELETE( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only lookup local users") u = await self.store.get_user_by_id(target_user.to_string()) if u is None: raise NotFoundError("Unknown user") await self.device_handler.delete_device(target_user.to_string(), device_id) - return 200, {} + return HTTPStatus.OK, {} async def on_PUT( self, request: SynapseRequest, user_id: str, device_id: str @@ -87,7 +88,7 @@ async def on_PUT( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only lookup local users") u = await self.store.get_user_by_id(target_user.to_string()) if u is None: @@ -97,7 +98,7 @@ async def on_PUT( await self.device_handler.update_device( target_user.to_string(), device_id, body ) - return 200, {} + return HTTPStatus.OK, {} class DevicesRestServlet(RestServlet): @@ -110,7 +111,7 @@ class DevicesRestServlet(RestServlet): def __init__(self, hs: "HomeServer"): """ Args: - hs (synapse.server.HomeServer): server + hs: server """ self.hs = hs self.auth = hs.get_auth() @@ -124,14 +125,14 @@ async def on_GET( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only lookup local users") u = await self.store.get_user_by_id(target_user.to_string()) if u is None: raise NotFoundError("Unknown user") devices = await self.device_handler.get_devices_by_user(target_user.to_string()) - return 200, {"devices": devices, "total": len(devices)} + return HTTPStatus.OK, {"devices": devices, "total": len(devices)} class DeleteDevicesRestServlet(RestServlet): @@ -155,7 +156,7 @@ async def on_POST( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only lookup local users") u = await self.store.get_user_by_id(target_user.to_string()) if u is None: @@ -167,4 +168,4 @@ async def on_POST( await self.device_handler.delete_devices( target_user.to_string(), body["devices"] ) - return 200, {} + return HTTPStatus.OK, {} diff --git a/synapse/rest/admin/event_reports.py b/synapse/rest/admin/event_reports.py index bbfcaf723b7b..5ee8b11110e0 100644 --- 
a/synapse/rest/admin/event_reports.py +++ b/synapse/rest/admin/event_reports.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -66,21 +67,23 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if start < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "The start parameter must be a positive integer.", errcode=Codes.INVALID_PARAM, ) if limit < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "The limit parameter must be a positive integer.", errcode=Codes.INVALID_PARAM, ) if direction not in ("f", "b"): raise SynapseError( - 400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM + HTTPStatus.BAD_REQUEST, + "Unknown direction: %s" % (direction,), + errcode=Codes.INVALID_PARAM, + ) event_reports, total = await self.store.get_event_reports_paginate( @@ -90,7 +93,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if (start + limit) < total: ret["next_token"] = start + len(event_reports) - return 200, ret + return HTTPStatus.OK, ret class EventReportDetailRestServlet(RestServlet): @@ -127,13 +130,17 @@ async def on_GET( try: resolved_report_id = int(report_id) except ValueError: - raise SynapseError(400, message, errcode=Codes.INVALID_PARAM) + raise SynapseError( + HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM + ) if resolved_report_id < 0: - raise SynapseError(400, message, errcode=Codes.INVALID_PARAM) + raise SynapseError( + HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM + ) ret = await self.store.get_event_report(resolved_report_id) if not ret: raise NotFoundError("Event report not found") - return 200, ret + return HTTPStatus.OK, ret diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py new file mode 100644 index 000000000000..744687be35fc --- /dev/null +++ b/synapse/rest/admin/federation.py @@ -0,0 +1,135 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.http.servlet import RestServlet, parse_integer, parse_string +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.storage.databases.main.transactions import DestinationSortOrder +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ListDestinationsRestServlet(RestServlet): + """Get request to list all destinations. + This needs the user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations?from=0&limit=10 + + returns: + 200 OK with a list of destinations on success, otherwise an error.
+ + The parameters `from` and `limit` are required only for pagination. + By default, a `limit` of 100 is used. + The parameter `destination` can be used to filter by destination. + The parameter `order_by` can be used to order the result. + """ + + PATTERNS = admin_patterns("/federation/destinations$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + destination = parse_string(request, "destination") + + order_by = parse_string( + request, + "order_by", + default=DestinationSortOrder.DESTINATION.value, + allowed_values=[dest.value for dest in DestinationSortOrder], + ) + + direction = parse_string(request, "dir", default="f", allowed_values=("f", "b")) + + destinations, total = await self._store.get_destinations_paginate( + start, limit, destination, order_by, direction + ) + response = {"destinations": destinations, "total": total} + if (start + limit) < total: + response["next_token"] = str(start + len(destinations)) + + return HTTPStatus.OK, response + + +class DestinationsRestServlet(RestServlet): + """Get details of a destination. + This needs the user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations/<destination> + + returns: + 200 OK with details of a destination on success, otherwise an error. + """ + + PATTERNS = admin_patterns("/federation/destinations/(?P<destination>[^/]+)$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, destination: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + destination_retry_timings = await self._store.get_destination_retry_timings( + destination + ) + + if not destination_retry_timings: + raise NotFoundError("Unknown destination") + + last_successful_stream_ordering = ( + await self._store.get_destination_last_successful_stream_ordering( + destination + ) + ) + + response = { + "destination": destination, + "failure_ts": destination_retry_timings.failure_ts, + "retry_last_ts": destination_retry_timings.retry_last_ts, + "retry_interval": destination_retry_timings.retry_interval, + "last_successful_stream_ordering": last_successful_stream_ordering, + } + + return HTTPStatus.OK, response diff --git a/synapse/rest/admin/groups.py b/synapse/rest/admin/groups.py index 68a3ba3cb7ac..a27110388f4f 100644 --- a/synapse/rest/admin/groups.py +++ b/synapse/rest/admin/groups.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
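The `next_token` convention in the destinations list API above makes client-side pagination a simple loop; a sketch (URL and token are placeholders):

    import requests

    BASE = "http://localhost:8008/_synapse/admin/v1/federation/destinations"
    HEADERS = {"Authorization": "Bearer <admin_access_token>"}  # placeholder


    def fetch_all_destinations() -> list:
        destinations = []
        start = 0
        while True:
            resp = requests.get(
                BASE, headers=HEADERS, params={"from": start, "limit": 100}
            ).json()
            destinations.extend(resp["destinations"])
            # The servlet only includes next_token while more rows remain.
            if "next_token" not in resp:
                return destinations
            start = int(resp["next_token"])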
import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import SynapseError @@ -43,7 +44,7 @@ async def on_POST( await assert_user_is_admin(self.auth, requester.user) if not self.is_mine_id(group_id): - raise SynapseError(400, "Can only delete local groups") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local groups") await self.group_server.delete_group(group_id, requester.user.to_string()) - return 200, {} + return HTTPStatus.OK, {} diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 0a19a333d7f7..9e23e2d8fc00 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -14,18 +14,20 @@ # limitations under the License. import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer -from synapse.http.servlet import RestServlet, parse_boolean, parse_integer +from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.admin._base import ( admin_patterns, assert_requester_is_admin, assert_user_is_admin, ) -from synapse.types import JsonDict +from synapse.storage.databases.main.media_repository import MediaSortOrder +from synapse.types import JsonDict, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -39,7 +41,7 @@ class QuarantineMediaInRoom(RestServlet): """ PATTERNS = [ - *admin_patterns("/room/(?P<room_id>[^/]+)/media/quarantine"), + *admin_patterns("/room/(?P<room_id>[^/]+)/media/quarantine$"), # This path kept around for legacy reasons *admin_patterns("/quarantine_media/(?P<room_id>[^/]+)"), ] @@ -61,7 +63,7 @@ async def on_POST( room_id, requester.user.to_string() ) - return 200, {"num_quarantined": num_quarantined} + return HTTPStatus.OK, {"num_quarantined": num_quarantined} class QuarantineMediaByUser(RestServlet): @@ -69,7 +71,7 @@ class QuarantineMediaByUser(RestServlet): this server.
""" - PATTERNS = admin_patterns("/user/(?P[^/]+)/media/quarantine") + PATTERNS = admin_patterns("/user/(?P[^/]+)/media/quarantine$") def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -88,7 +90,7 @@ async def on_POST( user_id, requester.user.to_string() ) - return 200, {"num_quarantined": num_quarantined} + return HTTPStatus.OK, {"num_quarantined": num_quarantined} class QuarantineMediaByID(RestServlet): @@ -117,7 +119,7 @@ async def on_POST( server_name, media_id, requester.user.to_string() ) - return 200, {} + return HTTPStatus.OK, {} class UnquarantineMediaByID(RestServlet): @@ -146,7 +148,7 @@ async def on_POST( # Remove from quarantine this media id await self.store.quarantine_media_by_id(server_name, media_id, None) - return 200, {} + return HTTPStatus.OK, {} class ProtectMediaByID(RestServlet): @@ -169,7 +171,7 @@ async def on_POST( # Protect this media id await self.store.mark_local_media_as_safe(media_id, safe=True) - return 200, {} + return HTTPStatus.OK, {} class UnprotectMediaByID(RestServlet): @@ -192,13 +194,13 @@ async def on_POST( # Unprotect this media id await self.store.mark_local_media_as_safe(media_id, safe=False) - return 200, {} + return HTTPStatus.OK, {} class ListMediaInRoom(RestServlet): """Lists all of the media in a given room.""" - PATTERNS = admin_patterns("/room/(?P[^/]+)/media") + PATTERNS = admin_patterns("/room/(?P[^/]+)/media$") def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -210,15 +212,15 @@ async def on_GET( requester = await self.auth.get_user_by_req(request) is_admin = await self.auth.is_server_admin(requester.user) if not is_admin: - raise AuthError(403, "You are not a server admin") + raise AuthError(HTTPStatus.FORBIDDEN, "You are not a server admin") local_mxcs, remote_mxcs = await self.store.get_media_mxcs_in_room(room_id) - return 200, {"local": local_mxcs, "remote": remote_mxcs} + return HTTPStatus.OK, {"local": local_mxcs, "remote": remote_mxcs} class PurgeMediaCacheRestServlet(RestServlet): - PATTERNS = admin_patterns("/purge_media_cache") + PATTERNS = admin_patterns("/purge_media_cache$") def __init__(self, hs: "HomeServer"): self.media_repository = hs.get_media_repository() @@ -230,9 +232,23 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: before_ts = parse_integer(request, "before_ts", required=True) logger.info("before_ts: %r", before_ts) + if before_ts < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter before_ts must be a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + elif before_ts < 30000000000: # Dec 1970 in milliseconds, Aug 2920 in seconds + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter before_ts you provided is from the year 1970. 
" + + "Double check that you are providing a timestamp in milliseconds.", + errcode=Codes.INVALID_PARAM, + ) + ret = await self.media_repository.delete_old_remote_media(before_ts) - return 200, ret + return HTTPStatus.OK, ret class DeleteMediaByID(RestServlet): @@ -252,15 +268,17 @@ async def on_DELETE( await assert_requester_is_admin(self.auth, request) if self.server_name != server_name: - raise SynapseError(400, "Can only delete local media") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media") if await self.store.get_local_media(media_id) is None: raise NotFoundError("Unknown media") logging.info("Deleting local media by ID: %s", media_id) - deleted_media, total = await self.media_repository.delete_local_media(media_id) - return 200, {"deleted_media": deleted_media, "total": total} + deleted_media, total = await self.media_repository.delete_local_media_ids( + [media_id] + ) + return HTTPStatus.OK, {"deleted_media": deleted_media, "total": total} class DeleteMediaByDateSize(RestServlet): @@ -268,7 +286,7 @@ class DeleteMediaByDateSize(RestServlet): timestamp and size. """ - PATTERNS = admin_patterns("/media/(?P[^/]+)/delete") + PATTERNS = admin_patterns("/media/(?P[^/]+)/delete$") def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -287,19 +305,26 @@ async def on_POST( if before_ts < 0: raise SynapseError( - 400, - "Query parameter before_ts must be a string representing a positive integer.", + HTTPStatus.BAD_REQUEST, + "Query parameter before_ts must be a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + elif before_ts < 30000000000: # Dec 1970 in milliseconds, Aug 2920 in seconds + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter before_ts you provided is from the year 1970. " + + "Double check that you are providing a timestamp in milliseconds.", errcode=Codes.INVALID_PARAM, ) if size_gt < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter size_gt must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) if self.server_name != server_name: - raise SynapseError(400, "Can only delete local media") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media") logging.info( "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s" @@ -309,7 +334,166 @@ async def on_POST( deleted_media, total = await self.media_repository.delete_old_local_media( before_ts, size_gt, keep_profiles ) - return 200, {"deleted_media": deleted_media, "total": total} + return HTTPStatus.OK, {"deleted_media": deleted_media, "total": total} + + +class UserMediaRestServlet(RestServlet): + """ + Gets information about all uploaded local media for a specific `user_id`. + With DELETE request you can delete all this media. + + Example: + http://localhost:8008/_synapse/admin/v1/users/@user:server/media + + Args: + The parameters `from` and `limit` are required for pagination. + By default, a `limit` of 100 is used. + Returns: + A list of media and an integer representing the total number of + media that exist given for this user + """ + + PATTERNS = admin_patterns("/users/(?P[^/]+)/media$") + + def __init__(self, hs: "HomeServer"): + self.is_mine = hs.is_mine + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.media_repository = hs.get_media_repository() + + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + # This will always be set by the time Twisted calls us. 
+ assert request.args is not None + + await assert_requester_is_admin(self.auth, request) + + if not self.is_mine(UserID.from_string(user_id)): + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") + + user = await self.store.get_user_by_id(user_id) + if user is None: + raise NotFoundError("Unknown user") + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + # If neither `order_by` nor `dir` is set, set the default order + # to newest media is on top for backward compatibility. + if b"order_by" not in request.args and b"dir" not in request.args: + order_by = MediaSortOrder.CREATED_TS.value + direction = "b" + else: + order_by = parse_string( + request, + "order_by", + default=MediaSortOrder.CREATED_TS.value, + allowed_values=( + MediaSortOrder.MEDIA_ID.value, + MediaSortOrder.UPLOAD_NAME.value, + MediaSortOrder.CREATED_TS.value, + MediaSortOrder.LAST_ACCESS_TS.value, + MediaSortOrder.MEDIA_LENGTH.value, + MediaSortOrder.MEDIA_TYPE.value, + MediaSortOrder.QUARANTINED_BY.value, + MediaSortOrder.SAFE_FROM_QUARANTINE.value, + ), + ) + direction = parse_string( + request, "dir", default="f", allowed_values=("f", "b") + ) + + media, total = await self.store.get_local_media_by_user_paginate( + start, limit, user_id, order_by, direction + ) + + ret = {"media": media, "total": total} + if (start + limit) < total: + ret["next_token"] = start + len(media) + + return HTTPStatus.OK, ret + + async def on_DELETE( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + # This will always be set by the time Twisted calls us. + assert request.args is not None + + await assert_requester_is_admin(self.auth, request) + + if not self.is_mine(UserID.from_string(user_id)): + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") + + user = await self.store.get_user_by_id(user_id) + if user is None: + raise NotFoundError("Unknown user") + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + # If neither `order_by` nor `dir` is set, set the default order + # to newest media is on top for backward compatibility. 
+ if b"order_by" not in request.args and b"dir" not in request.args: + order_by = MediaSortOrder.CREATED_TS.value + direction = "b" + else: + order_by = parse_string( + request, + "order_by", + default=MediaSortOrder.CREATED_TS.value, + allowed_values=( + MediaSortOrder.MEDIA_ID.value, + MediaSortOrder.UPLOAD_NAME.value, + MediaSortOrder.CREATED_TS.value, + MediaSortOrder.LAST_ACCESS_TS.value, + MediaSortOrder.MEDIA_LENGTH.value, + MediaSortOrder.MEDIA_TYPE.value, + MediaSortOrder.QUARANTINED_BY.value, + MediaSortOrder.SAFE_FROM_QUARANTINE.value, + ), + ) + direction = parse_string( + request, "dir", default="f", allowed_values=("f", "b") + ) + + media, _ = await self.store.get_local_media_by_user_paginate( + start, limit, user_id, order_by, direction + ) + + deleted_media, total = await self.media_repository.delete_local_media_ids( + ([row["media_id"] for row in media]) + ) + + return HTTPStatus.OK, {"deleted_media": deleted_media, "total": total} def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) -> None: @@ -326,3 +510,4 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) ListMediaInRoom(hs).register(http_server) DeleteMediaByID(hs).register(http_server) DeleteMediaByDateSize(hs).register(http_server) + UserMediaRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/registration_tokens.py b/synapse/rest/admin/registration_tokens.py new file mode 100644 index 000000000000..891b98c0888a --- /dev/null +++ b/synapse/rest/admin/registration_tokens.py @@ -0,0 +1,338 @@ +# Copyright 2021 Callum Brown +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import string +from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.http.servlet import ( + RestServlet, + parse_boolean, + parse_json_object_from_request, +) +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ListRegistrationTokensRestServlet(RestServlet): + """List registration tokens. + + To list all tokens: + + GET /_synapse/admin/v1/registration_tokens + + 200 OK + + { + "registration_tokens": [ + { + "token": "abcd", + "uses_allowed": 3, + "pending": 0, + "completed": 1, + "expiry_time": null + }, + { + "token": "wxyz", + "uses_allowed": null, + "pending": 0, + "completed": 9, + "expiry_time": 1625394937000 + } + ] + } + + The optional query parameter `valid` can be used to filter the response. + If it is `true`, only valid tokens are returned. If it is `false`, only + tokens that have expired or have had all uses exhausted are returned. + If it is omitted, all tokens are returned regardless of validity. 
+ """ + + PATTERNS = admin_patterns("/registration_tokens$") + + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + valid = parse_boolean(request, "valid") + token_list = await self.store.get_registration_tokens(valid) + return HTTPStatus.OK, {"registration_tokens": token_list} + + +class NewRegistrationTokenRestServlet(RestServlet): + """Create a new registration token. + + For example, to create a token specifying some fields: + + POST /_synapse/admin/v1/registration_tokens/new + + { + "token": "defg", + "uses_allowed": 1 + } + + 200 OK + + { + "token": "defg", + "uses_allowed": 1, + "pending": 0, + "completed": 0, + "expiry_time": null + } + + Defaults are used for any fields not specified. + """ + + PATTERNS = admin_patterns("/registration_tokens/new$") + + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + # A string of all the characters allowed to be in a registration_token + self.allowed_chars = string.ascii_letters + string.digits + "._~-" + self.allowed_chars_set = set(self.allowed_chars) + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + body = parse_json_object_from_request(request) + + if "token" in body: + token = body["token"] + if not isinstance(token, str): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "token must be a string", + Codes.INVALID_PARAM, + ) + if not (0 < len(token) <= 64): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "token must not be empty and must not be longer than 64 characters", + Codes.INVALID_PARAM, + ) + if not set(token).issubset(self.allowed_chars_set): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "token must consist only of characters matched by the regex [A-Za-z0-9-_]", + Codes.INVALID_PARAM, + ) + + else: + # Get length of token to generate (default is 16) + length = body.get("length", 16) + if not isinstance(length, int): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "length must be an integer", + Codes.INVALID_PARAM, + ) + if not (0 < length <= 64): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "length must be greater than zero and not greater than 64", + Codes.INVALID_PARAM, + ) + + # Generate token + token = await self.store.generate_registration_token( + length, self.allowed_chars + ) + + uses_allowed = body.get("uses_allowed", None) + if not ( + uses_allowed is None + or (isinstance(uses_allowed, int) and uses_allowed >= 0) + ): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "uses_allowed must be a non-negative integer or null", + Codes.INVALID_PARAM, + ) + + expiry_time = body.get("expiry_time", None) + if not isinstance(expiry_time, (int, type(None))): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "expiry_time must be an integer or null", + Codes.INVALID_PARAM, + ) + if isinstance(expiry_time, int) and expiry_time < self.clock.time_msec(): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "expiry_time must not be in the past", + Codes.INVALID_PARAM, + ) + + created = await self.store.create_registration_token( + token, uses_allowed, expiry_time + ) + if not created: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + f"Token already exists: {token}", + Codes.INVALID_PARAM, + ) + + resp = { + "token": token, + "uses_allowed": uses_allowed, + 
"pending": 0, + "completed": 0, + "expiry_time": expiry_time, + } + return HTTPStatus.OK, resp + + +class RegistrationTokenRestServlet(RestServlet): + """Retrieve, update, or delete the given token. + + For example, + + to retrieve a token: + + GET /_synapse/admin/v1/registration_tokens/abcd + + 200 OK + + { + "token": "abcd", + "uses_allowed": 3, + "pending": 0, + "completed": 1, + "expiry_time": null + } + + + to update a token: + + PUT /_synapse/admin/v1/registration_tokens/defg + + { + "uses_allowed": 5, + "expiry_time": 4781243146000 + } + + 200 OK + + { + "token": "defg", + "uses_allowed": 5, + "pending": 0, + "completed": 0, + "expiry_time": 4781243146000 + } + + + to delete a token: + + DELETE /_synapse/admin/v1/registration_tokens/wxyz + + 200 OK + + {} + """ + + PATTERNS = admin_patterns("/registration_tokens/(?P[^/]*)$") + + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.clock = hs.get_clock() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest, token: str) -> Tuple[int, JsonDict]: + """Retrieve a registration token.""" + await assert_requester_is_admin(self.auth, request) + token_info = await self.store.get_one_registration_token(token) + + # If no result return a 404 + if token_info is None: + raise NotFoundError(f"No such registration token: {token}") + + return HTTPStatus.OK, token_info + + async def on_PUT(self, request: SynapseRequest, token: str) -> Tuple[int, JsonDict]: + """Update a registration token.""" + await assert_requester_is_admin(self.auth, request) + body = parse_json_object_from_request(request) + new_attributes = {} + + # Only add uses_allowed to new_attributes if it is present and valid + if "uses_allowed" in body: + uses_allowed = body["uses_allowed"] + if not ( + uses_allowed is None + or (isinstance(uses_allowed, int) and uses_allowed >= 0) + ): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "uses_allowed must be a non-negative integer or null", + Codes.INVALID_PARAM, + ) + new_attributes["uses_allowed"] = uses_allowed + + if "expiry_time" in body: + expiry_time = body["expiry_time"] + if not isinstance(expiry_time, (int, type(None))): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "expiry_time must be an integer or null", + Codes.INVALID_PARAM, + ) + if isinstance(expiry_time, int) and expiry_time < self.clock.time_msec(): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "expiry_time must not be in the past", + Codes.INVALID_PARAM, + ) + new_attributes["expiry_time"] = expiry_time + + if len(new_attributes) == 0: + # Nothing to update, get token info to return + token_info = await self.store.get_one_registration_token(token) + else: + token_info = await self.store.update_registration_token( + token, new_attributes + ) + + # If no result return a 404 + if token_info is None: + raise NotFoundError(f"No such registration token: {token}") + + return HTTPStatus.OK, token_info + + async def on_DELETE( + self, request: SynapseRequest, token: str + ) -> Tuple[int, JsonDict]: + """Delete a registration token.""" + await assert_requester_is_admin(self.auth, request) + + if await self.store.delete_registration_token(token): + return HTTPStatus.OK, {} + + raise NotFoundError(f"No such registration token: {token}") diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index cef9f8a33827..a8b36d325b7c 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -13,7 +13,7 @@ # limitations under the License. 
import logging from http import HTTPStatus -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import TYPE_CHECKING, List, Optional, Tuple, cast from urllib import parse as urlparse from copy import deepcopy @@ -21,6 +21,7 @@ from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.api.filtering import Filter from synapse.http.servlet import ( + ResolveRoomIdMixin, RestServlet, assert_params_in_dict, parse_integer, @@ -34,7 +35,7 @@ assert_user_is_admin, ) from synapse.storage.databases.main.room import RoomSortOrder -from synapse.types import JsonDict, RoomAlias, RoomID, UserID, create_requester +from synapse.types import JsonDict, RoomID, UserID, create_requester from synapse.util import json_decoder if TYPE_CHECKING: @@ -48,116 +49,137 @@ logger = logging.getLogger(__name__) -class ResolveRoomIdMixin: - def __init__(self, hs: "HomeServer"): - self.room_member_handler = hs.get_room_member_handler() - - async def resolve_room_id( - self, room_identifier: str, remote_room_hosts: Optional[List[str]] = None - ) -> Tuple[str, Optional[List[str]]]: - """ - Resolve a room identifier to a room ID, if necessary. - - This also performanes checks to ensure the room ID is of the proper form. - - Args: - room_identifier: The room ID or alias. - remote_room_hosts: The potential remote room hosts to use. +class RoomRestV2Servlet(RestServlet): + """Delete a room from server asynchronously with a background task. - Returns: - The resolved room ID. - - Raises: - SynapseError if the room ID is of the wrong form. - """ - if RoomID.is_valid(room_identifier): - resolved_room_id = room_identifier - elif RoomAlias.is_valid(room_identifier): - room_alias = RoomAlias.from_string(room_identifier) - ( - room_id, - remote_room_hosts, - ) = await self.room_member_handler.lookup_room_alias(room_alias) - resolved_room_id = room_id.to_string() - else: - raise SynapseError( - 400, "%s was not legal room ID or room alias" % (room_identifier,) - ) - if not resolved_room_id: - raise SynapseError( - 400, "Unknown room ID or room alias %s" % room_identifier - ) - return resolved_room_id, remote_room_hosts + It is a combination and improvement of shutdown and purge room. + Shuts down a room by removing all local users from the room. + Blocking all future invites and joins to the room is optional. -class ShutdownRoomRestServlet(RestServlet): - """Shuts down a room by removing all local users from the room and blocking - all future invites and joins to the room. Any local aliases will be repointed - to a new room created by `new_room_user_id` and kicked users will be auto + If desired any local aliases will be repointed to a new room + created by `new_room_user_id` and kicked users will be auto- joined to the new room. + + If 'purge' is true, it will remove all traces of a room from the database. 
""" - PATTERNS = admin_patterns("/shutdown_room/(?P[^/]+)") + PATTERNS = admin_patterns("/rooms/(?P[^/]+)$", "v2") def __init__(self, hs: "HomeServer"): - self.hs = hs - self.auth = hs.get_auth() - self.room_shutdown_handler = hs.get_room_shutdown_handler() + self._auth = hs.get_auth() + self._store = hs.get_datastore() + self._pagination_handler = hs.get_pagination_handler() - async def on_POST( + async def on_DELETE( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) content = parse_json_object_from_request(request) - assert_params_in_dict(content, ["new_room_user_id"]) - ret = await self.room_shutdown_handler.shutdown_room( + block = content.get("block", False) + if not isinstance(block, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'block' must be a boolean, if given", + Codes.BAD_JSON, + ) + + purge = content.get("purge", True) + if not isinstance(purge, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'purge' must be a boolean, if given", + Codes.BAD_JSON, + ) + + force_purge = content.get("force_purge", False) + if not isinstance(force_purge, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'force_purge' must be a boolean, if given", + Codes.BAD_JSON, + ) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + delete_id = self._pagination_handler.start_shutdown_and_purge_room( room_id=room_id, - new_room_user_id=content["new_room_user_id"], + new_room_user_id=content.get("new_room_user_id"), new_room_name=content.get("room_name"), message=content.get("message"), requester_user_id=requester.user.to_string(), - block=True, + block=block, + purge=purge, + force_purge=force_purge, ) - return (200, ret) + return HTTPStatus.OK, {"delete_id": delete_id} -class DeleteRoomRestServlet(RestServlet): - """Delete a room from server. +class DeleteRoomStatusByRoomIdRestServlet(RestServlet): + """Get the status of the delete room background task.""" - It is a combination and improvement of shutdown and purge room. + PATTERNS = admin_patterns("/rooms/(?P[^/]+)/delete_status$", "v2") - Shuts down a room by removing all local users from the room. - Blocking all future invites and joins to the room is optional. + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._pagination_handler = hs.get_pagination_handler() - If desired any local aliases will be repointed to a new room - created by `new_room_user_id` and kicked users will be auto- - joined to the new room. + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: - If 'purge' is true, it will remove all traces of a room from the database. 
- """ + await assert_requester_is_admin(self._auth, request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + delete_ids = self._pagination_handler.get_delete_ids_by_room(room_id) + if delete_ids is None: + raise NotFoundError("No delete task for room_id '%s' found" % room_id) + + response = [] + for delete_id in delete_ids: + delete = self._pagination_handler.get_delete_status(delete_id) + if delete: + response += [ + { + "delete_id": delete_id, + **delete.asdict(), + } + ] + return HTTPStatus.OK, {"results": cast(JsonDict, response)} + + +class DeleteRoomStatusByDeleteIdRestServlet(RestServlet): + """Get the status of the delete room background task.""" - PATTERNS = admin_patterns("/rooms/(?P[^/]+)/delete$") + PATTERNS = admin_patterns("/rooms/delete_status/(?P[^/]+)$", "v2") def __init__(self, hs: "HomeServer"): - self.hs = hs - self.auth = hs.get_auth() - self.room_shutdown_handler = hs.get_room_shutdown_handler() - self.pagination_handler = hs.get_pagination_handler() + self._auth = hs.get_auth() + self._pagination_handler = hs.get_pagination_handler() - async def on_POST( - self, request: SynapseRequest, room_id: str + async def on_GET( + self, request: SynapseRequest, delete_id: str ) -> Tuple[int, JsonDict]: - return await _delete_room( - request, - room_id, - self.auth, - self.room_shutdown_handler, - self.pagination_handler, - ) + + await assert_requester_is_admin(self._auth, request) + + delete_status = self._pagination_handler.get_delete_status(delete_id) + if delete_status is None: + raise NotFoundError("delete id '%s' not found" % delete_id) + + return HTTPStatus.OK, cast(JsonDict, delete_status.asdict()) class ListRoomRestServlet(RestServlet): @@ -200,15 +222,15 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: RoomSortOrder.STATE_EVENTS.value, ): raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Unknown value for order_by: %s" % (order_by,), errcode=Codes.INVALID_PARAM, ) - search_term = parse_string(request, "search_term") + search_term = parse_string(request, "search_term", encoding="utf-8") if search_term == "": raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "search_term cannot be an empty string", errcode=Codes.INVALID_PARAM, ) @@ -216,7 +238,9 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: direction = parse_string(request, "dir", default="f") if direction not in ("f", "b"): raise SynapseError( - 400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM + HTTPStatus.BAD_REQUEST, + "Unknown direction: %s" % (direction,), + errcode=Codes.INVALID_PARAM, ) reverse_order = True if direction == "b" else False @@ -248,7 +272,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: else: response["prev_batch"] = 0 - return 200, response + return HTTPStatus.OK, response """ Create a new room. 
It is possible to set an explicit @@ -310,7 +334,7 @@ async def on_GET( members = await self.store.get_users_in_room(room_id) ret["joined_local_devices"] = await self.store.count_devices_by_users(members) - return (200, ret) + return HTTPStatus.OK, ret async def on_PUT(self, request, room_id): await assert_requester_is_admin(self.auth, request) @@ -363,7 +387,7 @@ async def on_PUT(self, request, room_id): async def on_DELETE( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - return await _delete_room( + return await self._delete_room( request, room_id, self.auth, @@ -371,6 +395,71 @@ async def on_DELETE( self.pagination_handler, ) + async def _delete_room( + self, + request: SynapseRequest, + room_id: str, + auth: "Auth", + room_shutdown_handler: "RoomShutdownHandler", + pagination_handler: "PaginationHandler", + ) -> Tuple[int, JsonDict]: + requester = await auth.get_user_by_req(request) + await assert_user_is_admin(auth, requester.user) + + content = parse_json_object_from_request(request) + + block = content.get("block", False) + if not isinstance(block, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'block' must be a boolean, if given", + Codes.BAD_JSON, + ) + + purge = content.get("purge", True) + if not isinstance(purge, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'purge' must be a boolean, if given", + Codes.BAD_JSON, + ) + + force_purge = content.get("force_purge", False) + if not isinstance(force_purge, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'force_purge' must be a boolean, if given", + Codes.BAD_JSON, + ) + + ret = await room_shutdown_handler.shutdown_room( + room_id=room_id, + new_room_user_id=content.get("new_room_user_id"), + new_room_name=content.get("room_name"), + message=content.get("message"), + requester_user_id=requester.user.to_string(), + block=block, + ) + + # Purge room + if purge: + try: + await pagination_handler.purge_room(room_id, force=force_purge) + except NotFoundError: + if block: + # We can block unknown rooms with this endpoint, in which case + # a failed purge is expected. + pass + else: + # But otherwise, we expect this purge to have succeeded. + raise + + # Cast safety: cast away the knowledge that this is a TypedDict. + # See https://github.com/python/mypy/issues/4976#issuecomment-579883622 + # for some discussion on why this is necessary. Either way, + # `ret` is an opaque dictionary blob as far as the rest of the app cares. + return HTTPStatus.OK, cast(JsonDict, ret) + class RoomMembersRestServlet(RestServlet): """ @@ -396,7 +485,7 @@ async def on_GET( members = await self.store.get_users_in_room(room_id) ret = {"members": members, "total": len(members)} - return 200, ret + return HTTPStatus.OK, ret class RoomStateRestServlet(RestServlet): @@ -426,16 +515,10 @@ async def on_GET( event_ids = await self.store.get_current_state_ids(room_id) events = await self.store.get_events(event_ids.values()) now = self.clock.time_msec() - room_state = await self._event_serializer.serialize_events( - events.values(), - now, - # We don't bother bundling aggregations in when asked for state - # events, as clients won't use them. 
- bundle_aggregations=False, - ) + room_state = await self._event_serializer.serialize_events(events.values(), now) ret = {"state": room_state} - return 200, ret + return HTTPStatus.OK, ret class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet): @@ -464,7 +547,10 @@ async def on_POST( target_user = UserID.from_string(content["user_id"]) if not self.hs.is_mine(target_user): - raise SynapseError(400, "This endpoint can only be used with local users") + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "This endpoint can only be used with local users", + ) if not await self.admin_handler.get_user(target_user): raise NotFoundError("User not found") @@ -510,7 +596,7 @@ async def on_POST( ratelimit=False, ) - return 200, {"room_id": room_id} + return HTTPStatus.OK, {"room_id": room_id} class MakeRoomAdminRestServlet(ResolveRoomIdMixin, RestServlet): @@ -551,7 +637,7 @@ async def on_POST( # Figure out which local users currently have power in the room, if any. room_state = await self.state_handler.get_current_state(room_id) if not room_state: - raise SynapseError(400, "Server not in room") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Server not in room") create_event = room_state[(EventTypes.Create, "")] power_levels = room_state.get((EventTypes.PowerLevels, "")) @@ -565,7 +651,9 @@ async def on_POST( admin_users.sort(key=lambda user: user_power[user]) if not admin_users: - raise SynapseError(400, "No local admin user in room") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "No local admin user in room" + ) admin_user_id = None @@ -582,7 +670,7 @@ async def on_POST( if not admin_user_id: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "No local admin user in room", ) @@ -593,7 +681,7 @@ async def on_POST( admin_user_id = create_event.sender if not self.is_mine_id(admin_user_id): raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "No local admin user in room", ) @@ -622,7 +710,8 @@ async def on_POST( except AuthError: # The admin user we found turned out not to have enough power. raise SynapseError( - 400, "No local admin user in room with power to update power levels." 
+ HTTPStatus.BAD_REQUEST, + "No local admin user in room with power to update power levels.", ) # Now we check if the user we're granting admin rights to is already in @@ -636,7 +725,7 @@ async def on_POST( ) if is_joined: - return 200, {} + return HTTPStatus.OK, {} join_rules = room_state.get((EventTypes.JoinRules, "")) is_public = False @@ -644,7 +733,7 @@ async def on_POST( is_public = join_rules.content.get("join_rule") == JoinRules.PUBLIC if is_public: - return 200, {} + return HTTPStatus.OK, {} await self.room_member_handler.update_membership( fake_requester, @@ -653,7 +742,7 @@ async def on_POST( action=Membership.INVITE, ) - return 200, {} + return HTTPStatus.OK, {} class ForwardExtremitiesRestServlet(ResolveRoomIdMixin, RestServlet): @@ -685,7 +774,7 @@ async def on_DELETE( room_id, _ = await self.resolve_room_id(room_identifier) deleted_count = await self.store.delete_forward_extremities_for_room(room_id) - return 200, {"deleted": deleted_count} + return HTTPStatus.OK, {"deleted": deleted_count} async def on_GET( self, request: SynapseRequest, room_identifier: str @@ -696,7 +785,7 @@ async def on_GET( room_id, _ = await self.resolve_room_id(room_identifier) extremities = await self.store.get_forward_extremities_for_room(room_id) - return 200, {"count": len(extremities), "results": extremities} + return HTTPStatus.OK, {"count": len(extremities), "results": extremities} class RoomEventContextServlet(RestServlet): @@ -711,6 +800,7 @@ class RoomEventContextServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() + self._hs = hs self.clock = hs.get_clock() self.room_context_handler = hs.get_room_context_handler() self._event_serializer = hs.get_event_client_serializer() @@ -728,7 +818,9 @@ async def on_GET( filter_str = parse_string(request, "filter", encoding="utf-8") if filter_str: filter_json = urlparse.unquote(filter_str) - event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json)) + event_filter: Optional[Filter] = Filter( + self._hs, json_decoder.decode(filter_json) + ) else: event_filter = None @@ -742,7 +834,9 @@ async def on_GET( ) if not results: - raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) + raise SynapseError( + HTTPStatus.NOT_FOUND, "Event not found.", errcode=Codes.NOT_FOUND + ) time_now = self.clock.time_msec() results["events_before"] = await self._event_serializer.serialize_events( @@ -755,13 +849,10 @@ async def on_GET( results["events_after"], time_now ) results["state"] = await self._event_serializer.serialize_events( - results["state"], - time_now, - # No need to bundle aggregations for state events - bundle_aggregations=False, + results["state"], time_now ) - return 200, results + return HTTPStatus.OK, results class PowerLevelsRestServlet(RestServlet): @@ -813,53 +904,64 @@ async def on_GET(self, request, room_id): return 200, ret -async def _delete_room( - request: SynapseRequest, - room_id: str, - auth: "Auth", - room_shutdown_handler: "RoomShutdownHandler", - pagination_handler: "PaginationHandler", -) -> Tuple[int, JsonDict]: - requester = await auth.get_user_by_req(request) - await assert_user_is_admin(auth, requester.user) - - content = parse_json_object_from_request(request) - - block = content.get("block", False) - if not isinstance(block, bool): - raise SynapseError( - HTTPStatus.BAD_REQUEST, - "Param 'block' must be a boolean, if given", - Codes.BAD_JSON, - ) +class BlockRoomRestServlet(RestServlet): + """ + Manage blocking of rooms. + On PUT: Add or remove a room from blocking list. 
+ On GET: Get blocking status of room and user who has blocked this room. + """ - purge = content.get("purge", True) - if not isinstance(purge, bool): - raise SynapseError( - HTTPStatus.BAD_REQUEST, - "Param 'purge' must be a boolean, if given", - Codes.BAD_JSON, - ) + PATTERNS = admin_patterns("/rooms/(?P[^/]+)/block$") - force_purge = content.get("force_purge", False) - if not isinstance(force_purge, bool): - raise SynapseError( - HTTPStatus.BAD_REQUEST, - "Param 'force_purge' must be a boolean, if given", - Codes.BAD_JSON, - ) + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() - ret = await room_shutdown_handler.shutdown_room( - room_id=room_id, - new_room_user_id=content.get("new_room_user_id"), - new_room_name=content.get("room_name"), - message=content.get("message"), - requester_user_id=requester.user.to_string(), - block=block, - ) + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) - # Purge room - if purge: - await pagination_handler.purge_room(room_id, force=force_purge) + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + blocked_by = await self._store.room_is_blocked_by(room_id) + # Test `not None` if `user_id` is an empty string + # if someone add manually an entry in database + if blocked_by is not None: + response = {"block": True, "user_id": blocked_by} + else: + response = {"block": False} + + return HTTPStatus.OK, response + + async def on_PUT( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + content = parse_json_object_from_request(request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + assert_params_in_dict(content, ["block"]) + block = content.get("block") + if not isinstance(block, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'block' must be a boolean.", + Codes.BAD_JSON, + ) + + if block: + await self._store.block_room(room_id, requester.user.to_string()) + else: + await self._store.unblock_room(room_id) - return (200, ret) + return HTTPStatus.OK, {"block": block} diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py index b5e4c474efc8..b295fb078bc7 100644 --- a/synapse/rest/admin/server_notice_servlet.py +++ b/synapse/rest/admin/server_notice_servlet.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
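# [Editor's note] The block endpoint above is a plain toggle: PUT with
# {"block": true|false} sets it, GET reads it back. A client-side sketch
# (URL and token are placeholders; needs `requests`):
import requests

BASE = "http://localhost:8008/_synapse/admin/v1"
HEADERS = {"Authorization": "Bearer <admin_access_token>"}

def set_room_block(room_id: str, block: bool) -> dict:
    resp = requests.put(
        f"{BASE}/rooms/{room_id}/block", headers=HEADERS, json={"block": block}
    )
    resp.raise_for_status()
    return resp.json()  # echoes {"block": <bool>}

def get_room_block(room_id: str) -> dict:
    # {"block": true, "user_id": "@admin:..."} when blocked, else {"block": false}
    return requests.get(f"{BASE}/rooms/{room_id}/block", headers=HEADERS).json()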
-from typing import TYPE_CHECKING, Optional, Tuple +from http import HTTPStatus +from typing import TYPE_CHECKING, Awaitable, Optional, Tuple from synapse.api.constants import EventTypes -from synapse.api.errors import SynapseError +from synapse.api.errors import NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, @@ -53,9 +54,11 @@ class SendServerNoticeServlet(RestServlet): def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() + self.server_notices_manager = hs.get_server_notices_manager() + self.admin_handler = hs.get_admin_handler() self.txns = HttpTransactionCache(hs) - def register(self, json_resource: HttpServer): + def register(self, json_resource: HttpServer) -> None: PATTERN = "/send_server_notice" json_resource.register_paths( "POST", admin_patterns(PATTERN + "$"), self.on_POST, self.__class__.__name__ @@ -79,24 +82,33 @@ async def on_POST( # We grab the server notices manager here as its initialisation has a check for worker processes, # but worker processes still need to initialise SendServerNoticeServlet (as it is part of the # admin api). - if not self.hs.get_server_notices_manager().is_enabled(): - raise SynapseError(400, "Server notices are not enabled on this server") + if not self.server_notices_manager.is_enabled(): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Server notices are not enabled on this server" + ) - user_id = body["user_id"] - UserID.from_string(user_id) - if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Server notices can only be sent to local users") + target_user = UserID.from_string(body["user_id"]) + if not self.hs.is_mine(target_user): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Server notices can only be sent to local users" + ) - event = await self.hs.get_server_notices_manager().send_notice( - user_id=body["user_id"], + if not await self.admin_handler.get_user(target_user): + raise NotFoundError("User not found") + + event = await self.server_notices_manager.send_notice( + user_id=target_user.to_string(), type=event_type, state_key=state_key, event_content=body["content"], + txn_id=txn_id, ) - return 200, {"event_id": event.event_id} + return HTTPStatus.OK, {"event_id": event.event_id} - def on_PUT(self, request: SynapseRequest, txn_id: str) -> Tuple[int, JsonDict]: + def on_PUT( + self, request: SynapseRequest, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: return self.txns.fetch_or_execute_request( request, self.on_POST, request, txn_id ) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index 948de94ccd6a..ca41fd45f2bd 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -13,6 +13,7 @@ # limitations under the License. 
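# [Editor's note] Threading `txn_id` through to send_notice above is what
# makes retried PUTs idempotent: the HttpTransactionCache replays the first
# result instead of sending a second notice. A toy model of that behaviour
# (the real cache also keys on the requester and request path; this does not):
import asyncio
from typing import Any, Awaitable, Callable, Dict

class TxnCache:
    def __init__(self) -> None:
        self._results: Dict[str, Any] = {}

    async def fetch_or_execute(
        self, txn_id: str, fn: Callable[[], Awaitable[Any]]
    ) -> Any:
        if txn_id not in self._results:
            self._results[txn_id] = await fn()
        return self._results[txn_id]

async def main() -> None:
    cache = TxnCache()
    calls = 0

    async def send() -> str:
        nonlocal calls
        calls += 1
        return "$event_id"

    # A retried PUT with the same txn_id does not send a second notice.
    assert await cache.fetch_or_execute("txn1", send) == "$event_id"
    assert await cache.fetch_or_execute("txn1", send) == "$event_id"
    assert calls == 1

asyncio.run(main())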
import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, SynapseError @@ -53,7 +54,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: UserSortOrder.DISPLAYNAME.value, ): raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Unknown value for order_by: %s" % (order_by,), errcode=Codes.INVALID_PARAM, ) @@ -61,7 +62,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: start = parse_integer(request, "from", default=0) if start < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter from must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) @@ -69,7 +70,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: limit = parse_integer(request, "limit", default=100) if limit < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter limit must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) @@ -77,7 +78,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: from_ts = parse_integer(request, "from_ts", default=0) if from_ts < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter from_ts must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) @@ -86,13 +87,13 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if until_ts is not None: if until_ts < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter until_ts must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) if until_ts <= from_ts: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter until_ts must be greater than from_ts.", errcode=Codes.INVALID_PARAM, ) @@ -100,7 +101,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: search_term = parse_string(request, "search_term") if search_term == "": raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter search_term cannot be an empty string.", errcode=Codes.INVALID_PARAM, ) @@ -108,7 +109,9 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: direction = parse_string(request, "dir", default="f") if direction not in ("f", "b"): raise SynapseError( - 400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM + HTTPStatus.BAD_REQUEST, + "Unknown direction: %s" % (direction,), + errcode=Codes.INVALID_PARAM, ) users_media, total = await self.store.get_users_media_usage_paginate( @@ -118,4 +121,4 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if (start + limit) < total: ret["next_token"] = start + len(users_media) - return 200, ret + return HTTPStatus.OK, ret diff --git a/synapse/rest/admin/purge_room_servlet.py b/synapse/rest/admin/username_available.py similarity index 51% rename from synapse/rest/admin/purge_room_servlet.py rename to synapse/rest/admin/username_available.py index 2365ff7a0f26..2bf1472967dd 100644 --- a/synapse/rest/admin/purge_room_servlet.py +++ b/synapse/rest/admin/username_available.py @@ -11,48 +11,41 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
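# [Editor's note] The statistics endpoint above validates its time window and
# direction before querying the store. The same checks as a standalone helper
# (hypothetical, for illustration):
from typing import Optional

def check_window(from_ts: int, until_ts: Optional[int], direction: str) -> None:
    if from_ts < 0:
        raise ValueError("from_ts must be a positive integer")
    if until_ts is not None:
        if until_ts < 0:
            raise ValueError("until_ts must be a positive integer")
        if until_ts <= from_ts:
            raise ValueError("until_ts must be greater than from_ts")
    if direction not in ("f", "b"):  # forwards / backwards
        raise ValueError(f"Unknown direction: {direction}")

check_window(0, 1625394937000, "f")  # ok
# check_window(10, 5, "f")           # would raise: empty window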
+import logging +from http import HTTPStatus from typing import TYPE_CHECKING, Tuple -from synapse.http.servlet import ( - RestServlet, - assert_params_in_dict, - parse_json_object_from_request, -) +from synapse.http.servlet import RestServlet, parse_string from synapse.http.site import SynapseRequest -from synapse.rest.admin import assert_requester_is_admin -from synapse.rest.admin._base import admin_patterns +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin from synapse.types import JsonDict if TYPE_CHECKING: from synapse.server import HomeServer +logger = logging.getLogger(__name__) -class PurgeRoomServlet(RestServlet): - """Servlet which will remove all trace of a room from the database - POST /_synapse/admin/v1/purge_room - { - "room_id": "!room:id" - } +class UsernameAvailableRestServlet(RestServlet): + """An admin API to check if a given username is available, regardless of whether registration is enabled. - returns: - - {} + Example: + GET /_synapse/admin/v1/username_available?username=foo + 200 OK + { + "available": true + } """ - PATTERNS = admin_patterns("/purge_room$") + PATTERNS = admin_patterns("/username_available") def __init__(self, hs: "HomeServer"): - self.hs = hs self.auth = hs.get_auth() - self.pagination_handler = hs.get_pagination_handler() + self.registration_handler = hs.get_registration_handler() - async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ("room_id",)) - - await self.pagination_handler.purge_room(body["room_id"]) - - return 200, {} + username = parse_string(request, "username", required=True) + await self.registration_handler.check_username(username) + return HTTPStatus.OK, {"available": True} diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index eef76ab18a94..2a60b602b1f8 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -34,8 +34,8 @@ assert_requester_is_admin, assert_user_is_admin, ) -from synapse.rest.client.v2_alpha._base import client_patterns -from synapse.storage.databases.main.media_repository import MediaSortOrder +from synapse.rest.client._base import client_patterns +from synapse.storage.databases.main.registration import ExternalIDReuseException from synapse.storage.databases.main.stats import UserSortOrder from synapse.types import JsonDict, UserID @@ -79,14 +79,14 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if start < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter from must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) if limit < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Query parameter limit must be a string representing a positive integer.", errcode=Codes.INVALID_PARAM, ) @@ -122,7 +122,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if (start + limit) < total: ret["next_token"] = str(start + len(users)) - return 200, ret + return HTTPStatus.OK, ret class UserRestServletV2(RestServlet): @@ -172,14 +172,14 @@ async def on_GET( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") ret = await self.admin_handler.get_user(target_user) if not ret: 
raise NotFoundError("User not found") - return 200, ret + return HTTPStatus.OK, ret async def on_PUT( self, request: SynapseRequest, user_id: str @@ -191,77 +191,133 @@ async def on_PUT( body = parse_json_object_from_request(request) if not self.hs.is_mine(target_user): - raise SynapseError(400, "This endpoint can only be used with local users") + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "This endpoint can only be used with local users", + ) user = await self.admin_handler.get_user(target_user) user_id = target_user.to_string() + # check for required parameters for each threepid + threepids = body.get("threepids") + if threepids is not None: + for threepid in threepids: + assert_params_in_dict(threepid, ["medium", "address"]) + + # check for required parameters for each external_id + external_ids = body.get("external_ids") + if external_ids is not None: + for external_id in external_ids: + assert_params_in_dict(external_id, ["auth_provider", "external_id"]) + + user_type = body.get("user_type", None) + if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid user type") + + set_admin_to = body.get("admin", False) + if not isinstance(set_admin_to, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'admin' must be a boolean, if given", + Codes.BAD_JSON, + ) + + password = body.get("password", None) + if password is not None: + if not isinstance(password, str) or len(password) > 512: + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid password") + + deactivate = body.get("deactivated", False) + if not isinstance(deactivate, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "'deactivated' parameter is not of type boolean" + ) + + # convert List[Dict[str, str]] into List[Tuple[str, str]] + if external_ids is not None: + new_external_ids = [ + (external_id["auth_provider"], external_id["external_id"]) + for external_id in external_ids + ] + + # convert List[Dict[str, str]] into Set[Tuple[str, str]] + if threepids is not None: + new_threepids = { + (threepid["medium"], threepid["address"]) for threepid in threepids + } + if user: # modify user if "displayname" in body: await self.profile_handler.set_displayname( target_user, requester, body["displayname"], True ) - if "threepids" in body: - # check for required parameters for each threepid - for threepid in body["threepids"]: - assert_params_in_dict(threepid, ["medium", "address"]) + if threepids is not None: + # get changed threepids (added and removed) + # convert List[Dict[str, Any]] into Set[Tuple[str, str]] + cur_threepids = { + (threepid["medium"], threepid["address"]) + for threepid in await self.store.user_get_threepids(user_id) + } + add_threepids = new_threepids - cur_threepids + del_threepids = cur_threepids - new_threepids - # remove old threepids from user - threepids = await self.store.user_get_threepids(user_id) - for threepid in threepids: + # remove old threepids + for medium, address in del_threepids: try: await self.auth_handler.delete_threepid( - user_id, threepid["medium"], threepid["address"], None + user_id, medium, address, None ) except Exception: logger.exception("Failed to remove threepids") raise SynapseError(500, "Failed to remove threepids") - # add new threepids to user + # add new threepids current_time = self.hs.get_clock().time_msec() - for threepid in body["threepids"]: + for medium, address in add_threepids: await self.auth_handler.add_threepid( - user_id, threepid["medium"], threepid["address"], current_time + 
user_id, medium, address, current_time ) - if "avatar_url" in body and type(body["avatar_url"]) == str: + if external_ids is not None: + try: + await self.store.replace_user_external_id( + new_external_ids, + user_id, + ) + except ExternalIDReuseException: + raise SynapseError( + HTTPStatus.CONFLICT, "External id is already in use." + ) + + if "avatar_url" in body and isinstance(body["avatar_url"], str): await self.profile_handler.set_avatar_url( target_user, requester, body["avatar_url"], True ) if "admin" in body: - set_admin_to = bool(body["admin"]) if set_admin_to != user["admin"]: auth_user = requester.user if target_user == auth_user and not set_admin_to: - raise SynapseError(400, "You may not demote yourself.") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "You may not demote yourself." + ) await self.store.set_server_admin(target_user, set_admin_to) - if "password" in body: - if not isinstance(body["password"], str) or len(body["password"]) > 512: - raise SynapseError(400, "Invalid password") - else: - new_password = body["password"] - logout_devices = True - - new_password_hash = await self.auth_handler.hash(new_password) - - await self.set_password_handler.set_password( - target_user.to_string(), - new_password_hash, - logout_devices, - requester, - ) + if password is not None: + logout_devices = True + new_password_hash = await self.auth_handler.hash(password) + + await self.set_password_handler.set_password( + target_user.to_string(), + new_password_hash, + logout_devices, + requester, + ) if "deactivated" in body: - deactivate = body["deactivated"] - if not isinstance(deactivate, bool): - raise SynapseError( - 400, "'deactivated' parameter is not of type boolean" - ) - if deactivate and not user["deactivated"]: await self.deactivate_account_handler.deactivate_account( target_user.to_string(), False, requester, by_admin=True @@ -272,55 +328,47 @@ async def on_PUT( and self.auth_handler.can_change_password() ): raise SynapseError( - 400, "Must provide a password to re-activate an account." 
+ HTTPStatus.BAD_REQUEST, + "Must provide a password to re-activate an account.", ) await self.deactivate_account_handler.activate_account( target_user.to_string() ) + if "user_type" in body: + await self.store.set_user_type(target_user, user_type) + user = await self.admin_handler.get_user(target_user) assert user is not None - return 200, user + return HTTPStatus.OK, user else: # create user - password = body.get("password") + displayname = body.get("displayname", None) + password_hash = None if password is not None: - if not isinstance(password, str) or len(password) > 512: - raise SynapseError(400, "Invalid password") password_hash = await self.auth_handler.hash(password) - admin = body.get("admin", None) - user_type = body.get("user_type", None) - displayname = body.get("displayname", None) - - if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: - raise SynapseError(400, "Invalid user type") - user_id = await self.registration_handler.register_user( localpart=target_user.localpart, password_hash=password_hash, - admin=bool(admin), + admin=set_admin_to, default_display_name=displayname, user_type=user_type, by_admin=True, ) - if "threepids" in body: - # check for required parameters for each threepid - for threepid in body["threepids"]: - assert_params_in_dict(threepid, ["medium", "address"]) - + if threepids is not None: current_time = self.hs.get_clock().time_msec() - for threepid in body["threepids"]: + for medium, address in new_threepids: await self.auth_handler.add_threepid( - user_id, threepid["medium"], threepid["address"], current_time + user_id, medium, address, current_time ) if ( - self.hs.config.email_enable_notifs - and self.hs.config.email_notif_for_new_users + self.hs.config.email.email_enable_notifs + and self.hs.config.email.email_notif_for_new_users ): await self.pusher_pool.add_pusher( user_id=user_id, @@ -328,12 +376,25 @@ async def on_PUT( kind="email", app_id="m.email", app_display_name="Email Notifications", - device_display_name=threepid["address"], - pushkey=threepid["address"], + device_display_name=address, + pushkey=address, lang=None, # We don't know a user's language here data={}, ) + if external_ids is not None: + try: + for auth_provider, external_id in new_external_ids: + await self.store.record_user_external_id( + auth_provider, + external_id, + user_id, + ) + except ExternalIDReuseException: + raise SynapseError( + HTTPStatus.CONFLICT, "External id is already in use." + ) + if "avatar_url" in body and isinstance(body["avatar_url"], str): await self.profile_handler.set_avatar_url( target_user, requester, body["avatar_url"], True @@ -362,7 +423,7 @@ def __init__(self, hs: "HomeServer"): self.nonces: Dict[str, int] = {} self.hs = hs - def _clear_old_nonces(self): + def _clear_old_nonces(self) -> None: """ Clear out old nonces that are older than NONCE_TIMEOUT. 
""" @@ -380,51 +441,61 @@ def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: nonce = secrets.token_hex(64) self.nonces[nonce] = int(self.reactor.seconds()) - return 200, {"nonce": nonce} + return HTTPStatus.OK, {"nonce": nonce} async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: self._clear_old_nonces() - if not self.hs.config.registration_shared_secret: - raise SynapseError(400, "Shared secret registration is not enabled") + if not self.hs.config.registration.registration_shared_secret: + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Shared secret registration is not enabled" + ) body = parse_json_object_from_request(request) if "nonce" not in body: - raise SynapseError(400, "nonce must be specified", errcode=Codes.BAD_JSON) + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "nonce must be specified", + errcode=Codes.BAD_JSON, + ) nonce = body["nonce"] if nonce not in self.nonces: - raise SynapseError(400, "unrecognised nonce") + raise SynapseError(HTTPStatus.BAD_REQUEST, "unrecognised nonce") # Delete the nonce, so it can't be reused, even if it's invalid del self.nonces[nonce] if "username" not in body: raise SynapseError( - 400, "username must be specified", errcode=Codes.BAD_JSON + HTTPStatus.BAD_REQUEST, + "username must be specified", + errcode=Codes.BAD_JSON, ) else: if not isinstance(body["username"], str) or len(body["username"]) > 512: - raise SynapseError(400, "Invalid username") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid username") username = body["username"].encode("utf-8") if b"\x00" in username: - raise SynapseError(400, "Invalid username") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid username") if "password" not in body: raise SynapseError( - 400, "password must be specified", errcode=Codes.BAD_JSON + HTTPStatus.BAD_REQUEST, + "password must be specified", + errcode=Codes.BAD_JSON, ) else: password = body["password"] if not isinstance(password, str) or len(password) > 512: - raise SynapseError(400, "Invalid password") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid password") password_bytes = password.encode("utf-8") if b"\x00" in password_bytes: - raise SynapseError(400, "Invalid password") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid password") password_hash = await self.auth_handler.hash(password) @@ -433,15 +504,17 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: displayname = body.get("displayname", None) if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: - raise SynapseError(400, "Invalid user type") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid user type") if "mac" not in body: - raise SynapseError(400, "mac must be specified", errcode=Codes.BAD_JSON) + raise SynapseError( + HTTPStatus.BAD_REQUEST, "mac must be specified", errcode=Codes.BAD_JSON + ) got_mac = body["mac"] want_mac_builder = hmac.new( - key=self.hs.config.registration_shared_secret.encode(), + key=self.hs.config.registration.registration_shared_secret.encode(), digestmod=hashlib.sha1, ) want_mac_builder.update(nonce.encode("utf8")) @@ -458,10 +531,10 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: want_mac = want_mac_builder.hexdigest() if not hmac.compare_digest(want_mac.encode("ascii"), got_mac.encode("ascii")): - raise SynapseError(403, "HMAC incorrect") + raise SynapseError(HTTPStatus.FORBIDDEN, "HMAC incorrect") # Reuse the parts of RegisterRestServlet to reduce code duplication - from synapse.rest.client.v2_alpha.register import 
RegisterRestServlet + from synapse.rest.client.register import RegisterRestServlet register = RegisterRestServlet(self.hs) @@ -475,7 +548,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: ) result = await register._create_registration_details(user_id, body) - return 200, result + return HTTPStatus.OK, result class WhoisRestServlet(RestServlet): @@ -503,11 +576,11 @@ async def on_GET( await assert_user_is_admin(self.auth, auth_user) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only whois a local user") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only whois a local user") ret = await self.admin_handler.get_whois(target_user) - return 200, ret + return HTTPStatus.OK, ret class DeactivateAccountRestServlet(RestServlet): @@ -526,7 +599,9 @@ async def on_POST( await assert_user_is_admin(self.auth, requester.user) if not self.is_mine(UserID.from_string(target_user_id)): - raise SynapseError(400, "Can only deactivate local users") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Can only deactivate local users" + ) if not await self.store.get_user_by_id(target_user_id): raise NotFoundError("User not found") @@ -548,7 +623,7 @@ async def on_POST( else: id_server_unbind_result = "no-support" - return 200, {"id_server_unbind_result": id_server_unbind_result} + return HTTPStatus.OK, {"id_server_unbind_result": id_server_unbind_result} class AccountValidityRenewServlet(RestServlet): @@ -571,7 +646,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if "user_id" not in body: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "Missing property 'user_id' in the request body", ) @@ -582,7 +657,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: ) res = {"expiration_ts": expiration_ts} - return 200, res + return HTTPStatus.OK, res class ResetPasswordRestServlet(RestServlet): @@ -629,7 +704,7 @@ async def on_POST( await self._set_password_handler.set_password( target_user_id, new_password_hash, logout_devices, requester ) - return 200, {} + return HTTPStatus.OK, {} class SearchUsersRestServlet(RestServlet): @@ -663,16 +738,16 @@ async def on_GET( # To allow all users to get the users list # if not is_admin and target_user != auth_user: - # raise AuthError(403, "You are not a server admin") + # raise AuthError(HTTPStatus.FORBIDDEN, "You are not a server admin") if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only users a local user") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only users a local user") term = parse_string(request, "term", required=True) logger.info("term: %s ", term) ret = await self.store.search_users(term) - return 200, ret + return HTTPStatus.OK, ret class UserAdminServlet(RestServlet): @@ -716,11 +791,14 @@ async def on_GET( target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Only local users can be admins of this homeserver") + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Only local users can be admins of this homeserver", + ) is_admin = await self.store.is_server_admin(target_user) - return 200, {"admin": is_admin} + return HTTPStatus.OK, {"admin": is_admin} async def on_PUT( self, request: SynapseRequest, user_id: str @@ -736,16 +814,19 @@ async def on_PUT( assert_params_in_dict(body, ["admin"]) if not self.hs.is_mine(target_user): - raise SynapseError(400, "Only local users can be admins of this homeserver") + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Only local users can be admins 
of this homeserver", + ) set_admin_to = bool(body["admin"]) if target_user == auth_user and not set_admin_to: - raise SynapseError(400, "You may not demote yourself.") + raise SynapseError(HTTPStatus.BAD_REQUEST, "You may not demote yourself.") await self.store.set_server_admin(target_user, set_admin_to) - return 200, {} + return HTTPStatus.OK, {} class UserMembershipRestServlet(RestServlet): @@ -767,7 +848,7 @@ async def on_GET( room_ids = await self.store.get_rooms_for_user(user_id) ret = {"joined_rooms": list(room_ids), "total": len(room_ids)} - return 200, ret + return HTTPStatus.OK, ret class PushersRestServlet(RestServlet): @@ -796,7 +877,7 @@ async def on_GET( await assert_requester_is_admin(self.auth, request) if not self.is_mine(UserID.from_string(user_id)): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") if not await self.store.get_user_by_id(user_id): raise NotFoundError("User not found") @@ -805,98 +886,10 @@ async def on_GET( filtered_pushers = [p.as_dict() for p in pushers] - return 200, {"pushers": filtered_pushers, "total": len(filtered_pushers)} - - -class UserMediaRestServlet(RestServlet): - """ - Gets information about all uploaded local media for a specific `user_id`. - - Example: - http://localhost:8008/_synapse/admin/v1/users/ - @user:server/media - - Args: - The parameters `from` and `limit` are required for pagination. - By default, a `limit` of 100 is used. - Returns: - A list of media and an integer representing the total number of - media that exist given for this user - """ - - PATTERNS = admin_patterns("/users/(?P[^/]+)/media$") - - def __init__(self, hs: "HomeServer"): - self.is_mine = hs.is_mine - self.auth = hs.get_auth() - self.store = hs.get_datastore() - - async def on_GET( - self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: - # This will always be set by the time Twisted calls us. - assert request.args is not None - - await assert_requester_is_admin(self.auth, request) - - if not self.is_mine(UserID.from_string(user_id)): - raise SynapseError(400, "Can only lookup local users") - - user = await self.store.get_user_by_id(user_id) - if user is None: - raise NotFoundError("Unknown user") - - start = parse_integer(request, "from", default=0) - limit = parse_integer(request, "limit", default=100) - - if start < 0: - raise SynapseError( - 400, - "Query parameter from must be a string representing a positive integer.", - errcode=Codes.INVALID_PARAM, - ) - - if limit < 0: - raise SynapseError( - 400, - "Query parameter limit must be a string representing a positive integer.", - errcode=Codes.INVALID_PARAM, - ) - - # If neither `order_by` nor `dir` is set, set the default order - # to newest media is on top for backward compatibility. 
- if b"order_by" not in request.args and b"dir" not in request.args: - order_by = MediaSortOrder.CREATED_TS.value - direction = "b" - else: - order_by = parse_string( - request, - "order_by", - default=MediaSortOrder.CREATED_TS.value, - allowed_values=( - MediaSortOrder.MEDIA_ID.value, - MediaSortOrder.UPLOAD_NAME.value, - MediaSortOrder.CREATED_TS.value, - MediaSortOrder.LAST_ACCESS_TS.value, - MediaSortOrder.MEDIA_LENGTH.value, - MediaSortOrder.MEDIA_TYPE.value, - MediaSortOrder.QUARANTINED_BY.value, - MediaSortOrder.SAFE_FROM_QUARANTINE.value, - ), - ) - direction = parse_string( - request, "dir", default="f", allowed_values=("f", "b") - ) - - media, total = await self.store.get_local_media_by_user_paginate( - start, limit, user_id, order_by, direction - ) - - ret = {"media": media, "total": total} - if (start + limit) < total: - ret["next_token"] = start + len(media) - - return 200, ret + return HTTPStatus.OK, { + "pushers": filtered_pushers, + "total": len(filtered_pushers), + } class UserTokenRestServlet(RestServlet): @@ -929,29 +922,35 @@ async def on_POST( auth_user = requester.user if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Only local users can be logged in as") + raise SynapseError( HTTPStatus.BAD_REQUEST, "Only local users can be logged in as" ) body = parse_json_object_from_request(request, allow_empty_body=True) valid_until_ms = body.get("valid_until_ms") if valid_until_ms and not isinstance(valid_until_ms, int): - raise SynapseError(400, "'valid_until_ms' parameter must be an int") + raise SynapseError( HTTPStatus.BAD_REQUEST, "'valid_until_ms' parameter must be an int" ) if auth_user.to_string() == user_id: - raise SynapseError(400, "Cannot use admin API to login as self") + raise SynapseError( HTTPStatus.BAD_REQUEST, "Cannot use admin API to login as self" ) - token = await self.auth_handler.get_access_token_for_user_id( + token = await self.auth_handler.create_access_token_for_user_id( user_id=auth_user.to_string(), device_id=None, valid_until_ms=valid_until_ms, puppets_user_id=user_id, ) - return 200, {"access_token": token} + return HTTPStatus.OK, {"access_token": token} class ShadowBanRestServlet(RestServlet): - """An admin API for shadow-banning a user. + """An admin API for controlling whether a user is shadow-banned. A shadow-banned user receives successful responses to their client-server API requests, but the events are not propagated into rooms. @@ -959,11 +958,19 @@ class ShadowBanRestServlet(RestServlet): Shadow-banning a user should be used as a tool of last resort and may lead to confusing or broken behaviour for the client. 
- Example: + Example of shadow-banning a user: POST /_synapse/admin/v1/users/@test:example.com/shadow_ban {} + 200 OK + {} + + Example of removing a user from being shadow-banned: + + DELETE /_synapse/admin/v1/users/@test:example.com/shadow_ban + {} + 200 OK {} """ @@ -981,11 +988,27 @@ async def on_POST( await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Only local users can be shadow-banned") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Only local users can be shadow-banned" + ) await self.store.set_shadow_banned(UserID.from_string(user_id), True) - return 200, {} + return HTTPStatus.OK, {} + + async def on_DELETE( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + if not self.hs.is_mine_id(user_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Only local users can be shadow-banned" + ) + + await self.store.set_shadow_banned(UserID.from_string(user_id), False) + + return HTTPStatus.OK, {} class RateLimitRestServlet(RestServlet): @@ -1017,7 +1040,7 @@ async def on_GET( await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Can only lookup local users") + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") if not await self.store.get_user_by_id(user_id): raise NotFoundError("User not found") @@ -1038,7 +1061,7 @@ async def on_GET( else: ret = {} - return 200, ret + return HTTPStatus.OK, ret async def on_POST( self, request: SynapseRequest, user_id: str @@ -1046,7 +1069,9 @@ async def on_POST( await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Only local users can be ratelimited") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Only local users can be ratelimited" + ) if not await self.store.get_user_by_id(user_id): raise NotFoundError("User not found") @@ -1058,14 +1083,14 @@ async def on_POST( if not isinstance(messages_per_second, int) or messages_per_second < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "%r parameter must be a positive int" % (messages_per_second,), errcode=Codes.INVALID_PARAM, ) if not isinstance(burst_count, int) or burst_count < 0: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "%r parameter must be a positive int" % (burst_count,), errcode=Codes.INVALID_PARAM, ) @@ -1081,7 +1106,7 @@ async def on_POST( "burst_count": ratelimit.burst_count, } - return 200, ret + return HTTPStatus.OK, ret async def on_DELETE( self, request: SynapseRequest, user_id: str @@ -1089,11 +1114,13 @@ async def on_DELETE( await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine_id(user_id): - raise SynapseError(400, "Only local users can be ratelimited") + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Only local users can be ratelimited" + ) if not await self.store.get_user_by_id(user_id): raise NotFoundError("User not found") await self.store.delete_ratelimit_for_user(user_id) - return 200, {} + return HTTPStatus.OK, {} diff --git a/synapse/rest/client/__init__.py b/synapse/rest/client/__init__.py index 629e2df74a4f..f9830cc51f84 100644 --- a/synapse/rest/client/__init__.py +++ b/synapse/rest/client/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2014-2016 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
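
The admin hunks above are dominated by a mechanical swap of bare integer status codes for the named constants in the standard library's http.HTTPStatus. A minimal standalone sketch (standard library only, not part of the patch) of why the swap is behaviour-preserving:

from http import HTTPStatus

# HTTPStatus members are an IntEnum: they compare and serialise exactly like
# the integers they replace.
assert HTTPStatus.OK == 200
assert HTTPStatus.BAD_REQUEST == 400
assert HTTPStatus.FORBIDDEN == 403
assert isinstance(HTTPStatus.OK, int)

# A servlet returning `HTTPStatus.OK, {}` is therefore indistinguishable on
# the wire from one returning `200, {}`; the gain is readability plus the
# attached metadata.
print(int(HTTPStatus.BAD_REQUEST), HTTPStatus.BAD_REQUEST.phrase)  # 400 Bad Request
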
diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/_base.py similarity index 88% rename from synapse/rest/client/v2_alpha/_base.py rename to synapse/rest/client/_base.py index 0443f4571c1c..b4cb90cb7667 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/_base.py @@ -16,7 +16,7 @@ """ import logging import re -from typing import Iterable, Pattern +from typing import Any, Awaitable, Callable, Iterable, Pattern, Tuple, TypeVar, cast from synapse.api.errors import InteractiveAuthIncompleteError from synapse.api.urls import CLIENT_API_PREFIX @@ -27,7 +27,7 @@ def client_patterns( path_regex: str, - releases: Iterable[int] = (0,), + releases: Iterable[str] = ("r0", "v3"), unstable: bool = True, v1: bool = False, ) -> Iterable[Pattern]: @@ -52,7 +52,7 @@ def client_patterns( v1_prefix = CLIENT_API_PREFIX + "/api/v1" patterns.append(re.compile("^" + v1_prefix + path_regex)) for release in releases: - new_prefix = CLIENT_API_PREFIX + "/r%d" % (release,) + new_prefix = CLIENT_API_PREFIX + f"/{release}" patterns.append(re.compile("^" + new_prefix + path_regex)) return patterns @@ -76,7 +76,10 @@ def set_timeline_upper_limit(filter_json: JsonDict, filter_timeline_limit: int) ) -def interactive_auth_handler(orig): +C = TypeVar("C", bound=Callable[..., Awaitable[Tuple[int, JsonDict]]]) + + +def interactive_auth_handler(orig: C) -> C: """Wraps an on_POST method to handle InteractiveAuthIncompleteErrors Takes a on_POST method which returns an Awaitable (errcode, body) response @@ -91,10 +94,10 @@ async def on_POST(self, request): await self.auth_handler.check_auth """ - async def wrapped(*args, **kwargs): + async def wrapped(*args: Any, **kwargs: Any) -> Tuple[int, JsonDict]: try: return await orig(*args, **kwargs) except InteractiveAuthIncompleteError as e: return 401, e.result - return wrapped + return cast(C, wrapped) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/account.py similarity index 84% rename from synapse/rest/client/v2_alpha/account.py rename to synapse/rest/client/account.py index fb5ad2906eaa..6b272658fc3c 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/account.py @@ -16,9 +16,11 @@ import logging import random from http import HTTPStatus -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, Tuple from urllib.parse import urlparse +from twisted.web.server import Request + from synapse.api.constants import LoginType from synapse.api.errors import ( Codes, @@ -28,15 +30,17 @@ ) from synapse.config.emailconfig import ThreepidBehaviour from synapse.handlers.ui_auth import UIAuthSessionDataConstants -from synapse.http.server import finish_request, respond_with_html +from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer +from synapse.types import JsonDict from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.stringutils import assert_valid_client_secret, random_string from synapse.util.threepids import check_3pid_allowed, validate_email @@ -60,17 +64,17 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config self.identity_handler = hs.get_identity_handler() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if 
self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_password_reset_template_html, - template_text=self.config.email_password_reset_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_password_reset_template_html, + template_text=self.config.email.email_password_reset_template_text, ) - async def on_POST(self, request): - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "User password resets have been disabled due to lack of email config" ) @@ -115,7 +119,7 @@ async def on_POST(self, request): ) if existing_user_id is None: - if self.config.request_token_inhibit_3pid_errors: + if self.config.server.request_token_inhibit_3pid_errors: # Make the client think the operation succeeded. See the rationale in the # comments for request_token_inhibit_3pid_errors. # Also wait for some random amount of time between 100ms and 1s to make it @@ -125,12 +129,12 @@ async def on_POST(self, request): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.account_threepid_delegate_email + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.registration.account_threepid_delegate_email # Have the configured identity server handle the request ret = await self.identity_handler.requestEmailToken( - self.hs.config.account_threepid_delegate_email, + self.hs.config.registration.account_threepid_delegate_email, email, client_secret, send_attempt, @@ -159,7 +163,7 @@ async def on_POST(self, request): class PasswordRestServlet(RestServlet): PATTERNS = client_patterns("/account/password$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -169,7 +173,7 @@ def __init__(self, hs): self._set_password_handler = hs.get_set_password_handler() @interactive_auth_handler - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) # we do basic sanity checks here because the auth layer will store these @@ -190,6 +194,7 @@ async def on_POST(self, request): # # In the second case, we require a password to confirm their identity. + requester = None if self.auth.has_access_token(request): requester = await self.auth.get_user_by_req(request) try: @@ -206,16 +211,15 @@ async def on_POST(self, request): # If a password is available now, hash the provided password and # store it for later. 
if new_password: - password_hash = await self.auth_handler.hash(new_password) + new_password_hash = await self.auth_handler.hash(new_password) await self.auth_handler.set_session_data( e.session_id, UIAuthSessionDataConstants.PASSWORD_HASH, - password_hash, + new_password_hash, ) raise user_id = requester.user.to_string() else: - requester = None try: result, params, session_id = await self.auth_handler.check_ui_auth( [[LoginType.EMAIL_IDENTITY]], @@ -230,11 +234,11 @@ async def on_POST(self, request): # If a password is available now, hash the provided password and # store it for later. if new_password: - password_hash = await self.auth_handler.hash(new_password) + new_password_hash = await self.auth_handler.hash(new_password) await self.auth_handler.set_session_data( e.session_id, UIAuthSessionDataConstants.PASSWORD_HASH, - password_hash, + new_password_hash, ) raise @@ -264,7 +268,7 @@ async def on_POST(self, request): # If we have a password in this request, prefer it. Otherwise, use the # password hash from an earlier request. if new_password: - password_hash = await self.auth_handler.hash(new_password) + password_hash: Optional[str] = await self.auth_handler.hash(new_password) elif session_id is not None: password_hash = await self.auth_handler.get_session_data( session_id, UIAuthSessionDataConstants.PASSWORD_HASH, None @@ -288,7 +292,7 @@ async def on_POST(self, request): class DeactivateAccountRestServlet(RestServlet): PATTERNS = client_patterns("/account/deactivate$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -296,7 +300,7 @@ def __init__(self, hs): self._deactivate_account_handler = hs.get_deactivate_account_handler() @interactive_auth_handler - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) erase = body.get("erase", False) if not isinstance(erase, bool): @@ -338,24 +342,24 @@ async def on_POST(self, request): class EmailThreepidRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/email/requestToken$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.config = hs.config self.identity_handler = hs.get_identity_handler() self.store = self.hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_add_threepid_template_html, - template_text=self.config.email_add_threepid_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_add_threepid_template_html, + template_text=self.config.email.email_add_threepid_template_text, ) - async def on_POST(self, request): - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "Adding emails have been disabled due to lack of an email config" ) @@ -399,7 +403,7 @@ async def on_POST(self, request): existing_user_id = await self.store.get_user_id_by_threepid("email", email) if existing_user_id is not None: - 
if self.config.request_token_inhibit_3pid_errors: + if self.config.server.request_token_inhibit_3pid_errors: # Make the client think the operation succeeded. See the rationale in the # comments for request_token_inhibit_3pid_errors. # Also wait for some random amount of time between 100ms and 1s to make it @@ -409,12 +413,12 @@ async def on_POST(self, request): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.account_threepid_delegate_email + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.registration.account_threepid_delegate_email # Have the configured identity server handle the request ret = await self.identity_handler.requestEmailToken( - self.hs.config.account_threepid_delegate_email, + self.hs.config.registration.account_threepid_delegate_email, email, client_secret, send_attempt, @@ -449,7 +453,7 @@ def __init__(self, hs: "HomeServer"): self.store = self.hs.get_datastore() self.identity_handler = hs.get_identity_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) assert_params_in_dict( body, ["client_secret", "country", "phone_number", "send_attempt"] @@ -482,7 +486,7 @@ async def on_POST(self, request): existing_user_id = await self.store.get_user_id_by_threepid("msisdn", msisdn) if existing_user_id is not None: - if self.hs.config.request_token_inhibit_3pid_errors: + if self.hs.config.server.request_token_inhibit_3pid_errors: # Make the client think the operation succeeded. See the rationale in the # comments for request_token_inhibit_3pid_errors. # Also wait for some random amount of time between 100ms and 1s to make it @@ -492,7 +496,7 @@ async def on_POST(self, request): raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) - if not self.hs.config.account_threepid_delegate_msisdn: + if not self.hs.config.registration.account_threepid_delegate_msisdn: logger.warning( "No upstream msisdn account_threepid_delegate configured on the server to " "handle this request" @@ -503,7 +507,7 @@ async def on_POST(self, request): ) ret = await self.identity_handler.requestMsisdnToken( - self.hs.config.account_threepid_delegate_msisdn, + self.hs.config.registration.account_threepid_delegate_msisdn, country, phone_number, client_secret, @@ -525,30 +529,26 @@ class AddThreepidEmailSubmitTokenServlet(RestServlet): "/add_threepid/email/submit_token$", releases=(), unstable=True ) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self._failure_email_template = ( - self.config.email_add_threepid_template_failure_html + self.config.email.email_add_threepid_template_failure_html ) - async def on_GET(self, request): - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + async def on_GET(self, request: Request) -> None: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "Adding 
emails have been disabled due to lack of an email config" ) raise SynapseError( 400, "Adding an email to your account is disabled on this server" ) - elif self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + elif self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: raise SynapseError( 400, "This homeserver is not validating threepids. Use an identity server " @@ -575,7 +575,7 @@ async def on_GET(self, request): return None # Otherwise show the success template - html = self.config.email_add_threepid_template_success_html_content + html = self.config.email.email_add_threepid_template_success_html_content status_code = 200 except ThreepidValidationError as e: status_code = e.code @@ -596,19 +596,15 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet): "/add_threepid/msisdn/submit_token$", releases=(), unstable=True ) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastore() self.identity_handler = hs.get_identity_handler() - async def on_POST(self, request): - if not self.config.account_threepid_delegate_msisdn: + async def on_POST(self, request: Request) -> Tuple[int, JsonDict]: + if not self.config.registration.account_threepid_delegate_msisdn: raise SynapseError( 400, "This homeserver is not validating phone numbers. Use an identity server " @@ -621,7 +617,7 @@ async def on_POST(self, request): # Proxy submit_token request to msisdn threepid delegate response = await self.identity_handler.proxy_msisdn_submit_token( - self.config.account_threepid_delegate_msisdn, + self.config.registration.account_threepid_delegate_msisdn, body["client_secret"], body["sid"], body["token"], @@ -632,7 +628,7 @@ async def on_POST(self, request): class ThreepidRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() @@ -640,15 +636,15 @@ def __init__(self, hs): self.auth_handler = hs.get_auth_handler() self.datastore = self.hs.get_datastore() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) threepids = await self.datastore.user_get_threepids(requester.user.to_string()) return 200, {"threepids": threepids} - async def on_POST(self, request): - if not self.hs.config.enable_3pid_changes: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if not self.hs.config.registration.enable_3pid_changes: raise SynapseError( 400, "3PID changes are disabled on this server", Codes.FORBIDDEN ) @@ -688,7 +684,7 @@ async def on_POST(self, request): class ThreepidAddRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/add$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() @@ -696,8 +692,8 @@ def __init__(self, hs): self.auth_handler = hs.get_auth_handler() @interactive_auth_handler - async def on_POST(self, request): - if not self.hs.config.enable_3pid_changes: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if not self.hs.config.registration.enable_3pid_changes: raise SynapseError( 400, "3PID changes are disabled on this server", Codes.FORBIDDEN ) @@ -738,13 +734,13 @@ async def 
on_POST(self, request): class ThreepidBindRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/bind$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() self.auth = hs.get_auth() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) assert_params_in_dict(body, ["id_server", "sid", "client_secret"]) @@ -767,14 +763,14 @@ async def on_POST(self, request): class ThreepidUnbindRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/unbind$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() self.auth = hs.get_auth() self.datastore = self.hs.get_datastore() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """Unbind the given 3pid from a specific identity server, or identity servers that are known to have this 3pid bound """ @@ -798,14 +794,14 @@ async def on_POST(self, request): class ThreepidDeleteRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid/delete$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() - async def on_POST(self, request): - if not self.hs.config.enable_3pid_changes: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if not self.hs.config.registration.enable_3pid_changes: raise SynapseError( 400, "3PID changes are disabled on this server", Codes.FORBIDDEN ) @@ -835,7 +831,7 @@ async def on_POST(self, request): return 200, {"id_server_unbind_result": id_server_unbind_result} -def assert_valid_next_link(hs: "HomeServer", next_link: str): +def assert_valid_next_link(hs: "HomeServer", next_link: str) -> None: """ Raises a SynapseError if a given next_link value is invalid @@ -861,8 +857,8 @@ def assert_valid_next_link(hs: "HomeServer", next_link: str): # If the domain whitelist is set, the domain must be in it if ( valid - and hs.config.next_link_domain_whitelist is not None - and next_link_parsed.hostname not in hs.config.next_link_domain_whitelist + and hs.config.server.next_link_domain_whitelist is not None + and next_link_parsed.hostname not in hs.config.server.next_link_domain_whitelist ): valid = False @@ -877,14 +873,18 @@ def assert_valid_next_link(hs: "HomeServer", next_link: str): class WhoamiRestServlet(RestServlet): PATTERNS = client_patterns("/account/whoami$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() - async def on_GET(self, request): - requester = await self.auth.get_user_by_req(request) + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request, allow_guest=True) - response = {"user_id": requester.user.to_string()} + response = { + "user_id": requester.user.to_string(), + # MSC: https://github.com/matrix-org/matrix-doc/pull/3069 + "org.matrix.msc3069.is_guest": bool(requester.is_guest), + } # Appservices and similar accounts do not have device IDs # that we can report on, so exclude them for compliance. 
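
On the client side, the WhoamiRestServlet change above means guest access tokens are now accepted (allow_guest=True) and an experimental MSC3069 field is included in the response. An illustrative request, where the homeserver URL and access token are placeholders rather than anything defined by this patch:

import requests

BASE_URL = "https://matrix.example.com"  # hypothetical homeserver
TOKEN = "syt_example_token"              # hypothetical (possibly guest) token

resp = requests.get(
    f"{BASE_URL}/_matrix/client/r0/account/whoami",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
body = resp.json()
print(body["user_id"])
# Unstable field from MSC3069; absent on servers without this change.
print(body.get("org.matrix.msc3069.is_guest", False))
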
@@ -894,7 +894,7 @@ async def on_GET(self, request): return 200, response -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: EmailPasswordRequestTokenRestServlet(hs).register(http_server) PasswordRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/account_data.py b/synapse/rest/client/account_data.py similarity index 79% rename from synapse/rest/client/v2_alpha/account_data.py rename to synapse/rest/client/account_data.py index 7517e9304e8d..d1badbdf3bec 100644 --- a/synapse/rest/client/v2_alpha/account_data.py +++ b/synapse/rest/client/account_data.py @@ -13,12 +13,19 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError, NotFoundError, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -32,13 +39,15 @@ class AccountDataServlet(RestServlet): "/user/(?P<user_id>[^/]*)/account_data/(?P<account_data_type>[^/]*)" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.handler = hs.get_account_data_handler() - async def on_PUT(self, request, user_id, account_data_type): + async def on_PUT( + self, request: SynapseRequest, user_id: str, account_data_type: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add account data for other users.") @@ -49,7 +58,9 @@ async def on_PUT(self, request, user_id, account_data_type): return 200, {} - async def on_GET(self, request, user_id, account_data_type): + async def on_GET( + self, request: SynapseRequest, user_id: str, account_data_type: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") @@ -76,13 +87,19 @@ class RoomAccountDataServlet(RestServlet): "/account_data/(?P<account_data_type>[^/]*)" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.handler = hs.get_account_data_handler() - async def on_PUT(self, request, user_id, room_id, account_data_type): + async def on_PUT( + self, + request: SynapseRequest, + user_id: str, + room_id: str, + account_data_type: str, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add account data for other users.") @@ -102,7 +119,13 @@ async def on_PUT(self, request, user_id, room_id, account_data_type): return 200, {} - async def on_GET(self, request, user_id, room_id, account_data_type): + async def on_GET( + self, + request: SynapseRequest, + user_id: str, + room_id: str, + account_data_type: str, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") @@ -117,6 +140,6 @@ async def on_GET(self, request, user_id, room_id, account_data_type): return 200, event -def 
register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: AccountDataServlet(hs).register(http_server) RoomAccountDataServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/account_validity.py b/synapse/rest/client/account_validity.py similarity index 77% rename from synapse/rest/client/v2_alpha/account_validity.py rename to synapse/rest/client/account_validity.py index 3ebe40186153..6c24b96c547d 100644 --- a/synapse/rest/client/v2_alpha/account_validity.py +++ b/synapse/rest/client/account_validity.py @@ -13,24 +13,27 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import SynapseError -from synapse.http.server import respond_with_html -from synapse.http.servlet import RestServlet +from twisted.web.server import Request + +from synapse.http.server import HttpServer, respond_with_html +from synapse.http.servlet import RestServlet, parse_string +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class AccountValidityRenewServlet(RestServlet): PATTERNS = client_patterns("/account_validity/renew$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs @@ -46,18 +49,14 @@ def __init__(self, hs): hs.config.account_validity.account_validity_invalid_token_template ) - async def on_GET(self, request): - if b"token" not in request.args: - raise SynapseError(400, "Missing renewal token") - renewal_token = request.args[b"token"][0] + async def on_GET(self, request: Request) -> None: + renewal_token = parse_string(request, "token", required=True) ( token_valid, token_stale, expiration_ts, - ) = await self.account_activity_handler.renew_account( - renewal_token.decode("utf8") - ) + ) = await self.account_activity_handler.renew_account(renewal_token) if token_valid: status_code = 200 @@ -77,11 +76,7 @@ async def on_GET(self, request): class AccountValiditySendMailServlet(RestServlet): PATTERNS = client_patterns("/account_validity/send_mail$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs @@ -91,7 +86,7 @@ def __init__(self, hs): hs.config.account_validity.account_validity_renew_by_email_enabled ) - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_expired=True) user_id = requester.user.to_string() await self.account_activity_handler.send_renewal_email_to_user(user_id) @@ -99,6 +94,6 @@ async def on_POST(self, request): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: AccountValidityRenewServlet(hs).register(http_server) AccountValiditySendMailServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/auth.py similarity index 59% rename from synapse/rest/client/v2_alpha/auth.py rename to synapse/rest/client/auth.py index 6ea1b50a625e..9c15a04338b1 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/auth.py @@ -15,11 +15,14 @@ import logging from typing import TYPE_CHECKING +from twisted.web.server import Request + from 
synapse.api.constants import LoginType -from synapse.api.errors import SynapseError +from synapse.api.errors import LoginError, SynapseError from synapse.api.urls import CLIENT_API_PREFIX -from synapse.http.server import respond_with_html +from synapse.http.server import HttpServer, respond_with_html from synapse.http.servlet import RestServlet, parse_string +from synapse.http.site import SynapseRequest from ._base import client_patterns @@ -44,11 +47,14 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_registration_handler() - self.recaptcha_template = hs.config.recaptcha_template - self.terms_template = hs.config.terms_template - self.success_template = hs.config.fallback_success_template - - async def on_GET(self, request, stagetype): + self.recaptcha_template = hs.config.captcha.recaptcha_template + self.terms_template = hs.config.consent.terms_template + self.registration_token_template = ( + hs.config.registration.registration_token_template + ) + self.success_template = hs.config.registration.fallback_success_template + + async def on_GET(self, request: SynapseRequest, stagetype: str) -> None: session = parse_string(request, "session") if not session: raise SynapseError(400, "No session supplied") @@ -58,13 +64,16 @@ async def on_GET(self, request, stagetype): session=session, myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.RECAPTCHA), - sitekey=self.hs.config.recaptcha_public_key, + sitekey=self.hs.config.captcha.recaptcha_public_key, ) elif stagetype == LoginType.TERMS: html = self.terms_template.render( session=session, terms_url="%s_matrix/consent?v=%s" - % (self.hs.config.public_baseurl, self.hs.config.user_consent_version), + % ( + self.hs.config.server.public_baseurl, + self.hs.config.consent.user_consent_version, + ), myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.TERMS), ) @@ -74,6 +83,12 @@ async def on_GET(self, request, stagetype): # re-authenticate with their SSO provider. 
html = await self.auth_handler.start_sso_ui_auth(request, session) + elif stagetype == LoginType.REGISTRATION_TOKEN: + html = self.registration_token_template.render( + session=session, + myurl=f"{CLIENT_API_PREFIX}/r0/auth/{LoginType.REGISTRATION_TOKEN}/fallback/web", + ) + else: raise SynapseError(404, "Unknown auth stage type") @@ -81,7 +96,7 @@ async def on_GET(self, request, stagetype): respond_with_html(request, 200, html) return None - async def on_POST(self, request, stagetype): + async def on_POST(self, request: Request, stagetype: str) -> None: session = parse_string(request, "session") if not session: @@ -95,42 +110,68 @@ async def on_POST(self, request, stagetype): authdict = {"response": response, "session": session} - success = await self.auth_handler.add_oob_auth( - LoginType.RECAPTCHA, authdict, request.getClientIP() - ) - - if success: - html = self.success_template.render() - else: + try: + await self.auth_handler.add_oob_auth( + LoginType.RECAPTCHA, authdict, request.getClientIP() + ) + except LoginError as e: + # Authentication failed, let user try again html = self.recaptcha_template.render( session=session, myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.RECAPTCHA), - sitekey=self.hs.config.recaptcha_public_key, + sitekey=self.hs.config.captcha.recaptcha_public_key, + error=e.msg, ) + else: + # No LoginError was raised, so authentication was successful + html = self.success_template.render() + elif stagetype == LoginType.TERMS: authdict = {"session": session} - success = await self.auth_handler.add_oob_auth( - LoginType.TERMS, authdict, request.getClientIP() - ) - - if success: - html = self.success_template.render() - else: + try: + await self.auth_handler.add_oob_auth( + LoginType.TERMS, authdict, request.getClientIP() + ) + except LoginError as e: + # Authentication failed, let user try again html = self.terms_template.render( session=session, terms_url="%s_matrix/consent?v=%s" % ( - self.hs.config.public_baseurl, - self.hs.config.user_consent_version, + self.hs.config.server.public_baseurl, + self.hs.config.consent.user_consent_version, ), myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.TERMS), + error=e.msg, ) + else: + # No LoginError was raised, so authentication was successful + html = self.success_template.render() + elif stagetype == LoginType.SSO: # The SSO fallback workflow should not post here, raise SynapseError(404, "Fallback SSO auth does not support POST requests.") + + elif stagetype == LoginType.REGISTRATION_TOKEN: + token = parse_string(request, "token", required=True) + authdict = {"session": session, "token": token} + + try: + await self.auth_handler.add_oob_auth( + LoginType.REGISTRATION_TOKEN, authdict, request.getClientIP() + ) + except LoginError as e: + html = self.registration_token_template.render( + session=session, + myurl=f"{CLIENT_API_PREFIX}/r0/auth/{LoginType.REGISTRATION_TOKEN}/fallback/web", + error=e.msg, + ) + else: + html = self.success_template.render() + else: raise SynapseError(404, "Unknown auth stage type") @@ -139,5 +180,5 @@ async def on_POST(self, request, stagetype): return None -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: AuthRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/capabilities.py b/synapse/rest/client/capabilities.py similarity index 73% rename from synapse/rest/client/v2_alpha/capabilities.py rename to synapse/rest/client/capabilities.py index 88e3aac7974b..2a3e24ae7e55 
100644 --- a/synapse/rest/client/v2_alpha/capabilities.py +++ b/synapse/rest/client/capabilities.py @@ -15,6 +15,7 @@ from typing import TYPE_CHECKING, Tuple from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, MSC3244_CAPABILITIES +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet from synapse.http.site import SynapseRequest from synapse.types import JsonDict @@ -43,10 +44,10 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await self.auth.get_user_by_req(request, allow_guest=True) change_password = self.auth_handler.can_change_password() - response = { + response: JsonDict = { "capabilities": { "m.room_versions": { - "default": self.config.default_room_version.identifier, + "default": self.config.server.default_room_version.identifier, "available": { v.identifier: v.disposition for v in KNOWN_ROOM_VERSIONS.values() @@ -61,8 +62,19 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: "org.matrix.msc3244.room_capabilities" ] = MSC3244_CAPABILITIES + if self.config.experimental.msc3283_enabled: + response["capabilities"]["org.matrix.msc3283.set_displayname"] = { + "enabled": self.config.registration.enable_set_displayname + } + response["capabilities"]["org.matrix.msc3283.set_avatar_url"] = { + "enabled": self.config.registration.enable_set_avatar_url + } + response["capabilities"]["org.matrix.msc3283.3pid_changes"] = { + "enabled": self.config.registration.enable_3pid_changes + } + return 200, response -def register_servlets(hs: "HomeServer", http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: CapabilitiesRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/devices.py b/synapse/rest/client/devices.py similarity index 87% rename from synapse/rest/client/v2_alpha/devices.py rename to synapse/rest/client/devices.py index 8b9674db064a..8566dc5cb594 100644 --- a/synapse/rest/client/v2_alpha/devices.py +++ b/synapse/rest/client/devices.py @@ -14,34 +14,36 @@ # limitations under the License. 
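
For orientation, the MSC3283 branch added to CapabilitiesRestServlet above extends the /capabilities payload roughly as sketched below when config.experimental.msc3283_enabled is set. This is a hand-written illustration; the room-version entries in particular depend on server configuration:

capabilities_response = {
    "capabilities": {
        "m.room_versions": {
            "default": "6",  # example default room version
            "available": {"6": "stable", "9": "stable"},
        },
        "m.change_password": {"enabled": True},
        # Advertised only when the experimental msc3283 flag is enabled,
        # mirroring the three registration config options shown above:
        "org.matrix.msc3283.set_displayname": {"enabled": True},
        "org.matrix.msc3283.set_avatar_url": {"enabled": True},
        "org.matrix.msc3283.3pid_changes": {"enabled": True},
    }
}
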
import logging +from typing import TYPE_CHECKING, Tuple from synapse.api import errors +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, ) from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns, interactive_auth_handler +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class DevicesRestServlet(RestServlet): PATTERNS = client_patterns("/devices$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) devices = await self.device_handler.get_devices_by_user( requester.user.to_string() @@ -57,7 +59,7 @@ class DeleteDevicesRestServlet(RestServlet): PATTERNS = client_patterns("/delete_devices") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -65,7 +67,7 @@ def __init__(self, hs): self.auth_handler = hs.get_auth_handler() @interactive_auth_handler - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) try: @@ -100,18 +102,16 @@ async def on_POST(self, request): class DeviceRestServlet(RestServlet): PATTERNS = client_patterns("/devices/(?P<device_id>[^/]*)$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() self.auth_handler = hs.get_auth_handler() - async def on_GET(self, request, device_id): + async def on_GET( + self, request: SynapseRequest, device_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) device = await self.device_handler.get_device( requester.user.to_string(), device_id @@ -119,7 +119,9 @@ async def on_GET(self, request, device_id): return 200, device @interactive_auth_handler - async def on_DELETE(self, request, device_id): + async def on_DELETE( + self, request: SynapseRequest, device_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) try: @@ -146,7 +148,9 @@ async def on_DELETE(self, request, device_id): await self.device_handler.delete_device(requester.user.to_string(), device_id) return 200, {} - async def on_PUT(self, request, device_id): + async def on_PUT( + self, request: SynapseRequest, device_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) body = parse_json_object_from_request(request) @@ -193,13 +197,13 @@ class DehydratedDeviceServlet(RestServlet): PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device", releases=()) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() - async def on_GET(self, request: SynapseRequest): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) dehydrated_device = await 
self.device_handler.get_dehydrated_device( requester.user.to_string() @@ -207,11 +211,11 @@ async def on_GET(self, request: SynapseRequest): if dehydrated_device is not None: (device_id, device_data) = dehydrated_device result = {"device_id": device_id, "device_data": device_data} - return (200, result) + return 200, result else: raise errors.NotFoundError("No dehydrated device available") - async def on_PUT(self, request: SynapseRequest): + async def on_PUT(self, request: SynapseRequest) -> Tuple[int, JsonDict]: submission = parse_json_object_from_request(request) requester = await self.auth.get_user_by_req(request) @@ -259,13 +263,13 @@ class ClaimDehydratedDeviceServlet(RestServlet): "/org.matrix.msc2697.v2/dehydrated_device/claim", releases=() ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() - async def on_POST(self, request: SynapseRequest): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) submission = parse_json_object_from_request(request) @@ -289,10 +293,10 @@ async def on_POST(self, request: SynapseRequest): submission["device_id"], ) - return (200, result) + return 200, result -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: DeleteDevicesRestServlet(hs).register(http_server) DevicesRestServlet(hs).register(http_server) DeviceRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/directory.py b/synapse/rest/client/directory.py similarity index 69% rename from synapse/rest/client/v1/directory.py rename to synapse/rest/client/directory.py index ae92a3df8e35..ee247e3d1e0d 100644 --- a/synapse/rest/client/v1/directory.py +++ b/synapse/rest/client/directory.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
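
The dehydrated-device servlets above (MSC2697) register with releases=(), so they are reachable only under the unstable client prefix. A rough client-side sketch of the fetch-and-claim flow; the base URL, token and response handling are illustrative assumptions, not guarantees made by the patch:

import requests

BASE = "https://matrix.example.com/_matrix/client/unstable"  # hypothetical
HEADERS = {"Authorization": "Bearer syt_example_token"}      # hypothetical token

# Fetch the stored dehydrated device; the servlet raises NotFoundError (404)
# when there is none.
resp = requests.get(f"{BASE}/org.matrix.msc2697.v2/dehydrated_device", headers=HEADERS)
if resp.ok:
    stored = resp.json()  # {"device_id": ..., "device_data": ...}

    # Claim it so the current login can take over the dehydrated device.
    claim = requests.post(
        f"{BASE}/org.matrix.msc2697.v2/dehydrated_device/claim",
        headers=HEADERS,
        json={"device_id": stored["device_id"]},
    )
    print(claim.status_code, claim.json())
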
- import logging +from typing import TYPE_CHECKING, Tuple + +from twisted.web.server import Request from synapse.api.errors import ( AuthError, @@ -22,14 +24,19 @@ NotFoundError, SynapseError, ) +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.rest.client.v2_alpha._base import client_patterns -from synapse.types import RoomAlias +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict, RoomAlias + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ClientDirectoryServer(hs).register(http_server) ClientDirectoryListServer(hs).register(http_server) ClientAppserviceDirectoryListServer(hs).register(http_server) @@ -38,21 +45,23 @@ class ClientDirectoryServer(RestServlet): PATTERNS = client_patterns("/directory/room/(?P<room_alias>[^/]*)$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastore() self.directory_handler = hs.get_directory_handler() self.auth = hs.get_auth() - async def on_GET(self, request, room_alias): - room_alias = RoomAlias.from_string(room_alias) + async def on_GET(self, request: Request, room_alias: str) -> Tuple[int, JsonDict]: + room_alias_obj = RoomAlias.from_string(room_alias) - res = await self.directory_handler.get_association(room_alias) + res = await self.directory_handler.get_association(room_alias_obj) return 200, res - async def on_PUT(self, request, room_alias): - room_alias = RoomAlias.from_string(room_alias) + async def on_PUT( + self, request: SynapseRequest, room_alias: str + ) -> Tuple[int, JsonDict]: + room_alias_obj = RoomAlias.from_string(room_alias) content = parse_json_object_from_request(request) if "room_id" not in content: @@ -61,7 +70,7 @@ async def on_PUT(self, request, room_alias): ) logger.debug("Got content: %s", content) - logger.debug("Got room name: %s", room_alias.to_string()) + logger.debug("Got room name: %s", room_alias_obj.to_string()) room_id = content["room_id"] servers = content["servers"] if "servers" in content else None @@ -78,22 +87,25 @@ async def on_PUT(self, request, room_alias): requester = await self.auth.get_user_by_req(request) await self.directory_handler.create_association( - requester, room_alias, room_id, servers + requester, room_alias_obj, room_id, servers ) return 200, {} - async def on_DELETE(self, request, room_alias): + async def on_DELETE( + self, request: SynapseRequest, room_alias: str + ) -> Tuple[int, JsonDict]: + room_alias_obj = RoomAlias.from_string(room_alias) + try: service = self.auth.get_appservice_by_req(request) - room_alias = RoomAlias.from_string(room_alias) await self.directory_handler.delete_appservice_association( - service, room_alias + service, room_alias_obj ) logger.info( "Application service at %s deleted alias %s", service.url, - room_alias.to_string(), + room_alias_obj.to_string(), ) return 200, {} except InvalidClientCredentialsError: @@ -103,12 +115,10 @@ async def on_DELETE(self, request, room_alias): requester = await self.auth.get_user_by_req(request) user = requester.user - room_alias = RoomAlias.from_string(room_alias) - - await self.directory_handler.delete_association(requester, 
room_alias_obj) logger.info( - "User %s deleted alias %s", user.to_string(), room_alias.to_string() + "User %s deleted alias %s", user.to_string(), room_alias_obj.to_string() ) return 200, {} @@ -117,20 +127,22 @@ async def on_DELETE(self, request, room_alias): class ClientDirectoryListServer(RestServlet): PATTERNS = client_patterns("/directory/list/room/(?P<room_id>[^/]*)$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastore() self.directory_handler = hs.get_directory_handler() self.auth = hs.get_auth() - async def on_GET(self, request, room_id): + async def on_GET(self, request: Request, room_id: str) -> Tuple[int, JsonDict]: room = await self.store.get_room(room_id) if room is None: raise NotFoundError("Unknown room") return 200, {"visibility": "public" if room["is_public"] else "private"} - async def on_PUT(self, request, room_id): + async def on_PUT( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) @@ -142,7 +154,9 @@ async def on_PUT(self, request, room_id): return 200, {} - async def on_DELETE(self, request, room_id): + async def on_DELETE( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) await self.directory_handler.edit_published_room_list( @@ -157,21 +171,27 @@ class ClientAppserviceDirectoryListServer(RestServlet): "/directory/list/appservice/(?P<network_id>[^/]*)/(?P<room_id>[^/]*)$", v1=True ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastore() self.directory_handler = hs.get_directory_handler() self.auth = hs.get_auth() - def on_PUT(self, request, network_id, room_id): + async def on_PUT( + self, request: SynapseRequest, network_id: str, room_id: str + ) -> Tuple[int, JsonDict]: content = parse_json_object_from_request(request) visibility = content.get("visibility", "public") - return self._edit(request, network_id, room_id, visibility) + return await self._edit(request, network_id, room_id, visibility) - def on_DELETE(self, request, network_id, room_id): - return self._edit(request, network_id, room_id, "private") + async def on_DELETE( + self, request: SynapseRequest, network_id: str, room_id: str + ) -> Tuple[int, JsonDict]: + return await self._edit(request, network_id, room_id, "private") - async def _edit(self, request, network_id, room_id, visibility): + async def _edit( + self, request: SynapseRequest, network_id: str, room_id: str, visibility: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if not requester.app_service: raise AuthError( diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/events.py similarity index 70% rename from synapse/rest/client/v1/events.py rename to synapse/rest/client/events.py index ee7454996e5a..13b72a045a4a 100644 --- a/synapse/rest/client/v1/events.py +++ b/synapse/rest/client/events.py @@ -14,11 +14,18 @@ """This module contains REST servlets to do with event streaming, /events.""" import logging +from typing import TYPE_CHECKING, Dict, List, Tuple, Union from synapse.api.errors import SynapseError -from synapse.http.servlet import RestServlet -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.http.server import HttpServer +from synapse.http.servlet import RestServlet, parse_string +from synapse.http.site import SynapseRequest +from 
synapse.rest.client._base import client_patterns from synapse.streams.config import PaginationConfig +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -28,31 +35,30 @@ class EventStreamRestServlet(RestServlet): DEFAULT_LONGPOLL_TIME_MS = 30000 - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.event_stream_handler = hs.get_event_stream_handler() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) is_guest = requester.is_guest - room_id = None + args: Dict[bytes, List[bytes]] = request.args # type: ignore if is_guest: - if b"room_id" not in request.args: + if b"room_id" not in args: raise SynapseError(400, "Guest users must specify room_id param") - if b"room_id" in request.args: - room_id = request.args[b"room_id"][0].decode("ascii") + room_id = parse_string(request, "room_id") pagin_config = await PaginationConfig.from_request(self.store, request) timeout = EventStreamRestServlet.DEFAULT_LONGPOLL_TIME_MS - if b"timeout" in request.args: + if b"timeout" in args: try: - timeout = int(request.args[b"timeout"][0]) + timeout = int(args[b"timeout"][0]) except ValueError: raise SynapseError(400, "timeout must be in milliseconds.") - as_client_event = b"raw" not in request.args + as_client_event = b"raw" not in args chunk = await self.event_stream_handler.get_stream( requester.user.to_string(), @@ -70,25 +76,27 @@ async def on_GET(self, request): class EventRestServlet(RestServlet): PATTERNS = client_patterns("/events/(?P<event_id>[^/]*)$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.clock = hs.get_clock() self.event_handler = hs.get_event_handler() self.auth = hs.get_auth() self._event_serializer = hs.get_event_client_serializer() - async def on_GET(self, request, event_id): + async def on_GET( + self, request: SynapseRequest, event_id: str + ) -> Tuple[int, Union[str, JsonDict]]: requester = await self.auth.get_user_by_req(request) event = await self.event_handler.get_event(requester.user, None, event_id) time_now = self.clock.time_msec() if event: - event = await self._event_serializer.serialize_event(event, time_now) - return 200, event + result = await self._event_serializer.serialize_event(event, time_now) + return 200, result else: return 404, "Event not found." -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: EventStreamRestServlet(hs).register(http_server) EventRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/filter.py similarity index 78% rename from synapse/rest/client/v2_alpha/filter.py rename to synapse/rest/client/filter.py index 411667a9c8d9..cc1c2f973140 100644 --- a/synapse/rest/client/v2_alpha/filter.py +++ b/synapse/rest/client/filter.py @@ -13,26 +13,34 @@ # limitations under the License. 
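
The EventStreamRestServlet rework above preserves the long-standing /events long-poll contract: timeout is in milliseconds, guests must pass room_id, and raw suppresses client-event formatting. An illustrative poll loop, where the base URL and token are placeholders:

import requests

BASE = "https://matrix.example.com/_matrix/client/r0"   # hypothetical
HEADERS = {"Authorization": "Bearer syt_example_token"}  # hypothetical token

params = {"timeout": 30000, "room_id": "!room:example.com"}
chunk = requests.get(f"{BASE}/events", headers=HEADERS, params=params).json()
for event in chunk.get("chunk", []):
    print(event.get("type"))

# Resume the stream by feeding the returned `end` token back as `from`.
params["from"] = chunk.get("end")
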
import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.types import UserID +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict, UserID from ._base import client_patterns, set_timeline_upper_limit +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class GetFilterRestServlet(RestServlet): PATTERNS = client_patterns("/user/(?P<user_id>[^/]*)/filter/(?P<filter_id>[^/]*)") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.filtering = hs.get_filtering() - async def on_GET(self, request, user_id, filter_id): + async def on_GET( + self, request: SynapseRequest, user_id: str, filter_id: str + ) -> Tuple[int, JsonDict]: target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) @@ -43,13 +51,13 @@ async def on_GET(self, request, user_id, filter_id): raise AuthError(403, "Can only get filters for local users") try: - filter_id = int(filter_id) + filter_id_int = int(filter_id) except Exception: raise SynapseError(400, "Invalid filter_id") try: filter_collection = await self.filtering.get_user_filter( - user_localpart=target_user.localpart, filter_id=filter_id + user_localpart=target_user.localpart, filter_id=filter_id_int ) except StoreError as e: if e.code != 404: @@ -62,13 +70,15 @@ async def on_GET(self, request, user_id, filter_id): class CreateFilterRestServlet(RestServlet): PATTERNS = client_patterns("/user/(?P<user_id>[^/]*)/filter") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.filtering = hs.get_filtering() - async def on_POST(self, request, user_id): + async def on_POST( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) @@ -80,7 +90,7 @@ async def on_POST(self, request, user_id): raise AuthError(403, "Can only create filters for local users") content = parse_json_object_from_request(request) - set_timeline_upper_limit(content, self.hs.config.filter_timeline_limit) + set_timeline_upper_limit(content, self.hs.config.server.filter_timeline_limit) filter_id = await self.filtering.add_user_filter( user_localpart=target_user.localpart, user_filter=content @@ -89,6 +99,6 @@ async def on_POST(self, request, user_id): return 200, {"filter_id": str(filter_id)} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: GetFilterRestServlet(hs).register(http_server) CreateFilterRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/groups.py b/synapse/rest/client/groups.py similarity index 97% rename from synapse/rest/client/v2_alpha/groups.py rename to synapse/rest/client/groups.py index 6285680c00c8..a7e9aa3e9bbf 100644 --- a/synapse/rest/client/v2_alpha/groups.py +++ b/synapse/rest/client/groups.py @@ -15,7 +15,7 @@ import logging from functools import wraps -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Optional, Tuple from twisted.web.server import Request @@ -26,6 +26,7 @@ ) from synapse.api.errors import Codes, SynapseError from synapse.handlers.groups_local import
GroupsLocalHandler +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, assert_params_in_dict, @@ -42,14 +43,18 @@ logger = logging.getLogger(__name__) -def _validate_group_id(f): +def _validate_group_id( + f: Callable[..., Awaitable[Tuple[int, JsonDict]]] +) -> Callable[..., Awaitable[Tuple[int, JsonDict]]]: """Wrapper to validate the form of the group ID. Can be applied to any on_FOO method that accepts a group ID as a URL parameter. """ @wraps(f) - def wrapper(self, request: Request, group_id: str, *args, **kwargs): + def wrapper( + self: RestServlet, request: Request, group_id: str, *args: Any, **kwargs: Any + ) -> Awaitable[Tuple[int, JsonDict]]: if not GroupID.is_valid(group_id): raise SynapseError(400, "%s is not a legal group ID" % (group_id,)) @@ -155,7 +160,7 @@ async def on_PUT( group_id: str, category_id: Optional[str], room_id: str, - ): + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -187,7 +192,7 @@ async def on_PUT( @_validate_group_id async def on_DELETE( self, request: SynapseRequest, group_id: str, category_id: str, room_id: str - ): + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -450,7 +455,7 @@ async def on_PUT( @_validate_group_id async def on_DELETE( self, request: SynapseRequest, group_id: str, role_id: str, user_id: str - ): + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -673,7 +678,7 @@ def __init__(self, hs: "HomeServer"): @_validate_group_id async def on_PUT( self, request: SynapseRequest, group_id: str, room_id: str, config_key: str - ): + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -705,7 +710,7 @@ def __init__(self, hs: "HomeServer"): @_validate_group_id async def on_PUT( - self, request: SynapseRequest, group_id, user_id + self, request: SynapseRequest, group_id: str, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -737,7 +742,7 @@ def __init__(self, hs: "HomeServer"): @_validate_group_id async def on_PUT( - self, request: SynapseRequest, group_id, user_id + self, request: SynapseRequest, group_id: str, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) requester_user_id = requester.user.to_string() @@ -930,7 +935,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: return 200, result -def register_servlets(hs: "HomeServer", http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: GroupServlet(hs).register(http_server) GroupSummaryServlet(hs).register(http_server) GroupInvitedUsersServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/initial_sync.py similarity index 72% rename from synapse/rest/client/v1/initial_sync.py rename to synapse/rest/client/initial_sync.py index bef1edc838ab..49b1037b28d4 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/initial_sync.py @@ -12,25 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License.
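The _validate_group_id decorator in groups.py above is a plain (non-async) wrapper around coroutine functions: the ID check runs synchronously when the handler is called, before the wrapped coroutine is even created. A self-contained sketch of the same pattern, with a trivial stand-in for GroupID.is_valid and ValueError in place of SynapseError:

import asyncio
from functools import wraps
from typing import Any, Awaitable, Callable

def validate_group_id(
    f: Callable[..., Awaitable[dict]]
) -> Callable[..., Awaitable[dict]]:
    @wraps(f)
    def wrapper(
        self: Any, request: Any, group_id: str, *args: Any, **kwargs: Any
    ) -> Awaitable[dict]:
        # Stand-in for GroupID.is_valid(): real group IDs start with "+".
        if not group_id.startswith("+"):
            raise ValueError("%s is not a legal group ID" % (group_id,))
        return f(self, request, group_id, *args, **kwargs)
    return wrapper

class Demo:
    @validate_group_id
    async def on_GET(self, request: Any, group_id: str) -> dict:
        return {"group_id": group_id}

print(asyncio.run(Demo().on_GET(None, "+abc:example.com")))  # {'group_id': '+abc:example.com'}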
+from typing import TYPE_CHECKING, Dict, List, Tuple +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns from synapse.streams.config import PaginationConfig +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer # TODO: Needs unit testing class InitialSyncRestServlet(RestServlet): PATTERNS = client_patterns("/initialSync$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.initial_sync_handler = hs.get_initial_sync_handler() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - as_client_event = b"raw" not in request.args + args: Dict[bytes, List[bytes]] = request.args # type: ignore + as_client_event = b"raw" not in args pagination_config = await PaginationConfig.from_request(self.store, request) include_archived = parse_boolean(request, "archived", default=False) content = await self.initial_sync_handler.snapshot_all_rooms( @@ -43,5 +51,5 @@ async def on_GET(self, request): return 200, content -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: InitialSyncRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/keys.py similarity index 85% rename from synapse/rest/client/v2_alpha/keys.py rename to synapse/rest/client/keys.py index d0d9d30d40a3..730c18f08f78 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/keys.py @@ -15,19 +15,25 @@ # limitations under the License. 
import logging +from typing import TYPE_CHECKING, Any, Optional, Tuple -from synapse.api.errors import SynapseError +from synapse.api.errors import InvalidAPICallError, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, parse_integer, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.logging.opentracing import log_kv, set_tag, trace -from synapse.types import StreamToken +from synapse.types import JsonDict, StreamToken from ._base import client_patterns, interactive_auth_handler +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -59,18 +65,16 @@ class KeyUploadServlet(RestServlet): PATTERNS = client_patterns("/keys/upload(/(?P<device_id>[^/]+))?$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() self.device_handler = hs.get_device_handler() @trace(opname="upload_keys") - async def on_POST(self, request, device_id): + async def on_POST( + self, request: SynapseRequest, device_id: Optional[str] + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user_id = requester.user.to_string() body = parse_json_object_from_request(request) @@ -148,21 +152,30 @@ class KeyQueryServlet(RestServlet): PATTERNS = client_patterns("/keys/query$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user_id = requester.user.to_string() device_id = requester.device_id timeout = parse_integer(request, "timeout", 10 * 1000) body = parse_json_object_from_request(request) + + device_keys = body.get("device_keys") + if not isinstance(device_keys, dict): + raise InvalidAPICallError("'device_keys' must be a JSON object") + + def is_list_of_strings(values: Any) -> bool: + return isinstance(values, list) and all(isinstance(v, str) for v in values) + + if any(not is_list_of_strings(keys) for keys in device_keys.values()): + raise InvalidAPICallError( + "'device_keys' values must be a list of strings", + ) + result = await self.e2e_keys_handler.query_devices( body, timeout, user_id, device_id ) @@ -181,17 +194,13 @@ class KeyChangesServlet(RestServlet): PATTERNS = client_patterns("/keys/changes$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.device_handler = hs.get_device_handler() self.store = hs.get_datastore() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) from_token_string = parse_string(request, "from", required=True) @@ -231,12 +240,12 @@ class OneTimeKeyServlet(RestServlet): PATTERNS = client_patterns("/keys/claim$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest)
-> Tuple[int, JsonDict]: await self.auth.get_user_by_req(request, allow_guest=True) timeout = parse_integer(request, "timeout", 10 * 1000) body = parse_json_object_from_request(request) @@ -253,13 +262,9 @@ class SigningKeyUploadServlet(RestServlet): } """ - PATTERNS = client_patterns("/keys/device_signing/upload$", releases=()) + PATTERNS = client_patterns("/keys/device_signing/upload$", releases=("v3",)) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -267,7 +272,7 @@ def __init__(self, hs): self.auth_handler = hs.get_auth_handler() @interactive_auth_handler - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() body = parse_json_object_from_request(request) @@ -315,16 +320,12 @@ class SignaturesUploadServlet(RestServlet): PATTERNS = client_patterns("/keys/signatures/upload$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user_id = requester.user.to_string() body = parse_json_object_from_request(request) @@ -335,7 +336,7 @@ async def on_POST(self, request): return 200, result -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: KeyUploadServlet(hs).register(http_server) KeyQueryServlet(hs).register(http_server) KeyChangesServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/knock.py b/synapse/rest/client/knock.py similarity index 91% rename from synapse/rest/client/v2_alpha/knock.py rename to synapse/rest/client/knock.py index 7d1bc40658a6..0152a0c66a50 100644 --- a/synapse/rest/client/v2_alpha/knock.py +++ b/synapse/rest/client/knock.py @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
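The /keys/query body validation added in keys.py above restates cleanly as a pure function; in this sketch ValueError stands in for InvalidAPICallError:

from typing import Any

def is_list_of_strings(values: Any) -> bool:
    return isinstance(values, list) and all(isinstance(v, str) for v in values)

def validate_key_query_body(body: dict) -> None:
    # "device_keys" must map user IDs to lists of device ID strings.
    device_keys = body.get("device_keys")
    if not isinstance(device_keys, dict):
        raise ValueError("'device_keys' must be a JSON object")
    if any(not is_list_of_strings(keys) for keys in device_keys.values()):
        raise ValueError("'device_keys' values must be a list of strings")

validate_key_query_body({"device_keys": {"@alice:example.com": []}})        # ok: all devices
validate_key_query_body({"device_keys": {"@alice:example.com": ["DEV1"]}})  # ok: one device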
import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple from twisted.web.server import Request from synapse.api.constants import Membership from synapse.api.errors import SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, @@ -95,7 +96,9 @@ async def on_POST( return 200, {"room_id": room_id} - def on_PUT(self, request: Request, room_identifier: str, txn_id: str): + def on_PUT( + self, request: Request, room_identifier: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -103,5 +106,5 @@ def on_PUT(self, request: Request, room_identifier: str, txn_id: str): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: KnockRoomAliasServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/login.py similarity index 75% rename from synapse/rest/client/v1/login.py rename to synapse/rest/client/login.py index 11567bf32cef..f9994658c4a3 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/login.py @@ -1,4 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,17 @@ import logging import re -from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Optional +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) from typing_extensions import TypedDict @@ -28,13 +38,12 @@ from synapse.http.servlet import ( RestServlet, assert_params_in_dict, - parse_boolean, parse_bytes_from_args, parse_json_object_from_request, parse_string, ) from synapse.http.site import SynapseRequest -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.rest.client._base import client_patterns from synapse.rest.well_known import WellKnownBuilder from synapse.types import JsonDict, UserID @@ -61,26 +70,29 @@ class LoginRestServlet(RestServlet): TOKEN_TYPE = "m.login.token" JWT_TYPE = "org.matrix.login.jwt" JWT_TYPE_DEPRECATED = "m.login.jwt" - APPSERVICE_TYPE = "uk.half-shot.msc2778.login.application_service" - REFRESH_TOKEN_PARAM = "org.matrix.msc2918.refresh_token" + APPSERVICE_TYPE = "m.login.application_service" + APPSERVICE_TYPE_UNSTABLE = "uk.half-shot.msc2778.login.application_service" + REFRESH_TOKEN_PARAM = "refresh_token" def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs # JWT configuration variables. - self.jwt_enabled = hs.config.jwt_enabled - self.jwt_secret = hs.config.jwt_secret - self.jwt_algorithm = hs.config.jwt_algorithm - self.jwt_issuer = hs.config.jwt_issuer - self.jwt_audiences = hs.config.jwt_audiences + self.jwt_enabled = hs.config.jwt.jwt_enabled + self.jwt_secret = hs.config.jwt.jwt_secret + self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim + self.jwt_algorithm = hs.config.jwt.jwt_algorithm + self.jwt_issuer = hs.config.jwt.jwt_issuer + self.jwt_audiences = hs.config.jwt.jwt_audiences # SSO configuration. 
- self.saml2_enabled = hs.config.saml2_enabled - self.cas_enabled = hs.config.cas_enabled - self.oidc_enabled = hs.config.oidc_enabled - self._msc2858_enabled = hs.config.experimental.msc2858_enabled - self._msc2918_enabled = hs.config.access_token_lifetime is not None + self.saml2_enabled = hs.config.saml2.saml2_enabled + self.cas_enabled = hs.config.cas.cas_enabled + self.oidc_enabled = hs.config.oidc.oidc_enabled + self._refresh_tokens_enabled = ( + hs.config.registration.refreshable_access_token_lifetime is not None + ) self.auth = hs.get_auth() @@ -94,18 +106,24 @@ def __init__(self, hs: "HomeServer"): self._address_ratelimiter = Ratelimiter( store=hs.get_datastore(), clock=hs.get_clock(), - rate_hz=self.hs.config.rc_login_address.per_second, - burst_count=self.hs.config.rc_login_address.burst_count, + rate_hz=self.hs.config.ratelimiting.rc_login_address.per_second, + burst_count=self.hs.config.ratelimiting.rc_login_address.burst_count, ) self._account_ratelimiter = Ratelimiter( store=hs.get_datastore(), clock=hs.get_clock(), - rate_hz=self.hs.config.rc_login_account.per_second, - burst_count=self.hs.config.rc_login_account.burst_count, + rate_hz=self.hs.config.ratelimiting.rc_login_account.per_second, + burst_count=self.hs.config.ratelimiting.rc_login_account.burst_count, ) - def on_GET(self, request: SynapseRequest): - flows = [] + # ensure the CAS/SAML/OIDC handlers are loaded on this worker instance. + # The reason for this is to ensure that the auth_provider_ids are registered + # with SsoHandler, which in turn ensures that the login/registration prometheus + # counters are initialised for the auth_provider_ids. + _load_sso_handlers(hs) + + def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + flows: List[JsonDict] = [] if self.jwt_enabled: flows.append({"type": LoginRestServlet.JWT_TYPE}) flows.append({"type": LoginRestServlet.JWT_TYPE_DEPRECATED}) @@ -116,25 +134,15 @@ def on_GET(self, request: SynapseRequest): flows.append({"type": LoginRestServlet.CAS_TYPE}) if self.cas_enabled or self.saml2_enabled or self.oidc_enabled: - sso_flow: JsonDict = { - "type": LoginRestServlet.SSO_TYPE, - "identity_providers": [ - _get_auth_flow_dict_for_idp( - idp, - ) - for idp in self._sso_handler.get_identity_providers().values() - ], - } - - if self._msc2858_enabled: - # backwards-compatibility support for clients which don't - # support the stable API yet - sso_flow["org.matrix.msc2858.identity_providers"] = [ - _get_auth_flow_dict_for_idp(idp, use_unstable_brands=True) - for idp in self._sso_handler.get_identity_providers().values() - ] - - flows.append(sso_flow) + flows.append( + { + "type": LoginRestServlet.SSO_TYPE, + "identity_providers": [ + _get_auth_flow_dict_for_idp(idp) + for idp in self._sso_handler.get_identity_providers().values() + ], + } + ) # While it's valid for us to advertise this login type generally, # synapse currently only gives out these tokens as part of the @@ -148,23 +156,29 @@ def on_GET(self, request: SynapseRequest): flows.extend({"type": t} for t in self.auth_handler.get_supported_login_types()) flows.append({"type": LoginRestServlet.APPSERVICE_TYPE}) + flows.append({"type": LoginRestServlet.APPSERVICE_TYPE_UNSTABLE}) return 200, {"flows": flows} - async def on_POST(self, request: SynapseRequest): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, LoginResponse]: login_submission = parse_json_object_from_request(request) - if self._msc2918_enabled: - # Check if this login should also issue a refresh token, as per - # MSC2918 - 
should_issue_refresh_token = parse_boolean( - request, name=LoginRestServlet.REFRESH_TOKEN_PARAM, default=False - ) - else: - should_issue_refresh_token = False + # Check to see if the client requested a refresh token. + client_requested_refresh_token = login_submission.get( + LoginRestServlet.REFRESH_TOKEN_PARAM, False + ) + if not isinstance(client_requested_refresh_token, bool): + raise SynapseError(400, "`refresh_token` should be true or false.") + + should_issue_refresh_token = ( + self._refresh_tokens_enabled and client_requested_refresh_token + ) try: - if login_submission["type"] == LoginRestServlet.APPSERVICE_TYPE: + if login_submission["type"] in ( + LoginRestServlet.APPSERVICE_TYPE, + LoginRestServlet.APPSERVICE_TYPE_UNSTABLE, + ): appservice = self.auth.get_appservice_by_req(request) if appservice.is_rate_limited(): @@ -211,7 +225,7 @@ async def _do_appservice_login( login_submission: JsonDict, appservice: ApplicationService, should_issue_refresh_token: bool = False, - ): + ) -> LoginResponse: identifier = login_submission.get("identifier") logger.info("Got appservice login request with identifier: %r", identifier) @@ -288,6 +302,7 @@ async def _complete_login( ratelimit: bool = True, auth_provider_id: Optional[str] = None, should_issue_refresh_token: bool = False, + auth_provider_session_id: Optional[str] = None, ) -> LoginResponse: """Called when we've successfully authed the user and now need to actually log them in (e.g. create devices). This gets called on @@ -303,10 +318,10 @@ async def _complete_login( create_non_existent_users: Whether to create the user if they don't exist. Defaults to False. ratelimit: Whether to ratelimit the login request. - auth_provider_id: The SSO IdP the user used, if any (just used for the - prometheus metrics). + auth_provider_id: The SSO IdP the user used, if any. should_issue_refresh_token: True if this login should issue a refresh token alongside the access token. + auth_provider_session_id: The session ID obtained from the SSO IdP during login. Returns: result: Dictionary of account information after successful login.
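The refresh-token opt-in in LoginRestServlet.on_POST above moved from a query parameter to a boolean refresh_token field in the JSON body. Isolated as a pure function (the function name is illustrative, not Synapse's):

from typing import Any, Dict

def should_issue_refresh_token(
    login_submission: Dict[str, Any], refresh_tokens_enabled: bool
) -> bool:
    # The client asks via the body; the server must also have refresh tokens
    # enabled in its configuration for one to be issued.
    requested = login_submission.get("refresh_token", False)
    if not isinstance(requested, bool):
        raise ValueError("`refresh_token` should be true or false.")
    return refresh_tokens_enabled and requested

assert should_issue_refresh_token({"type": "m.login.password", "refresh_token": True}, True)
assert not should_issue_refresh_token({"type": "m.login.password"}, True)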
@@ -339,6 +354,7 @@ async def _complete_login( initial_display_name, auth_provider_id=auth_provider_id, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_session_id=auth_provider_session_id, ) result = LoginResponse( @@ -384,6 +400,7 @@ async def _do_token_login( self.auth_handler._sso_login_callback, auth_provider_id=res.auth_provider_id, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_session_id=res.auth_provider_session_id, ) async def _do_jwt_login( @@ -413,7 +430,7 @@ async def _do_jwt_login( errcode=Codes.FORBIDDEN, ) - user = payload.get("sub", None) + user = payload.get(self.jwt_subject_claim, None) if user is None: raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) @@ -427,9 +444,7 @@ async def _do_jwt_login( return result -def _get_auth_flow_dict_for_idp( - idp: SsoIdentityProvider, use_unstable_brands: bool = False -) -> JsonDict: +def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict: """Return an entry for the login flow dict Returns an entry suitable for inclusion in "identity_providers" in the @@ -437,34 +452,27 @@ def _get_auth_flow_dict_for_idp( Args: idp: the identity provider to describe - use_unstable_brands: whether we should use brand identifiers suitable - for the unstable API """ e: JsonDict = {"id": idp.idp_id, "name": idp.idp_name} if idp.idp_icon: e["icon"] = idp.idp_icon if idp.idp_brand: e["brand"] = idp.idp_brand - # use the stable brand identifier if the unstable identifier isn't defined. - if use_unstable_brands and idp.unstable_idp_brand: - e["brand"] = idp.unstable_idp_brand return e class RefreshTokenServlet(RestServlet): - PATTERNS = client_patterns( - "/org.matrix.msc2918.refresh_token/refresh$", releases=(), unstable=True - ) + PATTERNS = (re.compile("^/_matrix/client/v1/refresh$"),) def __init__(self, hs: "HomeServer"): self._auth_handler = hs.get_auth_handler() self._clock = hs.get_clock() - self.access_token_lifetime = hs.config.access_token_lifetime + self.refreshable_access_token_lifetime = ( + hs.config.registration.refreshable_access_token_lifetime + ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime - async def on_POST( - self, - request: SynapseRequest, - ): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: refresh_submission = parse_json_object_from_request(request) assert_params_in_dict(refresh_submission, ["refresh_token"]) @@ -472,58 +480,49 @@ async def on_POST( if not isinstance(token, str): raise SynapseError(400, "Invalid param: refresh_token", Codes.INVALID_PARAM) - valid_until_ms = self._clock.time_msec() + self.access_token_lifetime - access_token, refresh_token = await self._auth_handler.refresh_token( - token, valid_until_ms - ) - expires_in_ms = valid_until_ms - self._clock.time_msec() - return ( - 200, - { - "access_token": access_token, - "refresh_token": refresh_token, - "expires_in_ms": expires_in_ms, - }, + now = self._clock.time_msec() + access_valid_until_ms = None + if self.refreshable_access_token_lifetime is not None: + access_valid_until_ms = now + self.refreshable_access_token_lifetime + refresh_valid_until_ms = None + if self.refresh_token_lifetime is not None: + refresh_valid_until_ms = now + self.refresh_token_lifetime + + ( + access_token, + refresh_token, + actual_access_token_expiry, + ) = await self._auth_handler.refresh_token( + token, access_valid_until_ms, refresh_valid_until_ms ) + response: Dict[str, Union[str, int]] = { + "access_token": access_token, + "refresh_token": 
refresh_token, + } + + # expires_in_ms is only present if the token expires + if actual_access_token_expiry is not None: + response["expires_in_ms"] = actual_access_token_expiry - now + + return 200, response + class SsoRedirectServlet(RestServlet): PATTERNS = list(client_patterns("/login/(cas|sso)/redirect$", v1=True)) + [ re.compile( "^" + CLIENT_API_PREFIX - + "/r0/login/sso/redirect/(?P<idp_id>[A-Za-z0-9_.~-]+)$" + + "/(r0|v3)/login/sso/redirect/(?P<idp_id>[A-Za-z0-9_.~-]+)$" ) ] def __init__(self, hs: "HomeServer"): # make sure that the relevant handlers are instantiated, so that they # register themselves with the main SSOHandler. - if hs.config.cas_enabled: - hs.get_cas_handler() - if hs.config.saml2_enabled: - hs.get_saml_handler() - if hs.config.oidc_enabled: - hs.get_oidc_handler() + _load_sso_handlers(hs) self._sso_handler = hs.get_sso_handler() - self._msc2858_enabled = hs.config.experimental.msc2858_enabled - self._public_baseurl = hs.config.public_baseurl - - def register(self, http_server: HttpServer) -> None: - super().register(http_server) - if self._msc2858_enabled: - # expose additional endpoint for MSC2858 support: backwards-compat support - # for clients which don't yet support the stable endpoints. - http_server.register_paths( - "GET", - client_patterns( - "/org.matrix.msc2858/login/sso/redirect/(?P<idp_id>[A-Za-z0-9_.~-]+)$", - releases=(), - unstable=True, - ), - self.on_GET, - self.__class__.__name__, - ) + self._public_baseurl = hs.config.server.public_baseurl async def on_GET( self, request: SynapseRequest, idp_id: Optional[str] = None @@ -569,7 +568,7 @@ async def on_GET( class CasTicketServlet(RestServlet): PATTERNS = client_patterns("/login/cas/ticket", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self._cas_handler = hs.get_cas_handler() @@ -591,10 +590,26 @@ async def on_GET(self, request: SynapseRequest) -> None: ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: LoginRestServlet(hs).register(http_server) - if hs.config.access_token_lifetime is not None: + if hs.config.registration.refreshable_access_token_lifetime is not None: RefreshTokenServlet(hs).register(http_server) SsoRedirectServlet(hs).register(http_server) - if hs.config.cas_enabled: + if hs.config.cas.cas_enabled: CasTicketServlet(hs).register(http_server) + + +def _load_sso_handlers(hs: "HomeServer") -> None: + """Ensure that the SSO handlers are loaded, if they are enabled by configuration. + + This is mostly useful to ensure that the CAS/SAML/OIDC handlers register themselves + with the main SsoHandler. + + It's safe to call this multiple times. + """ + if hs.config.cas.cas_enabled: + hs.get_cas_handler() + if hs.config.saml2.saml2_enabled: + hs.get_saml_handler() + if hs.config.oidc.oidc_enabled: + hs.get_oidc_handler() diff --git a/synapse/rest/client/v1/logout.py b/synapse/rest/client/logout.py similarity index 79% rename from synapse/rest/client/v1/logout.py rename to synapse/rest/client/logout.py index 5aa7908d73a6..193a6951b91b 100644 --- a/synapse/rest/client/v1/logout.py +++ b/synapse/rest/client/logout.py @@ -13,9 +13,16 @@ # limitations under the License.
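The expiry arithmetic in RefreshTokenServlet.on_POST above, pulled out as a sketch: each lifetime is optional, and expires_in_ms is only reported when the new access token actually expires.

from typing import Optional, Tuple

def expiries(
    now_ms: int,
    access_lifetime_ms: Optional[int],
    refresh_lifetime_ms: Optional[int],
) -> Tuple[Optional[int], Optional[int]]:
    # None means "does not expire"; otherwise the deadline is now + lifetime.
    access_valid_until = now_ms + access_lifetime_ms if access_lifetime_ms is not None else None
    refresh_valid_until = now_ms + refresh_lifetime_ms if refresh_lifetime_ms is not None else None
    return access_valid_until, refresh_valid_until

now = 1_000_000
access_until, _ = expiries(now, 60_000, None)
response = {"access_token": "...", "refresh_token": "..."}
if access_until is not None:
    response["expires_in_ms"] = access_until - now  # 60000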
import logging +from typing import TYPE_CHECKING, Tuple +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -23,13 +30,13 @@ class LogoutRestServlet(RestServlet): PATTERNS = client_patterns("/logout$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_expired=True) if requester.device_id is None: @@ -48,13 +55,13 @@ async def on_POST(self, request): class LogoutAllRestServlet(RestServlet): PATTERNS = client_patterns("/logout/all$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_expired=True) user_id = requester.user.to_string() @@ -67,6 +74,6 @@ async def on_POST(self, request): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: LogoutRestServlet(hs).register(http_server) LogoutAllRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/notifications.py b/synapse/rest/client/notifications.py similarity index 88% rename from synapse/rest/client/v2_alpha/notifications.py rename to synapse/rest/client/notifications.py index 0ede643c2d91..d1d8a984c630 100644 --- a/synapse/rest/client/v2_alpha/notifications.py +++ b/synapse/rest/client/notifications.py @@ -13,26 +13,33 @@ # limitations under the License. 
import logging +from typing import TYPE_CHECKING, Tuple from synapse.events.utils import format_event_for_client_v2_without_room_id +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_integer, parse_string +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class NotificationsServlet(RestServlet): PATTERNS = client_patterns("/notifications$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastore() self.auth = hs.get_auth() self.clock = hs.get_clock() self._event_serializer = hs.get_event_client_serializer() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() @@ -87,5 +94,5 @@ async def on_GET(self, request): return 200, {"notifications": returned_push_actions, "next_token": next_token} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: NotificationsServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/openid.py b/synapse/rest/client/openid.py similarity index 84% rename from synapse/rest/client/v2_alpha/openid.py rename to synapse/rest/client/openid.py index e8d2673819cb..add56d699884 100644 --- a/synapse/rest/client/v2_alpha/openid.py +++ b/synapse/rest/client/openid.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from synapse.util.stringutils import random_string from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -58,14 +64,16 @@ class IdTokenServlet(RestServlet): EXPIRES_MS = 3600 * 1000 - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.clock = hs.get_clock() - self.server_name = hs.config.server_name + self.server_name = hs.config.server.server_name - async def on_POST(self, request, user_id): + async def on_POST( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot request tokens for other users.") @@ -90,5 +98,5 @@ async def on_POST(self, request, user_id): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: IdTokenServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/password_policy.py b/synapse/rest/client/password_policy.py similarity index 69% rename from synapse/rest/client/v2_alpha/password_policy.py rename to synapse/rest/client/password_policy.py index a83927aee641..9f1908004b9d 100644 --- a/synapse/rest/client/v2_alpha/password_policy.py +++ b/synapse/rest/client/password_policy.py @@ -13,30 +13,34 @@ # limitations under the License. 
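For context on IdTokenServlet above: the body it returns follows the Matrix spec's /openid/request_token response shape. A rough sketch, assuming the standard field names, with secrets.token_urlsafe standing in for Synapse's random_string helper:

import secrets

EXPIRES_MS = 3600 * 1000

def make_openid_token_response(server_name: str) -> dict:
    # The token is an opaque random string; expires_in is in seconds per the spec.
    return {
        "access_token": secrets.token_urlsafe(24),
        "token_type": "Bearer",
        "matrix_server_name": server_name,
        "expires_in": EXPIRES_MS // 1000,
    }

print(make_openid_token_response("example.com"))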
import logging +from typing import TYPE_CHECKING, Tuple +from twisted.web.server import Request + +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class PasswordPolicyServlet(RestServlet): PATTERNS = client_patterns("/password_policy$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() - self.policy = hs.config.password_policy - self.enabled = hs.config.password_policy_enabled + self.policy = hs.config.auth.password_policy + self.enabled = hs.config.auth.password_policy_enabled - def on_GET(self, request): + def on_GET(self, request: Request) -> Tuple[int, JsonDict]: if not self.enabled or not self.policy: - return (200, {}) + return 200, {} policy = {} @@ -50,8 +54,8 @@ def on_GET(self, request): if param in self.policy: policy["m.%s" % param] = self.policy[param] - return (200, policy) + return 200, policy -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: PasswordPolicyServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/presence.py b/synapse/rest/client/presence.py similarity index 80% rename from synapse/rest/client/v1/presence.py rename to synapse/rest/client/presence.py index 2b24fe5aa65f..94dd4fe2f45c 100644 --- a/synapse/rest/client/v1/presence.py +++ b/synapse/rest/client/presence.py @@ -15,12 +15,18 @@ """ This module contains REST servlets to do with presence: /presence/ """ import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError, SynapseError from synapse.handlers.presence import format_user_presence_state +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.rest.client.v2_alpha._base import client_patterns -from synapse.types import UserID +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict, UserID + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) class PresenceStatusRestServlet(RestServlet): PATTERNS = client_patterns("/presence/(?P<user_id>[^/]*)/status", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.presence_handler = hs.get_presence_handler() @@ -37,7 +43,9 @@ def __init__(self, hs): self._use_presence = hs.config.server.use_presence - async def on_GET(self, request, user_id): + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = UserID.from_string(user_id) @@ -53,13 +61,15 @@ async def on_GET(self, request, user_id): raise AuthError(403, "You are not allowed to see their presence.") state = await self.presence_handler.get_state(target_user=user) - state = format_user_presence_state( + result = format_user_presence_state( state, self.clock.time_msec(), include_user_id=False ) - return 200, state + return 200, result - async def on_PUT(self, request, user_id): + async def on_PUT( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = UserID.from_string(user_id) @@ -91,5 +101,5 @@ async def
on_PUT(self, request, user_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: PresenceStatusRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/profile.py b/synapse/rest/client/profile.py similarity index 78% rename from synapse/rest/client/v1/profile.py rename to synapse/rest/client/profile.py index f42f4b35674f..c684636c0a01 100644 --- a/synapse/rest/client/v1/profile.py +++ b/synapse/rest/client/profile.py @@ -14,25 +14,34 @@ """ This module contains REST servlets to do with profile: /profile/ """ +from typing import TYPE_CHECKING, Tuple + from synapse.api.errors import Codes, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.rest.client.v2_alpha._base import client_patterns -from synapse.types import UserID +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict, UserID + +if TYPE_CHECKING: + from synapse.server import HomeServer class ProfileDisplaynameRestServlet(RestServlet): PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)/displayname", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.profile_handler = hs.get_profile_handler() self.auth = hs.get_auth() - async def on_GET(self, request, user_id): + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester_user = None - if self.hs.config.require_auth_for_profile_requests: + if self.hs.config.server.require_auth_for_profile_requests: requester = await self.auth.get_user_by_req(request) requester_user = requester.user @@ -48,7 +57,9 @@ async def on_GET(self, request, user_id): return 200, ret - async def on_PUT(self, request, user_id): + async def on_PUT( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = UserID.from_string(user_id) is_admin = await self.auth.is_server_admin(requester.user) @@ -72,16 +83,18 @@ async def on_PUT(self, request, user_id): class ProfileAvatarURLRestServlet(RestServlet): PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)/avatar_url", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.profile_handler = hs.get_profile_handler() self.auth = hs.get_auth() - async def on_GET(self, request, user_id): + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester_user = None - if self.hs.config.require_auth_for_profile_requests: + if self.hs.config.server.require_auth_for_profile_requests: requester = await self.auth.get_user_by_req(request) requester_user = requester.user @@ -97,7 +110,9 @@ async def on_GET(self, request, user_id): return 200, ret - async def on_PUT(self, request, user_id): + async def on_PUT( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = UserID.from_string(user_id) is_admin = await self.auth.is_server_admin(requester.user) @@ -120,16 +135,18 @@ async def on_PUT(self, request, user_id): class ProfileRestServlet(RestServlet): PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.profile_handler = hs.get_profile_handler()
self.auth = hs.get_auth() - async def on_GET(self, request, user_id): + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: requester_user = None - if self.hs.config.require_auth_for_profile_requests: + if self.hs.config.server.require_auth_for_profile_requests: requester = await self.auth.get_user_by_req(request) requester_user = requester.user @@ -149,7 +166,7 @@ async def on_GET(self, request, user_id): return 200, ret -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ProfileDisplaynameRestServlet(hs).register(http_server) ProfileAvatarURLRestServlet(hs).register(http_server) ProfileRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/push_rule.py similarity index 76% rename from synapse/rest/client/v1/push_rule.py rename to synapse/rest/client/push_rule.py index be29a0b39ec6..6f796d5e5096 100644 --- a/synapse/rest/client/v1/push_rule.py +++ b/synapse/rest/client/push_rule.py @@ -12,22 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union + +import attr + from synapse.api.errors import ( NotFoundError, StoreError, SynapseError, UnrecognizedRequestError, ) +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, parse_json_value_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.push.baserules import BASE_RULE_IDS, NEW_RULE_IDS from synapse.push.clientformat import format_push_rules_for_user from synapse.push.rulekinds import PRIORITY_CLASS_MAP -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.rest.client._base import client_patterns from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class RuleSpec: + scope: str + template: str + rule_id: str + attr: Optional[str] class PushRuleRestServlet(RestServlet): @@ -36,16 +54,18 @@ class PushRuleRestServlet(RestServlet): "Unrecognised request: You probably wanted a trailing slash" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.notifier = hs.get_notifier() - self._is_worker = hs.config.worker_app is not None + self._is_worker = hs.config.worker.worker_app is not None - self._users_new_default_push_rules = hs.config.users_new_default_push_rules + self._users_new_default_push_rules = ( + hs.config.server.users_new_default_push_rules + ) - async def on_PUT(self, request, path): + async def on_PUT(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDict]: if self._is_worker: raise Exception("Cannot handle PUT /push_rules on worker") @@ -57,25 +77,25 @@ async def on_PUT(self, request, path): requester = await self.auth.get_user_by_req(request) - if "/" in spec["rule_id"] or "\\" in spec["rule_id"]: + if "/" in spec.rule_id or "\\" in spec.rule_id: raise SynapseError(400, "rule_id may not contain slashes") content = parse_json_value_from_request(request) user_id = requester.user.to_string() - if "attr" in spec: + if spec.attr: await self.set_rule_attr(user_id, spec, content) self.notify_user(user_id) return 200, {} - if spec["rule_id"].startswith("."): + if 
spec.rule_id.startswith("."): # Rule ids starting with '.' are reserved for server default rules. raise SynapseError(400, "cannot add new rule_ids that start with '.'") try: (conditions, actions) = _rule_tuple_from_request_object( - spec["template"], spec["rule_id"], content + spec.template, spec.rule_id, content ) except InvalidRuleException as e: raise SynapseError(400, str(e)) @@ -106,7 +126,9 @@ async def on_PUT(self, request, path): return 200, {} - async def on_DELETE(self, request, path): + async def on_DELETE( + self, request: SynapseRequest, path: str + ) -> Tuple[int, JsonDict]: if self._is_worker: raise Exception("Cannot handle DELETE /push_rules on worker") @@ -127,7 +149,7 @@ async def on_DELETE(self, request, path): else: raise - async def on_GET(self, request, path): + async def on_GET(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() @@ -138,40 +160,42 @@ async def on_GET(self, request, path): rules = format_push_rules_for_user(requester.user, rules) - path = path.split("/")[1:] + path_parts = path.split("/")[1:] - if path == []: + if path_parts == []: # we're a reference impl: pedantry is our job. raise UnrecognizedRequestError( PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR ) - if path[0] == "": + if path_parts[0] == "": return 200, rules - elif path[0] == "global": - result = _filter_ruleset_with_path(rules["global"], path[1:]) + elif path_parts[0] == "global": + result = _filter_ruleset_with_path(rules["global"], path_parts[1:]) return 200, result else: raise UnrecognizedRequestError() - def notify_user(self, user_id): + def notify_user(self, user_id: str) -> None: stream_id = self.store.get_max_push_rules_stream_id() self.notifier.on_new_event("push_rules_key", stream_id, users=[user_id]) - async def set_rule_attr(self, user_id, spec, val): - if spec["attr"] not in ("enabled", "actions"): + async def set_rule_attr( + self, user_id: str, spec: RuleSpec, val: Union[bool, JsonDict] + ) -> None: + if spec.attr not in ("enabled", "actions"): # for the sake of potential future expansion, shouldn't report # 404 in the case of an unknown request so check it corresponds to # a known attribute first. raise UnrecognizedRequestError() namespaced_rule_id = _namespaced_rule_id_from_spec(spec) - rule_id = spec["rule_id"] + rule_id = spec.rule_id is_default_rule = rule_id.startswith(".") if is_default_rule: if namespaced_rule_id not in BASE_RULE_IDS: raise NotFoundError("Unknown rule %s" % (namespaced_rule_id,)) - if spec["attr"] == "enabled": + if spec.attr == "enabled": if isinstance(val, dict) and "enabled" in val: val = val["enabled"] if not isinstance(val, bool): @@ -179,14 +203,18 @@ async def set_rule_attr(self, user_id, spec, val): # This should *actually* take a dict, but many clients pass # bools directly, so let's not break them. 
raise SynapseError(400, "Value for 'enabled' must be boolean") - return await self.store.set_push_rule_enabled( + await self.store.set_push_rule_enabled( user_id, namespaced_rule_id, val, is_default_rule ) - elif spec["attr"] == "actions": + elif spec.attr == "actions": + if not isinstance(val, dict): + raise SynapseError(400, "Value must be a dict") actions = val.get("actions") + if not isinstance(actions, list): + raise SynapseError(400, "Value for 'actions' must be a list") _check_actions(actions) namespaced_rule_id = _namespaced_rule_id_from_spec(spec) - rule_id = spec["rule_id"] + rule_id = spec.rule_id is_default_rule = rule_id.startswith(".") if is_default_rule: if user_id in self._users_new_default_push_rules: @@ -196,22 +224,21 @@ async def set_rule_attr(self, user_id, spec, val): if namespaced_rule_id not in rule_ids: raise SynapseError(404, "Unknown rule %r" % (namespaced_rule_id,)) - return await self.store.set_push_rule_actions( + await self.store.set_push_rule_actions( user_id, namespaced_rule_id, actions, is_default_rule ) else: raise UnrecognizedRequestError() -def _rule_spec_from_path(path): +def _rule_spec_from_path(path: Sequence[str]) -> RuleSpec: """Turn a sequence of path components into a rule spec Args: - path (sequence[unicode]): the URL path components. + path: the URL path components. Returns: - dict: rule spec dict, containing scope/template/rule_id entries, - and possibly attr. + rule spec, containing scope/template/rule_id entries, and possibly attr. Raises: UnrecognizedRequestError if the path components cannot be parsed. @@ -237,17 +264,18 @@ def _rule_spec_from_path(path): rule_id = path[0] - spec = {"scope": scope, "template": template, "rule_id": rule_id} - path = path[1:] + attr = None if len(path) > 0 and len(path[0]) > 0: - spec["attr"] = path[0] + attr = path[0] - return spec + return RuleSpec(scope, template, rule_id, attr) -def _rule_tuple_from_request_object(rule_template, rule_id, req_obj): +def _rule_tuple_from_request_object( + rule_template: str, rule_id: str, req_obj: JsonDict +) -> Tuple[List[JsonDict], List[Union[str, JsonDict]]]: if rule_template in ["override", "underride"]: if "conditions" not in req_obj: raise InvalidRuleException("Missing 'conditions'") @@ -277,7 +305,7 @@ def _rule_tuple_from_request_object(rule_template, rule_id, req_obj): return conditions, actions -def _check_actions(actions): +def _check_actions(actions: List[Union[str, JsonDict]]) -> None: if not isinstance(actions, list): raise InvalidRuleException("No actions found") @@ -290,7 +318,7 @@ def _check_actions(actions): raise InvalidRuleException("Unrecognised action") -def _filter_ruleset_with_path(ruleset, path): +def _filter_ruleset_with_path(ruleset: JsonDict, path: List[str]) -> JsonDict: if path == []: raise UnrecognizedRequestError( PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR @@ -315,7 +343,7 @@ def _filter_ruleset_with_path(ruleset, path): if r["rule_id"] == rule_id: the_rule = r if the_rule is None: - raise NotFoundError + raise NotFoundError() path = path[1:] if len(path) == 0: @@ -330,25 +358,25 @@ def _filter_ruleset_with_path(ruleset, path): raise UnrecognizedRequestError() -def _priority_class_from_spec(spec): - if spec["template"] not in PRIORITY_CLASS_MAP.keys(): - raise InvalidRuleException("Unknown template: %s" % (spec["template"])) - pc = PRIORITY_CLASS_MAP[spec["template"]] +def _priority_class_from_spec(spec: RuleSpec) -> int: + if spec.template not in PRIORITY_CLASS_MAP.keys(): + raise InvalidRuleException("Unknown template: %s" %
(spec.template)) + pc = PRIORITY_CLASS_MAP[spec.template] return pc -def _namespaced_rule_id_from_spec(spec): - return _namespaced_rule_id(spec, spec["rule_id"]) +def _namespaced_rule_id_from_spec(spec: RuleSpec) -> str: + return _namespaced_rule_id(spec, spec.rule_id) -def _namespaced_rule_id(spec, rule_id): - return "global/%s/%s" % (spec["template"], rule_id) +def _namespaced_rule_id(spec: RuleSpec, rule_id: str) -> str: + return "global/%s/%s" % (spec.template, rule_id) class InvalidRuleException(Exception): pass -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: PushRuleRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/pusher.py similarity index 87% rename from synapse/rest/client/v1/pusher.py rename to synapse/rest/client/pusher.py index 18102eca6c1b..98604a93887f 100644 --- a/synapse/rest/client/v1/pusher.py +++ b/synapse/rest/client/pusher.py @@ -13,17 +13,23 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, StoreError, SynapseError -from synapse.http.server import respond_with_html_bytes +from synapse.http.server import HttpServer, respond_with_html_bytes from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.push import PusherConfigException -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,12 +37,12 @@ class PushersRestServlet(RestServlet): PATTERNS = client_patterns("/pushers$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = requester.user @@ -50,14 +56,14 @@ async def on_GET(self, request): class PushersSetRestServlet(RestServlet): PATTERNS = client_patterns("/pushers/set$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.notifier = hs.get_notifier() self.pusher_pool = self.hs.get_pusherpool() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = requester.user @@ -132,14 +138,14 @@ class PushersRemoveRestServlet(RestServlet): PATTERNS = client_patterns("/pushers/remove$", v1=True) SUCCESS_HTML = b"<html><body>You have been unsubscribed</body></html>" - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.notifier = hs.get_notifier() self.auth = hs.get_auth() self.pusher_pool = self.hs.get_pusherpool() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> None: requester = await self.auth.get_user_by_req(request, rights="delete_pusher") user = requester.user @@ -165,7 +171,7 @@ async def on_GET(self, request): return None -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: PushersRestServlet(hs).register(http_server) PushersSetRestServlet(hs).register(http_server)
PushersRemoveRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/read_marker.py b/synapse/rest/client/read_marker.py similarity index 84% rename from synapse/rest/client/v2_alpha/read_marker.py rename to synapse/rest/client/read_marker.py index 027f8b81fa93..43c04fac6fdb 100644 --- a/synapse/rest/client/v2_alpha/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -13,27 +13,36 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.api.errors import Codes, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class ReadMarkerRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/read_markers$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.receipts_handler = hs.get_receipts_handler() self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() - async def on_POST(self, request, room_id): + async def on_POST( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) await self.presence_handler.bump_presence_active_time(requester.user) @@ -70,5 +79,5 @@ async def on_POST(self, request, room_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ReadMarkerRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/receipts.py b/synapse/rest/client/receipts.py similarity index 69% rename from synapse/rest/client/v2_alpha/receipts.py rename to synapse/rest/client/receipts.py index d9ab836cd892..2b25b9aad6a3 100644 --- a/synapse/rest/client/v2_alpha/receipts.py +++ b/synapse/rest/client/receipts.py @@ -13,13 +13,24 @@ # limitations under the License.
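Looking back at the push_rule.py refactor above: _rule_spec_from_path now returns a frozen attrs class rather than a dict, so a mistyped key becomes an AttributeError at the call site instead of a silent miss. A simplified standalone sketch of that parsing (a dataclass stands in for the attrs class, and the exact path offsets here are illustrative):

from dataclasses import dataclass
from typing import Optional, Sequence

@dataclass(frozen=True)
class RuleSpec:
    scope: str
    template: str
    rule_id: str
    attr: Optional[str]

def rule_spec_from_path(path: Sequence[str]) -> RuleSpec:
    # e.g. PUT /pushrules/global/override/myrule/enabled splits into
    # ["pushrules", "global", "override", "myrule", "enabled"]
    if len(path) < 4 or path[0] != "pushrules":
        raise ValueError("cannot parse rule spec")
    scope, template, rule_id = path[1], path[2], path[3]
    rest = path[4:]
    attr = rest[0] if rest and rest[0] else None
    return RuleSpec(scope, template, rule_id, attr)

print(rule_spec_from_path(["pushrules", "global", "override", "myrule", "enabled"]))
# RuleSpec(scope='global', template='override', rule_id='myrule', attr='enabled')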
import logging +import re +from typing import TYPE_CHECKING, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.api.errors import Codes, SynapseError +from synapse.http import get_request_user_agent +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +pattern = re.compile(r"(?:Element|SchildiChat)/1\.[012]\.") + +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -30,20 +41,28 @@ class ReceiptRestServlet(RestServlet): "/(?P<event_id>[^/]*)$" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.receipts_handler = hs.get_receipts_handler() self.presence_handler = hs.get_presence_handler() - async def on_POST(self, request, room_id, receipt_type, event_id): + async def on_POST( + self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if receipt_type != "m.read": raise SynapseError(400, "Receipt type must be 'm.read'") - body = parse_json_object_from_request(request, allow_empty_body=True) + # Only allow older SchildiChat and Element Android clients (Element/1.[012].x, or Riot-branded builds) to send an empty body. + user_agent = get_request_user_agent(request) + allow_empty_body = False + if "Android" in user_agent: + if pattern.match(user_agent) or "Riot" in user_agent: + allow_empty_body = True + body = parse_json_object_from_request(request, allow_empty_body) hidden = body.get(ReadReceiptEventFields.MSC2285_HIDDEN, False) if not isinstance(hidden, bool): @@ -67,5 +86,5 @@ async def on_POST(self, request, room_id, receipt_type, event_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ReceiptRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/register.py similarity index 80% rename from synapse/rest/client/v2_alpha/register.py rename to synapse/rest/client/register.py index 4d31584acd20..8b56c76aed66 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/register.py @@ -12,10 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
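Reviewer note: the new empty-body allowance hinges entirely on the `pattern` regex plus two substring checks. A self-contained sketch of the gate (the sample user-agent strings are invented for illustration, not captured from real clients):

import re

# Same expression as `pattern` in receipts.py: Element or SchildiChat,
# version 1.0.x, 1.1.x or 1.2.x, anchored at the start of the user agent.
pattern = re.compile(r"(?:Element|SchildiChat)/1\.[012]\.")

def may_send_empty_body(user_agent: str) -> bool:
    # Mirrors the gate in on_POST: only Android builds, and only the
    # old Element/SchildiChat versions (or Riot-branded builds).
    if "Android" in user_agent:
        if pattern.match(user_agent) or "Riot" in user_agent:
            return True
    return False

assert may_send_empty_body("Element/1.2.2 (Android 11)")
assert may_send_empty_body("Riot.im android (Android)")
assert not may_send_empty_body("Element/1.3.0 (Android 11)")
assert not may_send_empty_body("Element/1.2.2 (iOS 14)")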
-import hmac import logging import random -from typing import List, Union +from typing import TYPE_CHECKING, List, Optional, Tuple + +from twisted.web.server import Request import synapse import synapse.api.auth @@ -28,23 +29,22 @@ ThreepidValidationError, UnrecognizedRequestError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.config import ConfigError -from synapse.config.captcha import CaptchaConfig -from synapse.config.consent import ConsentConfig from synapse.config.emailconfig import ThreepidBehaviour +from synapse.config.homeserver import HomeServerConfig from synapse.config.ratelimiting import FederationRateLimitConfig -from synapse.config.registration import RegistrationConfig from synapse.config.server import is_threepid_reserved from synapse.handlers.auth import AuthHandler from synapse.handlers.ui_auth import UIAuthSessionDataConstants -from synapse.http.server import finish_request, respond_with_html +from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( RestServlet, assert_params_in_dict, - parse_boolean, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer from synapse.types import JsonDict @@ -59,17 +59,8 @@ from ._base import client_patterns, interactive_auth_handler -# We ought to be using hmac.compare_digest() but on older pythons it doesn't -# exist. It's a _really minor_ security flaw to use plain string comparison -# because the timing attack is so obscured by all the other code here it's -# unlikely to make much difference -if hasattr(hmac, "compare_digest"): - compare_digest = hmac.compare_digest -else: - - def compare_digest(a, b): - return a == b - +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -77,27 +68,25 @@ def compare_digest(a, b): class EmailRegisterRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/register/email/requestToken$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() self.config = hs.config - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_registration_template_html, - template_text=self.config.email_registration_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_registration_template_html, + template_text=self.config.email.email_registration_template_text, ) - async def on_POST(self, request): - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.hs.config.local_threepid_handling_disabled_due_to_email_config: + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if ( + self.hs.config.email.local_threepid_handling_disabled_due_to_email_config + ): logger.warning( "Email registration has been disabled due to lack of email config" ) @@ -115,7 +104,7 @@ async def on_POST(self, request): # For emails, canonicalise the address. # We store all email addresses canonicalised in the DB. 
# (See on_POST in EmailThreepidRequestTokenRestServlet - # in synapse/rest/client/v2_alpha/account.py) + # in synapse/rest/client/account.py) try: email = validate_email(body["email"]) except ValueError as e: @@ -139,7 +128,7 @@ async def on_POST(self, request): ) if existing_user_id is not None: - if self.hs.config.request_token_inhibit_3pid_errors: + if self.hs.config.server.request_token_inhibit_3pid_errors: # Make the client think the operation succeeded. See the rationale in the # comments for request_token_inhibit_3pid_errors. # Also wait for some random amount of time between 100ms and 1s to make it @@ -149,12 +138,12 @@ async def on_POST(self, request): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.account_threepid_delegate_email + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.registration.account_threepid_delegate_email # Have the configured identity server handle the request ret = await self.identity_handler.requestEmailToken( - self.hs.config.account_threepid_delegate_email, + self.hs.config.registration.account_threepid_delegate_email, email, client_secret, send_attempt, @@ -183,16 +172,12 @@ async def on_POST(self, request): class MsisdnRegisterRequestTokenRestServlet(RestServlet): PATTERNS = client_patterns("/register/msisdn/requestToken$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.identity_handler = hs.get_identity_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) assert_params_in_dict( @@ -223,7 +208,7 @@ async def on_POST(self, request): ) if existing_user_id is not None: - if self.hs.config.request_token_inhibit_3pid_errors: + if self.hs.config.server.request_token_inhibit_3pid_errors: # Make the client think the operation succeeded. See the rationale in the # comments for request_token_inhibit_3pid_errors. 
# Also wait for some random amount of time between 100ms and 1s to make it @@ -235,7 +220,7 @@ 400, "Phone number is already in use", Codes.THREEPID_IN_USE ) - if not self.hs.config.account_threepid_delegate_msisdn: + if not self.hs.config.registration.account_threepid_delegate_msisdn: logger.warning( "No upstream msisdn account_threepid_delegate configured on the server to " "handle this request" ) @@ -245,7 +230,7 @@ ) ret = await self.identity_handler.requestMsisdnToken( - self.hs.config.account_threepid_delegate_msisdn, + self.hs.config.registration.account_threepid_delegate_msisdn, country, phone_number, client_secret, @@ -267,11 +252,7 @@ class RegistrationSubmitTokenServlet(RestServlet): "/registration/(?P<medium>[^/]*)/submit_token$", releases=(), unstable=True ) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -279,18 +260,18 @@ def __init__(self, hs): self.clock = hs.get_clock() self.store = hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self._failure_email_template = ( - self.config.email_registration_template_failure_html + self.config.email.email_registration_template_failure_html ) - async def on_GET(self, request, medium): + async def on_GET(self, request: Request, medium: str) -> None: if medium != "email": raise SynapseError( 400, "This medium is currently not supported for registration" ) - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "User registration via email has been disabled due to lack of email config" ) @@ -323,7 +304,7 @@ async def on_GET(self, request, medium): return None # Otherwise show the success template - html = self.config.email_registration_template_success_html_content + html = self.config.email.email_registration_template_success_html_content status_code = 200 except ThreepidValidationError as e: status_code = e.code @@ -338,11 +319,7 @@ class UsernameAvailabilityRestServlet(RestServlet): PATTERNS = client_patterns("/register/available") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.registration_handler = hs.get_registration_handler() @@ -354,16 +331,16 @@ def __init__(self, hs): # Artificially delay requests if rate > sleep_limit/window_size sleep_limit=1, # Amount of artificial delay to apply - sleep_msec=1000, + sleep_delay=1000, # Error with 429 if more than reject_limit requests are queued reject_limit=1, # Allow 1 request at a time - concurrent_requests=1, + concurrent=1, ), ) - async def on_GET(self, request): - if not self.hs.config.enable_registration: + async def on_GET(self, request: Request) -> Tuple[int, JsonDict]: + if not self.hs.config.registration.enable_registration: raise SynapseError( 403, "Registration has been disabled", errcode=Codes.FORBIDDEN ) @@ -379,14 +356,55 @@ async def on_GET(self, request): return 200, {"available": True} +class RegistrationTokenValidityRestServlet(RestServlet): + """Check the
validity of a registration token. + + Example: + + GET /_matrix/client/unstable/org.matrix.msc3231/register/org.matrix.msc3231.login.registration_token/validity?token=abcd + + 200 OK + + { + "valid": true + } + """ + + PATTERNS = client_patterns( + f"/org.matrix.msc3231/register/{LoginType.REGISTRATION_TOKEN}/validity", + releases=(), + unstable=True, + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self.hs = hs + self.store = hs.get_datastore() + self.ratelimiter = Ratelimiter( + store=self.store, + clock=hs.get_clock(), + rate_hz=hs.config.ratelimiting.rc_registration_token_validity.per_second, + burst_count=hs.config.ratelimiting.rc_registration_token_validity.burst_count, + ) + + async def on_GET(self, request: Request) -> Tuple[int, JsonDict]: + await self.ratelimiter.ratelimit(None, (request.getClientIP(),)) + + if not self.hs.config.registration.enable_registration: + raise SynapseError( + 403, "Registration has been disabled", errcode=Codes.FORBIDDEN + ) + + token = parse_string(request, "token", required=True) + valid = await self.store.registration_token_is_valid(token) + + return 200, {"valid": valid} + + class RegisterRestServlet(RestServlet): PATTERNS = client_patterns("/register$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs @@ -400,41 +418,42 @@ def __init__(self, hs): self.ratelimiter = hs.get_registration_ratelimiter() self.password_policy_handler = hs.get_password_policy_handler() self.clock = hs.get_clock() - self._registration_enabled = self.hs.config.enable_registration - self._msc2918_enabled = hs.config.access_token_lifetime is not None + self._registration_enabled = self.hs.config.registration.enable_registration + self._refresh_tokens_enabled = ( + hs.config.registration.refreshable_access_token_lifetime is not None + ) self._registration_flows = _calculate_registration_flows( hs.config, self.auth_handler ) @interactive_auth_handler - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) client_addr = request.getClientIP() await self.ratelimiter.ratelimit(None, client_addr, update=False) - kind = b"user" - if b"kind" in request.args: - kind = request.args[b"kind"][0] + kind = parse_string(request, "kind", default="user") - if kind == b"guest": + if kind == "guest": ret = await self._do_guest_registration(body, address=client_addr) return ret - elif kind != b"user": + elif kind != "user": raise UnrecognizedRequestError( - "Do not understand membership kind: %s" % (kind.decode("utf8"),) + f"Do not understand membership kind: {kind}", ) - if self._msc2918_enabled: - # Check if this registration should also issue a refresh token, as - # per MSC2918 - should_issue_refresh_token = parse_boolean( - request, name="org.matrix.msc2918.refresh_token", default=False - ) - else: - should_issue_refresh_token = False + # Check if the client wishes for this registration to issue a refresh + # token. + client_requested_refresh_tokens = body.get("refresh_token", False) + if not isinstance(client_requested_refresh_tokens, bool): + raise SynapseError(400, "`refresh_token` should be true or false.") + + should_issue_refresh_token = ( + self._refresh_tokens_enabled and client_requested_refresh_tokens + ) # Pull out the provided username and do basic sanity checks early since # the auth layer will store these in sessions.
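Reviewer note: for manually exercising the new validity endpoint, a minimal stdlib-only sketch; the homeserver base URL and token value are placeholders for a local test setup:

import json
import urllib.parse
import urllib.request

# Hypothetical local homeserver; adjust host/port for your setup.
base = "http://localhost:8008"
token = "abcd"  # the registration token to check

url = (
    base
    + "/_matrix/client/unstable/org.matrix.msc3231/register"
    + "/org.matrix.msc3231.login.registration_token/validity"
    + "?" + urllib.parse.urlencode({"token": token})
)
with urllib.request.urlopen(url) as resp:
    # Expected shape, per the docstring above: {"valid": true|false}
    print(json.load(resp))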
@@ -631,7 +650,7 @@ async def on_POST(self, request): # For emails, canonicalise the address. # We store all email addresses canonicalised in the DB. # (See on_POST in EmailThreepidRequestTokenRestServlet - # in synapse/rest/client/v2_alpha/account.py) + # in synapse/rest/client/account.py) if medium == "email": try: address = canonicalise_email(address) @@ -665,7 +684,7 @@ async def on_POST(self, request): # written to the db if threepid: if is_threepid_reserved( - self.hs.config.mau_limits_reserved_threepids, threepid + self.hs.config.server.mau_limits_reserved_threepids, threepid ): await self.store.upsert_monthly_active_user(registered_user_id) @@ -686,6 +705,22 @@ async def on_POST(self, request): ) if registered: + # Check if a token was used to authenticate registration + registration_token = await self.auth_handler.get_session_data( + session_id, + UIAuthSessionDataConstants.REGISTRATION_TOKEN, + ) + if registration_token: + # Increment the `completed` counter for the token + await self.store.use_registration_token(registration_token) + # Indicate that the token has been successfully used so that + # pending is not decremented again when expiring old UIA sessions. + await self.store.mark_ui_auth_stage_complete( + session_id, + LoginType.REGISTRATION_TOKEN, + True, + ) + await self.registration_handler.post_registration_actions( user_id=registered_user_id, auth_result=auth_result, @@ -695,8 +730,12 @@ async def on_POST(self, request): return 200, return_dict async def _do_appservice_registration( - self, username, as_token, body, should_issue_refresh_token: bool = False - ): + self, + username: str, + as_token: str, + body: JsonDict, + should_issue_refresh_token: bool = False, + ) -> JsonDict: user_id = await self.registration_handler.appservice_register( username, as_token ) @@ -713,7 +752,7 @@ async def _create_registration_details( params: JsonDict, is_appservice_ghost: bool = False, should_issue_refresh_token: bool = False, - ): + ) -> JsonDict: """Complete registration of newly-registered user Allocates device_id if one was not given; also creates access_token. 
@@ -728,7 +767,10 @@ async def _create_registration_details( Returns: dictionary for response from /register """ - result = {"user_id": user_id, "home_server": self.hs.hostname} + result: JsonDict = { + "user_id": user_id, + "home_server": self.hs.hostname, + } if not params.get("inhibit_login", False): device_id = params.get("device_id") initial_display_name = params.get("initial_device_display_name") @@ -757,8 +799,10 @@ async def _create_registration_details( return result - async def _do_guest_registration(self, params, address=None): - if not self.hs.config.allow_guest_access: + async def _do_guest_registration( + self, params: JsonDict, address: Optional[str] = None + ) -> Tuple[int, JsonDict]: + if not self.hs.config.registration.allow_guest_access: raise SynapseError(403, "Guest access is disabled") user_id = await self.registration_handler.register_user( make_guest=True, address=address @@ -777,7 +821,7 @@ async def _do_guest_registration(self, params, address=None): user_id, device_id, initial_display_name, is_guest=True ) - result = { + result: JsonDict = { "user_id": user_id, "device_id": device_id, "access_token": access_token, @@ -795,9 +839,7 @@ async def _do_guest_registration(self, params, address=None): def _calculate_registration_flows( - # technically `config` has to provide *all* of these interfaces, not just one - config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig], - auth_handler: AuthHandler, + config: HomeServerConfig, auth_handler: AuthHandler ) -> List[List[str]]: """Get a suitable flows list for registration @@ -809,13 +851,13 @@ def _calculate_registration_flows( """ # FIXME: need a better error than "no auth flow found" for scenarios # where we required 3PID for registration but the user didn't give one - require_email = "email" in config.registrations_require_3pid - require_msisdn = "msisdn" in config.registrations_require_3pid + require_email = "email" in config.registration.registrations_require_3pid + require_msisdn = "msisdn" in config.registration.registrations_require_3pid show_msisdn = True show_email = True - if config.disable_msisdn_registration: + if config.registration.disable_msisdn_registration: show_msisdn = False require_msisdn = False @@ -859,21 +901,27 @@ def _calculate_registration_flows( flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY]) # Prepend m.login.terms to all flows if we're requiring consent - if config.user_consent_at_registration: + if config.consent.user_consent_at_registration: for flow in flows: flow.insert(0, LoginType.TERMS) # Prepend recaptcha to all flows if we're requiring captcha - if config.enable_registration_captcha: + if config.captcha.enable_registration_captcha: for flow in flows: flow.insert(0, LoginType.RECAPTCHA) + # Prepend registration token to all flows if we're requiring a token + if config.registration.registration_requires_token: + for flow in flows: + flow.insert(0, LoginType.REGISTRATION_TOKEN) + return flows -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: EmailRegisterRequestTokenRestServlet(hs).register(http_server) MsisdnRegisterRequestTokenRestServlet(hs).register(http_server) UsernameAvailabilityRestServlet(hs).register(http_server) RegistrationSubmitTokenServlet(hs).register(http_server) + RegistrationTokenValidityRestServlet(hs).register(http_server) RegisterRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/relations.py similarity index 80% rename 
from synapse/rest/client/v2_alpha/relations.py rename to synapse/rest/client/relations.py index 0821cd285fd3..fc4e6921c5e6 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/relations.py @@ -19,25 +19,32 @@ """ import logging +from typing import TYPE_CHECKING, Awaitable, Optional, Tuple from synapse.api.constants import EventTypes, RelationTypes from synapse.api.errors import ShadowBanError, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, parse_integer, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.transactions import HttpTransactionCache from synapse.storage.relations import ( AggregationPaginationToken, PaginationChunk, RelationPaginationToken, ) +from synapse.types import JsonDict from synapse.util.stringutils import random_string from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -59,13 +66,13 @@ class RelationSendServlet(RestServlet): "/(?P<parent_id>[^/]*)/(?P<relation_type>[^/]*)/(?P<event_type>[^/]*)" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.event_creation_handler = hs.get_event_creation_handler() self.txns = HttpTransactionCache(hs) - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: http_server.register_paths( "POST", client_patterns(self.PATTERN + "$", releases=()), @@ -79,14 +86,35 @@ def register(self, http_server): self.__class__.__name__, ) - def on_PUT(self, request, *args, **kwargs): + def on_PUT( + self, + request: SynapseRequest, + room_id: str, + parent_id: str, + relation_type: str, + event_type: str, + txn_id: Optional[str] = None, + ) -> Awaitable[Tuple[int, JsonDict]]: return self.txns.fetch_or_execute_request( - request, self.on_PUT_or_POST, request, *args, **kwargs + request, + self.on_PUT_or_POST, + request, + room_id, + parent_id, + relation_type, + event_type, + txn_id, ) async def on_PUT_or_POST( - self, request, room_id, parent_id, relation_type, event_type, txn_id=None - ): + self, + request: SynapseRequest, + room_id: str, + parent_id: str, + relation_type: str, + event_type: str, + txn_id: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) if event_type == EventTypes.Member: @@ -100,9 +128,10 @@ async def on_PUT_or_POST( content["m.relates_to"] = { "event_id": parent_id, - "key": aggregation_key, "rel_type": relation_type, } + if aggregation_key is not None: + content["m.relates_to"]["key"] = aggregation_key event_dict = { "type": event_type, @@ -136,7 +165,7 @@ class RelationPaginationServlet(RestServlet): releases=(), ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() @@ -145,8 +174,13 @@ def __init__(self, hs): self.event_handler = hs.get_event_handler() async def on_GET( - self, request, room_id, parent_id, relation_type=None, event_type=None - ): + self, + request: SynapseRequest, + room_id: str, + parent_id: str, + relation_type: Optional[str] = None, + event_type: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) await self.auth.check_user_in_room_or_world_readable( @@ -156,6 +190,8 @@ async def on_GET( # This gets the original event and checks that a) the event exists and # b) the user is allowed to
view it. event = await self.event_handler.get_event(requester.user, room_id, parent_id) + if event is None: + raise SynapseError(404, "Unknown parent event.") limit = parse_integer(request, "limit", default=5) from_token_str = parse_string(request, "from") @@ -188,21 +224,17 @@ async def on_GET( ) now = self.clock.time_msec() - # We set bundle_aggregations to False when retrieving the original - # event because we want the content before relations were applied to - # it. + # Do not bundle aggregations when retrieving the original event because + # we want the content before relations are applied to it. original_event = await self._event_serializer.serialize_event( event, now, bundle_aggregations=False ) - # Similarly, we don't allow relations to be applied to relations, so we - # return the original relations without any aggregations on top of them - # here. - events = await self._event_serializer.serialize_events( - events, now, bundle_aggregations=False - ) + # The relations returned for the requested event do include their + # bundled aggregations. + serialized_events = await self._event_serializer.serialize_events(events, now) return_value = pagination_chunk.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return_value["original_event"] = original_event return 200, return_value @@ -233,15 +265,20 @@ class RelationAggregationPaginationServlet(RestServlet): releases=(), ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() self.event_handler = hs.get_event_handler() async def on_GET( - self, request, room_id, parent_id, relation_type=None, event_type=None - ): + self, + request: SynapseRequest, + room_id: str, + parent_id: str, + relation_type: Optional[str] = None, + event_type: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) await self.auth.check_user_in_room_or_world_readable( @@ -253,9 +290,13 @@ async def on_GET( # This checks that a) the event exists and b) the user is allowed to # view it. 
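Reviewer note: the `m.relates_to` change earlier in this file is easy to misread; this standalone sketch (helper name hypothetical) shows that `key` is now omitted entirely rather than sent as null when no aggregation key is supplied:

from typing import Optional

def build_relates_to(
    parent_id: str, relation_type: str, aggregation_key: Optional[str]
) -> dict:
    # Mirrors on_PUT_or_POST above: "key" is only present when an
    # aggregation key was actually given.
    relates_to = {"event_id": parent_id, "rel_type": relation_type}
    if aggregation_key is not None:
        relates_to["key"] = aggregation_key
    return {"m.relates_to": relates_to}

assert build_relates_to("$parent", "m.annotation", "👍") == {
    "m.relates_to": {"event_id": "$parent", "rel_type": "m.annotation", "key": "👍"}
}
assert "key" not in build_relates_to("$parent", "m.reference", None)["m.relates_to"]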
event = await self.event_handler.get_event(requester.user, room_id, parent_id) + if event is None: + raise SynapseError(404, "Unknown parent event.") if relation_type not in (RelationTypes.ANNOTATION, None): - raise SynapseError(400, "Relation type must be 'annotation'") + raise SynapseError( + 400, f"Relation type must be '{RelationTypes.ANNOTATION}'" + ) limit = parse_integer(request, "limit", default=5) from_token_str = parse_string(request, "from") @@ -315,7 +356,7 @@ class RelationAggregationGroupPaginationServlet(RestServlet): releases=(), ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() @@ -323,7 +364,15 @@ def __init__(self, hs): self._event_serializer = hs.get_event_client_serializer() self.event_handler = hs.get_event_handler() - async def on_GET(self, request, room_id, parent_id, relation_type, event_type, key): + async def on_GET( + self, + request: SynapseRequest, + room_id: str, + parent_id: str, + relation_type: str, + event_type: str, + key: str, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) await self.auth.check_user_in_room_or_world_readable( @@ -366,15 +415,15 @@ async def on_GET(self, request, room_id, parent_id, relation_type, event_type, k ) now = self.clock.time_msec() - events = await self._event_serializer.serialize_events(events, now) + serialized_events = await self._event_serializer.serialize_events(events, now) return_value = result.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return 200, return_value -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RelationSendServlet(hs).register(http_server) RelationPaginationServlet(hs).register(http_server) RelationAggregationPaginationServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/report_event.py b/synapse/rest/client/report_event.py similarity index 81% rename from synapse/rest/client/v2_alpha/report_event.py rename to synapse/rest/client/report_event.py index 07ea39a8a3e4..d4a4adb50c59 100644 --- a/synapse/rest/client/v2_alpha/report_event.py +++ b/synapse/rest/client/report_event.py @@ -14,26 +14,35 @@ import logging from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class ReportEventRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/report/(?P<event_id>[^/]*)$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.clock = hs.get_clock() self.store = hs.get_datastore() - async def on_POST(self, request, room_id, event_id): + async def on_POST( + self, request: SynapseRequest, room_id: str, event_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() @@ -64,5 +73,5 @@ async def on_POST(self, request, room_id, event_id): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
ReportEventRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/room.py similarity index 58% rename from synapse/rest/client/v1/room.py rename to synapse/rest/client/room.py index 8789d565a08d..a69145d1214d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/room.py @@ -16,22 +16,25 @@ """ This module contains REST servlets to do with rooms: /rooms/<paths> """ import logging import re -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple from urllib import parse as urlparse -from synapse.api.constants import EventContentFields, EventTypes, Membership +from twisted.web.server import Request + +from synapse.api.constants import EventTypes, Membership from synapse.api.errors import ( AuthError, Codes, - HttpResponseException, InvalidClientCredentialsError, + MissingClientTokenError, ShadowBanError, SynapseError, ) from synapse.api.filtering import Filter -from synapse.appservice import ApplicationService from synapse.events.utils import format_event_for_client_v2 +from synapse.http.server import HttpServer from synapse.http.servlet import ( + ResolveRoomIdMixin, RestServlet, assert_params_in_dict, parse_boolean, @@ -42,20 +45,11 @@ ) from synapse.http.site import SynapseRequest from synapse.logging.opentracing import set_tag +from synapse.rest.client._base import client_patterns from synapse.rest.client.transactions import HttpTransactionCache -from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import ( - JsonDict, - Requester, - RoomAlias, - RoomID, - StreamToken, - ThirdPartyInstanceID, - UserID, - create_requester, -) +from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID from synapse.util import json_decoder from synapse.util.stringutils import parse_and_validate_server_name, random_string @@ -66,7 +60,7 @@ class TransactionRestServlet(RestServlet): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.txns = HttpTransactionCache(hs) @@ -74,20 +68,22 @@ def __init__(self, hs): class RoomCreateRestServlet(TransactionRestServlet): # No PATTERN; we have custom dispatch rules here - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self._room_creation_handler = hs.get_room_creation_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: PATTERNS = "/createRoom" register_txn_path(self, PATTERNS, http_server) - def on_PUT(self, request, txn_id): + def on_PUT( + self, request: SynapseRequest, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request(request, self.on_POST, request) - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_limited=False) info, _ = await self._room_creation_handler.create_room( @@ -96,21 +92,21 @@ async def on_POST(self, request): return 200, info - def get_room_config(self, request): + def get_room_config(self, request: Request) -> JsonDict: user_supplied_config = parse_json_object_from_request(request) return user_supplied_config # TODO: Needs unit testing for generic events class RoomStateEventRestServlet(TransactionRestServlet): - def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self.message_handler = hs.get_message_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: # /room/$roomid/state/$eventtype no_state_key = "/rooms/(?P<room_id>[^/]*)/state/(?P<event_type>[^/]*)$" @@ -145,13 +141,19 @@ def register(self, http_server): self.__class__.__name__, ) - def on_GET_no_state_key(self, request, room_id, event_type): + def on_GET_no_state_key( + self, request: SynapseRequest, room_id: str, event_type: str + ) -> Awaitable[Tuple[int, JsonDict]]: return self.on_GET(request, room_id, event_type, "") - def on_PUT_no_state_key(self, request, room_id, event_type): + def on_PUT_no_state_key( + self, request: SynapseRequest, room_id: str, event_type: str + ) -> Awaitable[Tuple[int, JsonDict]]: return self.on_PUT(request, room_id, event_type, "") - async def on_GET(self, request, room_id, event_type, state_key): + async def on_GET( + self, request: SynapseRequest, room_id: str, event_type: str, state_key: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) format = parse_string( request, "format", default="content", allowed_values=["content", "event"] @@ -174,7 +176,17 @@ async def on_GET(self, request, room_id, event_type, state_key): elif format == "content": return 200, data.get_dict()["content"] - async def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): + # Format must be event or content, per the parse_string call above. + raise RuntimeError(f"Unknown format: {format!r}.") + + async def on_PUT( + self, + request: SynapseRequest, + room_id: str, + event_type: str, + state_key: str, + txn_id: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if txn_id: @@ -220,27 +232,35 @@ async def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): # TODO: Needs unit testing for generic events + feedback class RoomSendEventRestServlet(TransactionRestServlet): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: # /rooms/$roomid/send/$event_type[/$txn_id] PATTERNS = "/rooms/(?P<room_id>[^/]*)/send/(?P<event_type>[^/]*)" register_txn_path(self, PATTERNS, http_server, with_get=True) - async def on_POST(self, request, room_id, event_type, txn_id=None): + async def on_POST( + self, + request: SynapseRequest, + room_id: str, + event_type: str, + txn_id: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) - event_dict = { + event_dict: JsonDict = { "type": event_type, "content": content, "room_id": room_id, "sender": requester.user.to_string(), } + # Twisted will have processed the args by now.
+ assert request.args is not None if b"ts" in request.args and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) @@ -258,10 +278,14 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): set_tag("event_id", event_id) return 200, {"event_id": event_id} - def on_GET(self, request, room_id, event_type, txn_id): + def on_GET( + self, request: SynapseRequest, room_id: str, event_type: str, txn_id: str + ) -> Tuple[int, str]: return 200, "Not implemented" - def on_PUT(self, request, room_id, event_type, txn_id): + def on_PUT( + self, request: SynapseRequest, room_id: str, event_type: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -269,408 +293,14 @@ def on_PUT(self, request, room_id, event_type, txn_id): ) -class RoomBatchSendEventRestServlet(TransactionRestServlet): - """ - API endpoint which can insert a chunk of events historically back in time - next to the given `prev_event`. - - `chunk_id` comes from `next_chunk_id` in the response of the batch send - endpoint and is derived from the "insertion" events added to each chunk. - It's not required for the first batch send. - - `state_events_at_start` is used to define the historical state events - needed to auth the events like join events. These events will float - outside of the normal DAG as outliers and won't be visible in the chat - history which also allows us to insert multiple chunks without having a bunch - of `@mxid joined the room` noise between each chunk. - - `events` is a chronological chunk/list of events you want to insert. - There is a reverse-chronological constraint on chunks so once you insert - some messages, you can only insert older ones after that. - tldr; Insert chunks from your most recent history -> oldest history. - - POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<room_id>/batch_send?prev_event=<event_id>&chunk_id=<chunk_id> - { - "events": [ ... ], - "state_events_at_start": [ ... ] - } - """ - - PATTERNS = ( - re.compile( - "^/_matrix/client/unstable/org.matrix.msc2716" - "/rooms/(?P<room_id>[^/]*)/batch_send$" - ), - ) - - def __init__(self, hs): - super().__init__(hs) - self.hs = hs - self.store = hs.get_datastore() - self.state_store = hs.get_storage().state - self.event_creation_handler = hs.get_event_creation_handler() - self.room_member_handler = hs.get_room_member_handler() - self.auth = hs.get_auth() - - async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int: - ( - most_recent_prev_event_id, - most_recent_prev_event_depth, - ) = await self.store.get_max_depth_of(prev_event_ids) - - # We want to insert the historical event after the `prev_event` but before the successor event - # - # We inherit depth from the successor event instead of the `prev_event` - # because events returned from `/messages` are first sorted by `topological_ordering` - # which is just the `depth` and then tie-break with `stream_ordering`. - # - # We mark these inserted historical events as "backfilled" which gives them a - # negative `stream_ordering`. If we use the same depth as the `prev_event`, - # then our historical event will tie-break and be sorted before the `prev_event` - # when it should come after. - # - # We want to use the successor event depth so they appear after `prev_event` because - # it has a larger `depth` but before the successor event because the `stream_ordering` - # is negative before the successor event.
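Reviewer note on the removed code: the depth-inheritance reasoning in these comments is worth keeping in mind for the replacement. A stripped-down sketch of the decision, with the store lookups replaced by plain arguments (names hypothetical):

from typing import List, Optional

def inherit_depth(
    prev_event_depth: int, successor_depths: Optional[List[int]]
) -> int:
    # Mirrors _inherit_depth_from_prev_ids: backfilled events get negative
    # stream orderings, so reusing the prev event's depth would tie-break the
    # inserted event *before* it. Taking the oldest successor's depth keeps
    # historical events sorted after prev_event but before its successor.
    if not successor_depths:
        # Forward extremity: nothing after us, the prev event's depth is fine.
        return prev_event_depth
    return min(successor_depths)

assert inherit_depth(10, [11, 12]) == 11
assert inherit_depth(10, None) == 10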
- successor_event_ids = await self.store.get_successor_events( - [most_recent_prev_event_id] - ) - - # If we can't find any successor events, then it's a forward extremity of - # historical messages and we can just inherit from the previous historical - # event which we can already assume has the correct depth where we want - # to insert into. - if not successor_event_ids: - depth = most_recent_prev_event_depth - else: - ( - _, - oldest_successor_depth, - ) = await self.store.get_min_depth_of(successor_event_ids) - - depth = oldest_successor_depth - - return depth - - def _create_insertion_event_dict( - self, sender: str, room_id: str, origin_server_ts: int - ): - """Creates an event dict for an "insertion" event with the proper fields - and a random chunk ID. - - Args: - sender: The event author MXID - room_id: The room ID that the event belongs to - origin_server_ts: Timestamp when the event was sent - - Returns: - Tuple of event ID and stream ordering position - """ - - next_chunk_id = random_string(8) - insertion_event = { - "type": EventTypes.MSC2716_INSERTION, - "sender": sender, - "room_id": room_id, - "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, - EventContentFields.MSC2716_HISTORICAL: True, - }, - "origin_server_ts": origin_server_ts, - } - - return insertion_event - - async def _create_requester_for_user_id_from_app_service( - self, user_id: str, app_service: ApplicationService - ) -> Requester: - """Creates a new requester for the given user_id - and validates that the app service is allowed to control - the given user. - - Args: - user_id: The author MXID that the app service is controlling - app_service: The app service that controls the user - - Returns: - Requester object - """ - - await self.auth.validate_appservice_can_control_user_id(app_service, user_id) - - return create_requester(user_id, app_service=app_service) - - async def on_POST(self, request, room_id): - requester = await self.auth.get_user_by_req(request, allow_guest=False) - - if not requester.app_service: - raise AuthError( - 403, - "Only application services can use the /batchsend endpoint", - ) - - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["state_events_at_start", "events"]) - - prev_events_from_query = parse_strings_from_args(request.args, "prev_event") - chunk_id_from_query = parse_string(request, "chunk_id") - - if prev_events_from_query is None: - raise SynapseError( - 400, - "prev_event query parameter is required when inserting historical messages back in time", - errcode=Codes.MISSING_PARAM, - ) - - # For the event we are inserting next to (`prev_events_from_query`), - # find the most recent auth events (derived from state events) that - # allowed that message to be sent. We will use that as a base - # to auth our historical messages against. 
- ( - most_recent_prev_event_id, - _, - ) = await self.store.get_max_depth_of(prev_events_from_query) - # mapping from (type, state_key) -> state_event_id - prev_state_map = await self.state_store.get_state_ids_for_event( - most_recent_prev_event_id - ) - # List of state event ID's - prev_state_ids = list(prev_state_map.values()) - auth_event_ids = prev_state_ids - - for state_event in body["state_events_at_start"]: - assert_params_in_dict( - state_event, ["type", "origin_server_ts", "content", "sender"] - ) - - logger.debug( - "RoomBatchSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", - state_event, - auth_event_ids, - ) - - event_dict = { - "type": state_event["type"], - "origin_server_ts": state_event["origin_server_ts"], - "content": state_event["content"], - "room_id": room_id, - "sender": state_event["sender"], - "state_key": state_event["state_key"], - } - - # Make the state events float off on their own - fake_prev_event_id = "$" + random_string(43) - - # TODO: This is pretty much the same as some other code to handle inserting state in this file - if event_dict["type"] == EventTypes.Member: - membership = event_dict["content"].get("membership", None) - event_id, _ = await self.room_member_handler.update_membership( - await self._create_requester_for_user_id_from_app_service( - state_event["sender"], requester.app_service - ), - target=UserID.from_string(event_dict["state_key"]), - room_id=room_id, - action=membership, - content=event_dict["content"], - outlier=True, - prev_event_ids=[fake_prev_event_id], - # Make sure to use a copy of this list because we modify it - # later in the loop here. Otherwise it will be the same - # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), - ) - else: - # TODO: Add some complement tests that adds state that is not member joins - # and will use this code path. Maybe we only want to support join state events - # and can get rid of this `else`? - ( - event, - _, - ) = await self.event_creation_handler.create_and_send_nonmember_event( - await self._create_requester_for_user_id_from_app_service( - state_event["sender"], requester.app_service - ), - event_dict, - outlier=True, - prev_event_ids=[fake_prev_event_id], - # Make sure to use a copy of this list because we modify it - # later in the loop here. Otherwise it will be the same - # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), - ) - event_id = event.event_id - - auth_event_ids.append(event_id) - - events_to_create = body["events"] - - inherited_depth = await self._inherit_depth_from_prev_ids( - prev_events_from_query - ) - - # Figure out which chunk to connect to. If they passed in - # chunk_id_from_query let's use it. The chunk ID passed in comes - # from the chunk_id in the "insertion" event from the previous chunk. - last_event_in_chunk = events_to_create[-1] - chunk_id_to_connect_to = chunk_id_from_query - base_insertion_event = None - if chunk_id_from_query: - # All but the first base insertion event should point at a fake - # event, which causes the HS to ask for the state at the start of - # the chunk later. - prev_event_ids = [fake_prev_event_id] - # TODO: Verify the chunk_id_from_query corresponds to an insertion event - pass - # Otherwise, create an insertion event to act as a starting point. 
- # - # We don't always have an insertion event to start hanging more history - # off of (ideally there would be one in the main DAG, but that's not the - # case if we're wanting to add history to e.g. existing rooms without - # an insertion event), in which case we just create a new insertion event - # that can then get pointed to by a "marker" event later. - else: - prev_event_ids = prev_events_from_query - - base_insertion_event_dict = self._create_insertion_event_dict( - sender=requester.user.to_string(), - room_id=room_id, - origin_server_ts=last_event_in_chunk["origin_server_ts"], - ) - base_insertion_event_dict["prev_events"] = prev_event_ids.copy() - - ( - base_insertion_event, - _, - ) = await self.event_creation_handler.create_and_send_nonmember_event( - await self._create_requester_for_user_id_from_app_service( - base_insertion_event_dict["sender"], - requester.app_service, - ), - base_insertion_event_dict, - prev_event_ids=base_insertion_event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, - historical=True, - depth=inherited_depth, - ) - - chunk_id_to_connect_to = base_insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID - ] - - # Connect this current chunk to the insertion event from the previous chunk - chunk_event = { - "type": EventTypes.MSC2716_CHUNK, - "sender": requester.user.to_string(), - "room_id": room_id, - "content": {EventContentFields.MSC2716_CHUNK_ID: chunk_id_to_connect_to}, - # Since the chunk event is put at the end of the chunk, - # where the newest-in-time event is, copy the origin_server_ts from - # the last event we're inserting - "origin_server_ts": last_event_in_chunk["origin_server_ts"], - } - # Add the chunk event to the end of the chunk (newest-in-time) - events_to_create.append(chunk_event) - - # Add an "insertion" event to the start of each chunk (next to the oldest-in-time - # event in the chunk) so the next chunk can be connected to this one. 
- insertion_event = self._create_insertion_event_dict( - sender=requester.user.to_string(), - room_id=room_id, - # Since the insertion event is put at the start of the chunk, - # where the oldest-in-time event is, copy the origin_server_ts from - # the first event we're inserting - origin_server_ts=events_to_create[0]["origin_server_ts"], - ) - # Prepend the insertion event to the start of the chunk (oldest-in-time) - events_to_create = [insertion_event] + events_to_create - - event_ids = [] - events_to_persist = [] - for ev in events_to_create: - assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) - - # Mark all events as historical - # This has important semantics within the Synapse internals to backfill properly - ev["content"][EventContentFields.MSC2716_HISTORICAL] = True - - event_dict = { - "type": ev["type"], - "origin_server_ts": ev["origin_server_ts"], - "content": ev["content"], - "room_id": room_id, - "sender": ev["sender"], # requester.user.to_string(), - "prev_events": prev_event_ids.copy(), - } - - event, context = await self.event_creation_handler.create_event( - await self._create_requester_for_user_id_from_app_service( - ev["sender"], requester.app_service - ), - event_dict, - prev_event_ids=event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, - historical=True, - depth=inherited_depth, - ) - logger.debug( - "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", - event, - prev_event_ids, - auth_event_ids, - ) - - assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( - event.sender, - ) - - events_to_persist.append((event, context)) - event_id = event.event_id - - event_ids.append(event_id) - prev_event_ids = [event_id] - - # Persist events in reverse-chronological order so they have the - # correct stream_ordering as they are backfilled (which decrements). - # Events are sorted by (topological_ordering, stream_ordering) - # where topological_ordering is just depth. 
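Reviewer note on the removed code: the reverse-chronological persist order can be checked with a toy stream counter (numbers entirely hypothetical; backfill allocates decreasing stream orderings):

# Events created oldest -> newest, as in events_to_persist.
events = ["insertion", "event_1", "event_2", "chunk"]

stream_pos = 0
orderings = {}
for name in reversed(events):
    # Backfilled events take decreasing (negative) stream orderings,
    # so persisting newest-first leaves the oldest event most negative.
    stream_pos -= 1
    orderings[name] = stream_pos

# Sorting by stream ordering recovers chronological order.
assert sorted(events, key=orderings.get) == events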
- for (event, context) in reversed(events_to_persist): - ev = await self.event_creation_handler.handle_new_client_event( - await self._create_requester_for_user_id_from_app_service( - event["sender"], requester.app_service - ), - event=event, - context=context, - ) - - # Add the base_insertion_event to the bottom of the list we return - if base_insertion_event is not None: - event_ids.append(base_insertion_event.event_id) - - return 200, { - "state_events": auth_event_ids, - "events": event_ids, - "next_chunk_id": insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID - ], - } - - def on_GET(self, request, room_id): - return 501, "Not implemented" - - def on_PUT(self, request, room_id): - return self.txns.fetch_or_execute_request( - request, self.on_POST, request, room_id - ) - - # TODO: Needs unit testing for room ID + alias joins -class JoinRoomAliasServlet(TransactionRestServlet): - def __init__(self, hs): +class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet): + def __init__(self, hs: "HomeServer"): super().__init__(hs) - self.room_member_handler = hs.get_room_member_handler() + super(ResolveRoomIdMixin, self).__init__(hs) # ensure the Mixin is set up self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: # /join/$room_identifier[/$txn_id] PATTERNS = "/join/(?P<room_identifier>[^/]*)" register_txn_path(self, PATTERNS, http_server) @@ -680,7 +310,7 @@ async def on_POST( request: SynapseRequest, room_identifier: str, txn_id: Optional[str] = None, - ): + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) try: @@ -690,24 +320,13 @@ async def on_POST( # cheekily send invalid bodies. content = {} - if RoomID.is_valid(room_identifier): - room_id = room_identifier - - # twisted.web.server.Request.args is incorrectly defined as Optional[Any] - args: Dict[bytes, List[bytes]] = request.args # type: ignore - - remote_room_hosts = parse_strings_from_args( - args, "server_name", required=False - ) - elif RoomAlias.is_valid(room_identifier): - handler = self.room_member_handler - room_alias = RoomAlias.from_string(room_identifier) - room_id_obj, remote_room_hosts = await handler.lookup_room_alias(room_alias) - room_id = room_id_obj.to_string() - else: - raise SynapseError( - 400, "%s was not legal room ID or room alias" % (room_identifier,) - ) + # twisted.web.server.Request.args is incorrectly defined as Optional[Any] + args: Dict[bytes, List[bytes]] = request.args # type: ignore + remote_room_hosts = parse_strings_from_args(args, "server_name", required=False) + room_id, remote_room_hosts = await self.resolve_room_id( + room_identifier, + remote_room_hosts, + ) await self.room_member_handler.update_membership( requester=requester, @@ -722,7 +341,9 @@ async def on_POST( return 200, {"room_id": room_id} - def on_PUT(self, request, room_identifier, txn_id): + def on_PUT( + self, request: SynapseRequest, room_identifier: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -734,12 +355,12 @@ def on_PUT(self, request, room_identifier, txn_id): class PublicRoomListRestServlet(TransactionRestServlet): PATTERNS = client_patterns("/publicRooms$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.hs = hs self.auth = hs.get_auth() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: server =
parse_string(request, "server") try: @@ -748,7 +369,7 @@ async def on_GET(self, request): # Option to allow servers to require auth when accessing # /publicRooms via CS API. This is especially helpful in private # federations. - if not self.hs.config.allow_public_rooms_without_auth: + if not self.hs.config.server.allow_public_rooms_without_auth: raise # We allow people to not be authed if they're just looking at our @@ -767,7 +388,7 @@ async def on_GET(self, request): limit = None handler = self.hs.get_room_list_handler() - if server and server != self.hs.config.server_name: + if server and server != self.hs.config.server.server_name: # Ensure the server is valid. try: parse_and_validate_server_name(server) @@ -778,12 +399,9 @@ async def on_GET(self, request): Codes.INVALID_PARAM, ) - try: - data = await handler.get_remote_public_room_list( - server, limit=limit, since_token=since_token - ) - except HttpResponseException as e: - raise e.to_synapse_error() + data = await handler.get_remote_public_room_list( + server, limit=limit, since_token=since_token + ) else: data = await handler.get_local_public_room_list( limit=limit, since_token=since_token @@ -791,7 +409,7 @@ async def on_GET(self, request): return 200, data - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await self.auth.get_user_by_req(request, allow_guest=True, allow_limited=False) server = parse_string(request, "server") @@ -820,7 +438,7 @@ async def on_POST(self, request): limit = None handler = self.hs.get_room_list_handler() - if server and server != self.hs.config.server_name: + if server and server != self.hs.config.server.server_name: # Ensure the server is valid. try: parse_and_validate_server_name(server) @@ -831,17 +449,15 @@ async def on_POST(self, request): Codes.INVALID_PARAM, ) - try: - data = await handler.get_remote_public_room_list( - server, - limit=limit, - since_token=since_token, - search_filter=search_filter, - include_all_networks=include_all_networks, - third_party_instance_id=third_party_instance_id, - ) - except HttpResponseException as e: - raise e.to_synapse_error() + data = await handler.get_remote_public_room_list( + server, + limit=limit, + since_token=since_token, + search_filter=search_filter, + include_all_networks=include_all_networks, + third_party_instance_id=third_party_instance_id, + ) + else: data = await handler.get_local_public_room_list( limit=limit, @@ -857,13 +473,15 @@ async def on_POST(self, request): class RoomMemberListRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P[^/]*)/members$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.message_handler = hs.get_message_handler() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: # TODO support Pagination stream API (limit/tokens) requester = await self.auth.get_user_by_req(request, allow_guest=True) handler = self.message_handler @@ -909,12 +527,14 @@ async def on_GET(self, request, room_id): class JoinedRoomMemberListRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P[^/]*)/joined_members$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.message_handler = hs.get_message_handler() self.auth = hs.get_auth() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, 
room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) users_with_profile = await self.message_handler.get_joined_members( @@ -928,22 +548,29 @@ async def on_GET(self, request, room_id): class RoomMessageListRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P[^/]*)/messages$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() + self._hs = hs self.pagination_handler = hs.get_pagination_handler() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 ) + # Twisted will have processed the args by now. + assert request.args is not None as_client_event = b"raw" not in request.args filter_str = parse_string(request, "filter", encoding="utf-8") if filter_str: filter_json = urlparse.unquote(filter_str) - event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json)) + event_filter: Optional[Filter] = Filter( + self._hs, json_decoder.decode(filter_json) + ) if ( event_filter and event_filter.filter_json.get("event_format", "client") @@ -968,12 +595,14 @@ async def on_GET(self, request, room_id): class RoomStateRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P[^/]*)/state$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.message_handler = hs.get_message_handler() self.auth = hs.get_auth() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, List[JsonDict]]: requester = await self.auth.get_user_by_req(request, allow_guest=True) # Get all the current state for this room events = await self.message_handler.get_state_events( @@ -988,13 +617,15 @@ async def on_GET(self, request, room_id): class RoomInitialSyncRestServlet(RestServlet): PATTERNS = client_patterns("/rooms/(?P[^/]*)/initialSync$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.initial_sync_handler = hs.get_initial_sync_handler() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request(self.store, request) content = await self.initial_sync_handler.room_initial_sync( @@ -1008,14 +639,16 @@ class RoomEventServlet(RestServlet): "/rooms/(?P[^/]*)/event/(?P[^/]*)$", v1=True ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.clock = hs.get_clock() self.event_handler = hs.get_event_handler() self._event_serializer = hs.get_event_client_serializer() self.auth = hs.get_auth() - async def on_GET(self, request, room_id, event_id): + async def on_GET( + self, request: SynapseRequest, room_id: str, event_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) try: event = await self.event_handler.get_event( @@ -1029,10 +662,10 @@ async def on_GET(self, request, room_id, event_id): time_now = self.clock.time_msec() if event: - event = await self._event_serializer.serialize_event(event, time_now) - return 
200, event + event_dict = await self._event_serializer.serialize_event(event, time_now) + return 200, event_dict - return SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) + raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) class RoomEventContextServlet(RestServlet): @@ -1040,14 +673,17 @@ class RoomEventContextServlet(RestServlet): "/rooms/(?P[^/]*)/context/(?P[^/]*)$", v1=True ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() + self._hs = hs self.clock = hs.get_clock() self.room_context_handler = hs.get_room_context_handler() self._event_serializer = hs.get_event_client_serializer() self.auth = hs.get_auth() - async def on_GET(self, request, room_id, event_id): + async def on_GET( + self, request: SynapseRequest, room_id: str, event_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) limit = parse_integer(request, "limit", default=10) @@ -1056,7 +692,9 @@ async def on_GET(self, request, room_id, event_id): filter_str = parse_string(request, "filter", encoding="utf-8") if filter_str: filter_json = urlparse.unquote(filter_str) - event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json)) + event_filter: Optional[Filter] = Filter( + self._hs, json_decoder.decode(filter_json) + ) else: event_filter = None @@ -1078,33 +716,34 @@ async def on_GET(self, request, room_id, event_id): results["events_after"], time_now ) results["state"] = await self._event_serializer.serialize_events( - results["state"], - time_now, - # No need to bundle aggregations for state events - bundle_aggregations=False, + results["state"], time_now ) return 200, results class RoomForgetRestServlet(TransactionRestServlet): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: PATTERNS = "/rooms/(?P[^/]*)/forget" register_txn_path(self, PATTERNS, http_server) - async def on_POST(self, request, room_id, txn_id=None): + async def on_POST( + self, request: SynapseRequest, room_id: str, txn_id: Optional[str] = None + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=False) await self.room_member_handler.forget(user=requester.user, room_id=room_id) return 200, {} - def on_PUT(self, request, room_id, txn_id): + def on_PUT( + self, request: SynapseRequest, room_id: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -1114,12 +753,12 @@ def on_PUT(self, request, room_id, txn_id): # TODO: Needs unit testing class RoomMembershipRestServlet(TransactionRestServlet): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: # /rooms/$roomid/[invite|join|leave] PATTERNS = ( "/rooms/(?P[^/]*)/" @@ -1127,7 +766,13 @@ def register(self, http_server): ) register_txn_path(self, PATTERNS, http_server) - async def on_POST(self, request, room_id, membership_action, txn_id=None): + async def on_POST( + self, + request: SynapseRequest, + room_id: str, + membership_action: str, + txn_id: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, 
allow_guest=True) if requester.is_guest and membership_action not in { @@ -1190,13 +835,15 @@ async def on_POST(self, request, room_id, membership_action, txn_id=None): return 200, return_value - def _has_3pid_invite_keys(self, content): + def _has_3pid_invite_keys(self, content: JsonDict) -> bool: for key in {"id_server", "medium", "address"}: if key not in content: return False return True - def on_PUT(self, request, room_id, membership_action, txn_id): + def on_PUT( + self, request: SynapseRequest, room_id: str, membership_action: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -1205,16 +852,22 @@ def on_PUT(self, request, room_id, membership_action, txn_id): class RoomRedactEventRestServlet(TransactionRestServlet): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() self.auth = hs.get_auth() - def register(self, http_server): + def register(self, http_server: HttpServer) -> None: PATTERNS = "/rooms/(?P[^/]*)/redact/(?P[^/]*)" register_txn_path(self, PATTERNS, http_server) - async def on_POST(self, request, room_id, event_id, txn_id=None): + async def on_POST( + self, + request: SynapseRequest, + room_id: str, + event_id: str, + txn_id: Optional[str] = None, + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) @@ -1240,7 +893,9 @@ async def on_POST(self, request, room_id, event_id, txn_id=None): set_tag("event_id", event_id) return 200, {"event_id": event_id} - def on_PUT(self, request, room_id, event_id, txn_id): + def on_PUT( + self, request: SynapseRequest, room_id: str, event_id: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( @@ -1262,10 +917,12 @@ def __init__(self, hs: "HomeServer"): # If we're not on the typing writer instance we should scream if we get # requests. 
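# (Editor's note: the change just below reflects typing moving to a list of
# writers: with several workers allowed to write typing events, the check is
# now membership (`in`) rather than equality against a single instance name.)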
self._is_typing_writer = ( - hs.config.worker.writers.typing == hs.get_instance_name() + hs.get_instance_name() in hs.config.worker.writers.typing ) - async def on_PUT(self, request, room_id, user_id): + async def on_PUT( + self, request: SynapseRequest, room_id: str, user_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if not self._is_typing_writer: @@ -1316,7 +973,9 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.directory_handler = hs.get_directory_handler() - async def on_GET(self, request, room_id): + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) alias_list = await self.directory_handler.get_aliases_for_room( @@ -1329,12 +988,12 @@ async def on_GET(self, request, room_id): class SearchRestServlet(RestServlet): PATTERNS = client_patterns("/search$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.search_handler = hs.get_search_handler() self.auth = hs.get_auth() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_limited=False) content = parse_json_object_from_request(request) @@ -1348,19 +1007,24 @@ async def on_POST(self, request): class JoinedRoomsRestServlet(RestServlet): PATTERNS = client_patterns("/joined_rooms$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastore() self.auth = hs.get_auth() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) room_ids = await self.store.get_rooms_for_user(requester.user.to_string()) return 200, {"joined_rooms": list(room_ids)} -def register_txn_path(servlet, regex_string, http_server, with_get=False): +def register_txn_path( + servlet: RestServlet, + regex_string: str, + http_server: HttpServer, + with_get: bool = False, +) -> None: """Registers a transaction-based path. This registers two paths: @@ -1368,32 +1032,97 @@ def register_txn_path(servlet, regex_string, http_server, with_get=False): POST regex_string Args: - regex_string (str): The regex string to register. Must NOT have a - trailing $ as this string will be appended to. - http_server : The http_server to register paths with. + regex_string: The regex string to register. Must NOT have a + trailing $ as this string will be appended to. + http_server: The http_server to register paths with. with_get: True to also register respective GET paths for the PUTs. 
""" + on_POST = getattr(servlet, "on_POST", None) + on_PUT = getattr(servlet, "on_PUT", None) + if on_POST is None or on_PUT is None: + raise RuntimeError("on_POST and on_PUT must exist when using register_txn_path") http_server.register_paths( "POST", client_patterns(regex_string + "$", v1=True), - servlet.on_POST, + on_POST, servlet.__class__.__name__, ) http_server.register_paths( "PUT", client_patterns(regex_string + "/(?P[^/]*)$", v1=True), - servlet.on_PUT, + on_PUT, servlet.__class__.__name__, ) + on_GET = getattr(servlet, "on_GET", None) if with_get: + if on_GET is None: + raise RuntimeError( + "register_txn_path called with with_get = True, but no on_GET method exists" + ) http_server.register_paths( "GET", client_patterns(regex_string + "/(?P[^/]*)$", v1=True), - servlet.on_GET, + on_GET, servlet.__class__.__name__, ) +class TimestampLookupRestServlet(RestServlet): + """ + API endpoint to fetch the `event_id` of the closest event to the given + timestamp (`ts` query parameter) in the given direction (`dir` query + parameter). + + Useful for cases like jump to date so you can start paginating messages from + a given date in the archive. + + `ts` is a timestamp in milliseconds where we will find the closest event in + the given direction. + + `dir` can be `f` or `b` to indicate forwards and backwards in time from the + given timestamp. + + GET /_matrix/client/unstable/org.matrix.msc3030/rooms//timestamp_to_event?ts=&dir= + { + "event_id": ... + } + """ + + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/org.matrix.msc3030" + "/rooms/(?P[^/]*)/timestamp_to_event$" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self._auth = hs.get_auth() + self._store = hs.get_datastore() + self.timestamp_lookup_handler = hs.get_timestamp_lookup_handler() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await self._auth.check_user_in_room(room_id, requester.user.to_string()) + + timestamp = parse_integer(request, "ts", required=True) + direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) + + ( + event_id, + origin_server_ts, + ) = await self.timestamp_lookup_handler.get_event_for_timestamp( + requester, room_id, timestamp, direction + ) + + return 200, { + "event_id": event_id, + "origin_server_ts": origin_server_ts, + } + + class RoomSpaceSummaryRestServlet(RestServlet): PATTERNS = ( re.compile( @@ -1405,18 +1134,26 @@ class RoomSpaceSummaryRestServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() self._auth = hs.get_auth() - self._space_summary_handler = hs.get_space_summary_handler() + self._room_summary_handler = hs.get_room_summary_handler() async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request, allow_guest=True) - return 200, await self._space_summary_handler.get_space_summary( + max_rooms_per_space = parse_integer(request, "max_rooms_per_space") + if max_rooms_per_space is not None and max_rooms_per_space < 0: + raise SynapseError( + 400, + "Value for 'max_rooms_per_space' must be a non-negative integer", + Codes.BAD_JSON, + ) + + return 200, await self._room_summary_handler.get_space_summary( requester.user.to_string(), room_id, suggested_only=parse_boolean(request, "suggested_only", default=False), - max_rooms_per_space=parse_integer(request, "max_rooms_per_space"), + max_rooms_per_space=max_rooms_per_space, ) # 
TODO When switching to the stable endpoint, remove the POST handler. @@ -1433,12 +1170,19 @@ async def on_POST( ) max_rooms_per_space = content.get("max_rooms_per_space") - if max_rooms_per_space is not None and not isinstance(max_rooms_per_space, int): - raise SynapseError( - 400, "'max_rooms_per_space' must be an integer", Codes.BAD_JSON - ) + if max_rooms_per_space is not None: + if not isinstance(max_rooms_per_space, int): + raise SynapseError( + 400, "'max_rooms_per_space' must be an integer", Codes.BAD_JSON + ) + if max_rooms_per_space < 0: + raise SynapseError( + 400, + "Value for 'max_rooms_per_space' must be a non-negative integer", + Codes.BAD_JSON, + ) - return 200, await self._space_summary_handler.get_space_summary( + return 200, await self._room_summary_handler.get_space_summary( requester.user.to_string(), room_id, suggested_only=suggested_only, @@ -1446,9 +1190,87 @@ async def on_POST( ) -def register_servlets(hs: "HomeServer", http_server, is_worker=False): - msc2716_enabled = hs.config.experimental.msc2716_enabled +class RoomHierarchyRestServlet(RestServlet): + PATTERNS = ( + re.compile( + "^/_matrix/client/(v1|unstable/org.matrix.msc2946)" + "/rooms/(?P[^/]*)/hierarchy$" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self._auth = hs.get_auth() + self._room_summary_handler = hs.get_room_summary_handler() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request, allow_guest=True) + + max_depth = parse_integer(request, "max_depth") + if max_depth is not None and max_depth < 0: + raise SynapseError( + 400, "'max_depth' must be a non-negative integer", Codes.BAD_JSON + ) + + limit = parse_integer(request, "limit") + if limit is not None and limit <= 0: + raise SynapseError( + 400, "'limit' must be a positive integer", Codes.BAD_JSON + ) + + return 200, await self._room_summary_handler.get_room_hierarchy( + requester, + room_id, + suggested_only=parse_boolean(request, "suggested_only", default=False), + max_depth=max_depth, + limit=limit, + from_token=parse_string(request, "from"), + ) + + +class RoomSummaryRestServlet(ResolveRoomIdMixin, RestServlet): + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/im.nheko.summary" + "/rooms/(?P[^/]*)/summary$" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + self._auth = hs.get_auth() + self._room_summary_handler = hs.get_room_summary_handler() + + async def on_GET( + self, request: SynapseRequest, room_identifier: str + ) -> Tuple[int, JsonDict]: + try: + requester = await self._auth.get_user_by_req(request, allow_guest=True) + requester_user_id: Optional[str] = requester.user.to_string() + except MissingClientTokenError: + # auth is optional + requester_user_id = None + + # twisted.web.server.Request.args is incorrectly defined as Optional[Any] + args: Dict[bytes, List[bytes]] = request.args # type: ignore + remote_room_hosts = parse_strings_from_args(args, "via", required=False) + room_id, remote_room_hosts = await self.resolve_room_id( + room_identifier, + remote_room_hosts, + ) + + return 200, await self._room_summary_handler.get_room_summary( + requester_user_id, + room_id, + remote_room_hosts, + ) + +def register_servlets( + hs: "HomeServer", http_server: HttpServer, is_worker: bool = False +) -> None: RoomStateEventRestServlet(hs).register(http_server) RoomMemberListRestServlet(hs).register(http_server) JoinedRoomMemberListRestServlet(hs).register(http_server) @@ -1456,24 
+1278,27 @@ def register_servlets(hs: "HomeServer", http_server, is_worker=False): JoinRoomAliasServlet(hs).register(http_server) RoomMembershipRestServlet(hs).register(http_server) RoomSendEventRestServlet(hs).register(http_server) - if msc2716_enabled: - RoomBatchSendEventRestServlet(hs).register(http_server) PublicRoomListRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) RoomTypingRestServlet(hs).register(http_server) RoomEventContextServlet(hs).register(http_server) RoomSpaceSummaryRestServlet(hs).register(http_server) + RoomHierarchyRestServlet(hs).register(http_server) + if hs.config.experimental.msc3266_enabled: + RoomSummaryRestServlet(hs).register(http_server) RoomEventServlet(hs).register(http_server) JoinedRoomsRestServlet(hs).register(http_server) RoomAliasListServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) + RoomCreateRestServlet(hs).register(http_server) + if hs.config.experimental.msc3030_enabled: + TimestampLookupRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. if not is_worker: - RoomCreateRestServlet(hs).register(http_server) RoomForgetRestServlet(hs).register(http_server) -def register_deprecated_servlets(hs, http_server): +def register_deprecated_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RoomInitialSyncRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py new file mode 100644 index 000000000000..e4c9451ae06a --- /dev/null +++ b/synapse/rest/client/room_batch.py @@ -0,0 +1,251 @@ +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import re +from http import HTTPStatus +from typing import TYPE_CHECKING, Awaitable, Tuple + +from twisted.web.server import Request + +from synapse.api.constants import EventContentFields +from synapse.api.errors import AuthError, Codes, SynapseError +from synapse.http.server import HttpServer +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, + parse_string, + parse_strings_from_args, +) +from synapse.http.site import SynapseRequest +from synapse.rest.client.transactions import HttpTransactionCache +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class RoomBatchSendEventRestServlet(RestServlet): + """ + API endpoint which can insert a batch of events historically back in time + next to the given `prev_event`. + + `batch_id` comes from `next_batch_id` in the response of the batch send + endpoint and is derived from the "insertion" events added to each batch. + It's not required for the first batch send. + + `state_events_at_start` is used to define the historical state events + needed to auth the events like join events. 
These events will float + outside of the normal DAG as outliers and won't be visible in the chat + history, which also allows us to insert multiple batches without having a bunch + of `@mxid joined the room` noise between each batch. + + `events` is a chronological list of events you want to insert. + There is a reverse-chronological constraint on batches so once you insert + some messages, you can only insert older ones after that. + tldr; Insert batches from your most recent history -> oldest history. + + POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event_id=&batch_id= + { + "events": [ ... ], + "state_events_at_start": [ ... ] + } + """ + + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/org.matrix.msc2716" + "/rooms/(?P[^/]*)/batch_send$" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self.store = hs.get_datastore() + self.event_creation_handler = hs.get_event_creation_handler() + self.auth = hs.get_auth() + self.room_batch_handler = hs.get_room_batch_handler() + self.txns = HttpTransactionCache(hs) + + async def on_POST( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request, allow_guest=False) + + if not requester.app_service: + raise AuthError( + HTTPStatus.FORBIDDEN, + "Only application services can use the /batchsend endpoint", + ) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["state_events_at_start", "events"]) + + assert request.args is not None + prev_event_ids_from_query = parse_strings_from_args( + request.args, "prev_event_id" + ) + batch_id_from_query = parse_string(request, "batch_id") + + if prev_event_ids_from_query is None: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "prev_event query parameter is required when inserting historical messages back in time", + errcode=Codes.MISSING_PARAM, + ) + + # Verify the batch_id_from_query corresponds to an actual insertion event + # and that the batch is connected. + if batch_id_from_query: + corresponding_insertion_event_id = ( + await self.store.get_insertion_event_id_by_batch_id( + room_id, batch_id_from_query + ) + ) + if corresponding_insertion_event_id is None: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "No insertion event corresponds to the given ?batch_id", + errcode=Codes.INVALID_PARAM, + ) + + # For the event we are inserting next to (`prev_event_ids_from_query`), + # find the most recent auth events (derived from state events) that + # allowed that message to be sent. We will use that as a base + # to auth our historical messages against. + auth_event_ids = await self.room_batch_handler.get_most_recent_auth_event_ids_from_event_id_list( + prev_event_ids_from_query + ) + + state_event_ids_at_start = [] + # Create and persist all of the state events that float off on their own + # before the batch. These will most likely be all of the invite/member + # state events used to auth the upcoming historical messages.
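# (Editor's note: a hypothetical request body for the endpoint described in
# the docstring above; every value here is illustrative, not taken from the diff:
#   {
#     "state_events_at_start": [
#       {"type": "m.room.member", "sender": "@a:example.org",
#        "state_key": "@a:example.org",
#        "content": {"membership": "join"}, "origin_server_ts": 1000}
#     ],
#     "events": [
#       {"type": "m.room.message", "sender": "@a:example.org",
#        "content": {"msgtype": "m.text", "body": "old message"},
#        "origin_server_ts": 2000}
#     ]
#   }
# The member events become floating outliers used only for auth; the "events"
# list forms the historical batch itself.)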
+ if body["state_events_at_start"]: + state_event_ids_at_start = ( + await self.room_batch_handler.persist_state_events_at_start( + state_events_at_start=body["state_events_at_start"], + room_id=room_id, + initial_auth_event_ids=auth_event_ids, + app_service_requester=requester, + ) + ) + # Update our ongoing auth event ID list with all of the new state we + # just created + auth_event_ids.extend(state_event_ids_at_start) + + inherited_depth = await self.room_batch_handler.inherit_depth_from_prev_ids( + prev_event_ids_from_query + ) + + events_to_create = body["events"] + + # Figure out which batch to connect to. If they passed in + # batch_id_from_query let's use it. The batch ID passed in comes + # from the batch_id in the "insertion" event from the previous batch. + last_event_in_batch = events_to_create[-1] + base_insertion_event = None + if batch_id_from_query: + batch_id_to_connect_to = batch_id_from_query + # Otherwise, create an insertion event to act as a starting point. + # + # We don't always have an insertion event to start hanging more history + # off of (ideally there would be one in the main DAG, but that's not the + # case if we're wanting to add history to e.g. existing rooms without + # an insertion event), in which case we just create a new insertion event + # that can then get pointed to by a "marker" event later. + else: + base_insertion_event_dict = ( + self.room_batch_handler.create_insertion_event_dict( + sender=requester.user.to_string(), + room_id=room_id, + origin_server_ts=last_event_in_batch["origin_server_ts"], + ) + ) + base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy() + + ( + base_insertion_event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( + await self.room_batch_handler.create_requester_for_user_id_from_app_service( + base_insertion_event_dict["sender"], + requester.app_service, + ), + base_insertion_event_dict, + prev_event_ids=base_insertion_event_dict.get("prev_events"), + auth_event_ids=auth_event_ids, + historical=True, + depth=inherited_depth, + ) + + batch_id_to_connect_to = base_insertion_event.content[ + EventContentFields.MSC2716_NEXT_BATCH_ID + ] + + # Also connect the historical event chain to the end of the floating + # state chain, which causes the HS to ask for the state at the start of + # the batch later. If there is no state chain to connect to, just make + # the insertion event float itself. + prev_event_ids = [] + if len(state_event_ids_at_start): + prev_event_ids = [state_event_ids_at_start[-1]] + + # Create and persist all of the historical events as well as insertion + # and batch meta events to make the batch navigable in the DAG. 
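# (Editor's note: the slicing a few lines below assumes handle_batch_of_events
# returns the new event IDs in DAG order, roughly
#   [insertion_event, historical_event_1, ..., historical_event_n, batch_event]
# so the insertion event is first and the batch event last.)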
+ event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events( + events_to_create=events_to_create, + room_id=room_id, + batch_id_to_connect_to=batch_id_to_connect_to, + initial_prev_event_ids=prev_event_ids, + inherited_depth=inherited_depth, + auth_event_ids=auth_event_ids, + app_service_requester=requester, + ) + + insertion_event_id = event_ids[0] + batch_event_id = event_ids[-1] + historical_event_ids = event_ids[1:-1] + + response_dict = { + "state_event_ids": state_event_ids_at_start, + "event_ids": historical_event_ids, + "next_batch_id": next_batch_id, + "insertion_event_id": insertion_event_id, + "batch_event_id": batch_event_id, + } + if base_insertion_event is not None: + response_dict["base_insertion_event_id"] = base_insertion_event.event_id + + return HTTPStatus.OK, response_dict + + def on_GET(self, request: Request, room_id: str) -> Tuple[int, str]: + return HTTPStatus.NOT_IMPLEMENTED, "Not implemented" + + def on_PUT( + self, request: SynapseRequest, room_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: + return self.txns.fetch_or_execute_request( + request, self.on_POST, request, room_id + ) + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + msc2716_enabled = hs.config.experimental.msc2716_enabled + + if msc2716_enabled: + RoomBatchSendEventRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/room_keys.py similarity index 91% rename from synapse/rest/client/v2_alpha/room_keys.py rename to synapse/rest/client/room_keys.py index 263596be8658..37e39570f6ac 100644 --- a/synapse/rest/client/v2_alpha/room_keys.py +++ b/synapse/rest/client/room_keys.py @@ -13,16 +13,23 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Optional, Tuple from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -31,16 +38,14 @@ class RoomKeysServlet(RestServlet): "/room_keys/keys(/(?P[^/]+))?(/(?P[^/]+))?$" ) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() - async def on_PUT(self, request, room_id, session_id): + async def on_PUT( + self, request: SynapseRequest, room_id: Optional[str], session_id: Optional[str] + ) -> Tuple[int, JsonDict]: """ Uploads one or more encrypted E2E room keys for backup purposes. room_id: the ID of the room the keys are for (optional) @@ -133,7 +138,9 @@ async def on_PUT(self, request, room_id, session_id): ret = await self.e2e_room_keys_handler.upload_room_keys(user_id, version, body) return 200, ret - async def on_GET(self, request, room_id, session_id): + async def on_GET( + self, request: SynapseRequest, room_id: Optional[str], session_id: Optional[str] + ) -> Tuple[int, JsonDict]: """ Retrieves one or more encrypted E2E room keys for backup purposes. Symmetric with the PUT version of the API. 
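As an aside for readers unfamiliar with the key backup API these handlers implement: the /room_keys/keys endpoints operate on a nested rooms/sessions structure. A minimal sketch of an upload body follows; the values are illustrative and the exact session_data shape depends on the backup algorithm in use, so treat it as an assumption rather than part of the diff:

    room_keys_upload = {
        "rooms": {
            "!room:example.org": {
                "sessions": {
                    "megolm_session_id": {
                        "first_message_index": 0,
                        "forwarded_count": 0,
                        "is_verified": False,
                        # Opaque encrypted blob; these field names are assumed.
                        "session_data": {"ephemeral": "...", "ciphertext": "...", "mac": "..."},
                    }
                }
            }
        }
    }

Omitting the room_id or session_id path component scopes the same shape to all rooms or all sessions, which is why both parameters are Optional above.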
@@ -215,7 +222,9 @@ async def on_GET(self, request, room_id, session_id): return 200, room_keys - async def on_DELETE(self, request, room_id, session_id): + async def on_DELETE( + self, request: SynapseRequest, room_id: Optional[str], session_id: Optional[str] + ) -> Tuple[int, JsonDict]: """ Deletes one or more encrypted E2E room keys for a user for backup purposes. @@ -242,16 +251,12 @@ async def on_DELETE(self, request, room_id, session_id): class RoomKeysNewVersionServlet(RestServlet): PATTERNS = client_patterns("/room_keys/version$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """ Create a new backup version for this user's room_keys with the given info. The version is allocated by the server and returned to the user @@ -295,16 +300,14 @@ async def on_POST(self, request): class RoomKeysVersionServlet(RestServlet): PATTERNS = client_patterns("/room_keys/version(/(?P[^/]+))?$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() - async def on_GET(self, request, version): + async def on_GET( + self, request: SynapseRequest, version: Optional[str] + ) -> Tuple[int, JsonDict]: """ Retrieve the version information about a given version of the user's room_keys backup. If the version part is missing, returns info about the @@ -332,7 +335,9 @@ async def on_GET(self, request, version): raise SynapseError(404, "No backup found", Codes.NOT_FOUND) return 200, info - async def on_DELETE(self, request, version): + async def on_DELETE( + self, request: SynapseRequest, version: Optional[str] + ) -> Tuple[int, JsonDict]: """ Delete the information about a given version of the user's room_keys backup. If the version part is missing, deletes the most @@ -351,7 +356,9 @@ async def on_DELETE(self, request, version): await self.e2e_room_keys_handler.delete_version(user_id, version) return 200, {} - async def on_PUT(self, request, version): + async def on_PUT( + self, request: SynapseRequest, version: Optional[str] + ) -> Tuple[int, JsonDict]: """ Update the information about a given version of the user's room_keys backup. @@ -385,7 +392,7 @@ async def on_PUT(self, request, version): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RoomKeysServlet(hs).register(http_server) RoomKeysVersionServlet(hs).register(http_server) RoomKeysNewVersionServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py b/synapse/rest/client/room_upgrade_rest_servlet.py similarity index 84% rename from synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py rename to synapse/rest/client/room_upgrade_rest_servlet.py index 6d1b083acb47..6a7792e18b2e 100644 --- a/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py +++ b/synapse/rest/client/room_upgrade_rest_servlet.py @@ -13,18 +13,25 @@ # limitations under the License. 
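A similar aside for the version servlets just above: a backup version is created with the algorithm and its auth data, and key uploads then target that version. A hedged sketch with illustrative values:

    # Body for POST /room_keys/version; the algorithm name follows the e2e key
    # backup spec, and the auth_data contents are assumed for illustration.
    new_version_body = {
        "algorithm": "m.megolm_backup.v1.curve25519-aes-sha2",
        "auth_data": {"public_key": "abcdefg..."},
    }
    # The server allocates and returns the version, e.g. {"version": "1"}.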
import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, ShadowBanError, SynapseError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, assert_params_in_dict, parse_json_object_from_request, ) +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from synapse.util import stringutils from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -41,9 +48,6 @@ class RoomUpgradeRestServlet(RestServlet): } Creates a new room and shuts down the old one. Returns the ID of the new room. - - Args: - hs (synapse.server.HomeServer): """ PATTERNS = client_patterns( @@ -51,13 +55,15 @@ class RoomUpgradeRestServlet(RestServlet): "/rooms/(?P[^/]*)/upgrade$" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self._hs = hs self._room_creation_handler = hs.get_room_creation_handler() self._auth = hs.get_auth() - async def on_POST(self, request, room_id): + async def on_POST( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) content = parse_json_object_from_request(request) @@ -84,5 +90,5 @@ async def on_POST(self, request, room_id): return 200, ret -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RoomUpgradeRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/sendtodevice.py b/synapse/rest/client/sendtodevice.py similarity index 75% rename from synapse/rest/client/v2_alpha/sendtodevice.py rename to synapse/rest/client/sendtodevice.py index d537d811d8a1..3322c8ef48da 100644 --- a/synapse/rest/client/v2_alpha/sendtodevice.py +++ b/synapse/rest/client/sendtodevice.py @@ -13,15 +13,21 @@ # limitations under the License. 
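The upgrade servlet above takes a single new_version field and responds with the replacement room's ID. A minimal client-side sketch (the endpoint path follows the servlet's pattern; the replacement_room response key is an assumption based on the client-server spec):

    import json
    import urllib.request

    req = urllib.request.Request(
        "https://hs.example.org/_matrix/client/r0/rooms/%21old%3Aexample.org/upgrade",
        data=json.dumps({"new_version": "9"}).encode("utf8"),
        headers={"Authorization": "Bearer MDAx...", "Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        new_room_id = json.load(resp).get("replacement_room")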
import logging -from typing import Tuple +from typing import TYPE_CHECKING, Awaitable, Tuple from synapse.http import servlet +from synapse.http.server import HttpServer from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request +from synapse.http.site import SynapseRequest from synapse.logging.opentracing import set_tag, trace from synapse.rest.client.transactions import HttpTransactionCache +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -30,11 +36,7 @@ class SendToDeviceRestServlet(servlet.RestServlet): "/sendToDevice/(?P[^/]*)/(?P[^/]*)$" ) - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -42,14 +44,18 @@ def __init__(self, hs): self.device_message_handler = hs.get_device_message_handler() @trace(opname="sendToDevice") - def on_PUT(self, request, message_type, txn_id): + def on_PUT( + self, request: SynapseRequest, message_type: str, txn_id: str + ) -> Awaitable[Tuple[int, JsonDict]]: set_tag("message_type", message_type) set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request( request, self._put, request, message_type, txn_id ) - async def _put(self, request, message_type, txn_id): + async def _put( + self, request: SynapseRequest, message_type: str, txn_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) @@ -59,9 +65,8 @@ async def _put(self, request, message_type, txn_id): requester, message_type, content["messages"] ) - response: Tuple[int, dict] = (200, {}) - return response + return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: SendToDeviceRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/shared_rooms.py b/synapse/rest/client/shared_rooms.py similarity index 78% rename from synapse/rest/client/v2_alpha/shared_rooms.py rename to synapse/rest/client/shared_rooms.py index d2e7f04b406c..09a46737de06 100644 --- a/synapse/rest/client/v2_alpha/shared_rooms.py +++ b/synapse/rest/client/shared_rooms.py @@ -12,13 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
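The send-to-device servlet above expects a messages map keyed by user ID and then device ID; per the client-server spec a device key of "*" targets all of a user's devices, though that detail is not visible in this diff. An illustrative body for PUT /sendToDevice/{messageType}/{txnId}:

    to_device_body = {
        "messages": {
            "@alice:example.org": {
                # Hypothetical device ID and payload; the content is free-form
                # and interpreted according to the message type in the path.
                "HGFEDCBA": {"example_key": "example_value"},
            }
        }
    }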
import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet -from synapse.types import UserID +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict, UserID from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -32,13 +38,15 @@ class UserSharedRoomsServlet(RestServlet): releases=(), # This is an unstable feature ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() - self.user_directory_active = hs.config.update_user_directory + self.user_directory_active = hs.config.server.update_user_directory - async def on_GET(self, request, user_id): + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: if not self.user_directory_active: raise SynapseError( @@ -63,5 +71,5 @@ async def on_GET(self, request, user_id): return 200, {"joined": list(rooms)} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: UserSharedRoomsServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/sync.py similarity index 76% rename from synapse/rest/client/v2_alpha/sync.py rename to synapse/rest/client/sync.py index e321668698ae..7f5846d38934 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/sync.py @@ -14,17 +14,38 @@ import itertools import logging from collections import defaultdict -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, +) from synapse.api.constants import Membership, PresenceState from synapse.api.errors import Codes, StoreError, SynapseError -from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection +from synapse.api.filtering import FilterCollection +from synapse.api.presence import UserPresenceState +from synapse.events import EventBase from synapse.events.utils import ( format_event_for_client_v2_without_room_id, format_event_raw, ) from synapse.handlers.presence import format_user_presence_state -from synapse.handlers.sync import KnockedSyncResult, SyncConfig +from synapse.handlers.sync import ( + ArchivedSyncResult, + InvitedSyncResult, + JoinedSyncResult, + KnockedSyncResult, + SyncConfig, + SyncResult, +) +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.types import JsonDict, StreamToken @@ -129,17 +150,17 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: request_key = (user, timeout, since, filter_id, full_state, device_id) if filter_id is None: - filter_collection = DEFAULT_FILTER_COLLECTION + filter_collection = self.filtering.DEFAULT_FILTER_COLLECTION elif filter_id.startswith("{"): try: filter_object = json_decoder.decode(filter_id) set_timeline_upper_limit( - filter_object, self.hs.config.filter_timeline_limit + filter_object, self.hs.config.server.filter_timeline_limit ) except Exception: raise SynapseError(400, "Invalid filter JSON") self.filtering.check_valid_filter(filter_object) - filter_collection = FilterCollection(filter_object) + filter_collection = 
FilterCollection(self.hs, filter_object) else: try: filter_collection = await self.filtering.get_user_filter( @@ -192,6 +213,8 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: return 200, {} time_now = self.clock.time_msec() + # We know that the requester has an access token since appservices + # cannot use sync. response_content = await self.encode_response( time_now, sync_result, requester.access_token_id, filter_collection ) @@ -199,7 +222,13 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: logger.debug("Event formatting complete") return 200, response_content - async def encode_response(self, time_now, sync_result, access_token_id, filter): + async def encode_response( + self, + time_now: int, + sync_result: SyncResult, + access_token_id: Optional[int], + filter: FilterCollection, + ) -> JsonDict: logger.debug("Formatting events in sync response") if filter.event_format == "client": event_formatter = format_event_for_client_v2_without_room_id @@ -234,7 +263,7 @@ async def encode_response(self, time_now, sync_result, access_token_id, filter): logger.debug("building sync response dict") - response: dict = defaultdict(dict) + response: JsonDict = defaultdict(dict) response["next_batch"] = await sync_result.next_batch.to_string(self.store) if sync_result.account_data: @@ -259,10 +288,11 @@ async def encode_response(self, time_now, sync_result, access_token_id, filter): # Corresponding synapse issue: https://github.com/matrix-org/synapse/issues/10456 response["device_one_time_keys_count"] = sync_result.device_one_time_keys_count - if sync_result.device_unused_fallback_key_types: - response[ - "org.matrix.msc2732.device_unused_fallback_key_types" - ] = sync_result.device_unused_fallback_key_types + # https://github.com/matrix-org/matrix-doc/blob/54255851f642f84a4f1aaf7bc063eebe3d76752b/proposals/2732-olm-fallback-keys.md + # states that this field should always be included, as long as the server supports the feature. + response[ + "org.matrix.msc2732.device_unused_fallback_key_types" + ] = sync_result.device_unused_fallback_key_types if joined: response["rooms"][Membership.JOIN] = joined @@ -273,6 +303,8 @@ async def encode_response(self, time_now, sync_result, access_token_id, filter): if archived: response["rooms"][Membership.LEAVE] = archived + # By the time we get here, groups is no longer optional.
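# (Editor's note: the assert below narrows the Optional type for the type
# checker rather than guarding against a condition expected at runtime.)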
+ assert sync_result.groups is not None if sync_result.groups.join: response["groups"][Membership.JOIN] = sync_result.groups.join if sync_result.groups.invite: @@ -283,7 +315,7 @@ async def encode_response(self, time_now, sync_result, access_token_id, filter): return response @staticmethod - def encode_presence(events, time_now): + def encode_presence(events: List[UserPresenceState], time_now: int) -> JsonDict: return { "events": [ { @@ -298,25 +330,27 @@ def encode_presence(events, time_now): } async def encode_joined( - self, rooms, time_now, token_id, event_fields, event_formatter - ): + self, + rooms: List[JoinedSyncResult], + time_now: int, + token_id: Optional[int], + event_fields: List[str], + event_formatter: Callable[[JsonDict], JsonDict], + ) -> JsonDict: """ Encode the joined rooms in a sync result Args: - rooms(list[synapse.handlers.sync.JoinedSyncResult]): list of sync - results for rooms this user is joined to - time_now(int): current time - used as a baseline for age - calculations - token_id(int): ID of the user's auth token - used for namespacing + rooms: list of sync results for rooms this user is joined to + time_now: current time - used as a baseline for age calculations + token_id: ID of the user's auth token - used for namespacing of transaction IDs - event_fields(list): List of event fields to include. If empty, + event_fields: List of event fields to include. If empty, all fields will be returned. - event_formatter (func[dict]): function to convert from federation format + event_formatter: function to convert from federation format to client format Returns: - dict[str, dict[str, object]]: the joined rooms list, in our - response format + The joined rooms list, in our response format """ joined = {} for room in rooms: @@ -331,23 +365,26 @@ async def encode_joined( return joined - async def encode_invited(self, rooms, time_now, token_id, event_formatter): + async def encode_invited( + self, + rooms: List[InvitedSyncResult], + time_now: int, + token_id: Optional[int], + event_formatter: Callable[[JsonDict], JsonDict], + ) -> JsonDict: """ Encode the invited rooms in a sync result Args: - rooms(list[synapse.handlers.sync.InvitedSyncResult]): list of - sync results for rooms this user is invited to - time_now(int): current time - used as a baseline for age - calculations - token_id(int): ID of the user's auth token - used for namespacing + rooms: list of sync results for rooms this user is invited to + time_now: current time - used as a baseline for age calculations + token_id: ID of the user's auth token - used for namespacing of transaction IDs - event_formatter (func[dict]): function to convert from federation format + event_formatter: function to convert from federation format to client format Returns: - dict[str, dict[str, object]]: the invited rooms list, in our - response format + The invited rooms list, in our response format """ invited = {} for room in rooms: @@ -370,7 +407,7 @@ async def encode_knocked( self, rooms: List[KnockedSyncResult], time_now: int, - token_id: int, + token_id: Optional[int], event_formatter: Callable[[Dict], Dict], ) -> Dict[str, Dict[str, Any]]: """ @@ -421,25 +458,26 @@ async def encode_knocked( return knocked async def encode_archived( - self, rooms, time_now, token_id, event_fields, event_formatter - ): + self, + rooms: List[ArchivedSyncResult], + time_now: int, + token_id: Optional[int], + event_fields: List[str], + event_formatter: Callable[[JsonDict], JsonDict], + ) -> JsonDict: """ Encode the archived rooms in a sync result 
Args: - rooms (list[synapse.handlers.sync.ArchivedSyncResult]): list of - sync results for rooms this user is joined to - time_now(int): current time - used as a baseline for age - calculations - token_id(int): ID of the user's auth token - used for namespacing + rooms: list of sync results for rooms this user is joined to + time_now: current time - used as a baseline for age calculations + token_id: ID of the user's auth token - used for namespacing of transaction IDs - event_fields(list): List of event fields to include. If empty, + event_fields: List of event fields to include. If empty, all fields will be returned. - event_formatter (func[dict]): function to convert from federation format - to client format + event_formatter: function to convert from federation format to client format Returns: - dict[str, dict[str, object]]: The invited rooms list, in our - response format + The archived rooms list, in our response format """ joined = {} for room in rooms: @@ -455,31 +493,43 @@ async def encode_archived( return joined async def encode_room( - self, room, time_now, token_id, joined, only_fields, event_formatter - ): + self, + room: Union[JoinedSyncResult, ArchivedSyncResult], + time_now: int, + token_id: Optional[int], + joined: bool, + only_fields: Optional[List[str]], + event_formatter: Callable[[JsonDict], JsonDict], + ) -> JsonDict: """ Args: - room (JoinedSyncResult|ArchivedSyncResult): sync result for a - single room - time_now (int): current time - used as a baseline for age - calculations - token_id (int): ID of the user's auth token - used for namespacing + room: sync result for a single room + time_now: current time - used as a baseline for age calculations + token_id: ID of the user's auth token - used for namespacing of transaction IDs - joined (bool): True if the user is joined to this room - will mean + joined: True if the user is joined to this room - will mean we handle ephemeral events - only_fields(list): Optional. The list of event fields to include. - event_formatter (func[dict]): function to convert from federation format + only_fields: Optional. The list of event fields to include. + event_formatter: function to convert from federation format to client format Returns: - dict[str, object]: the room, encoded in our response format + The room, encoded in our response format """ - def serialize(events): + def serialize(events: Iterable[EventBase]) -> Awaitable[List[JsonDict]]: return self._event_serializer.serialize_events( events, time_now=time_now, - # We don't bundle "live" events, as otherwise clients - # will end up double counting annotations. + # Don't bother to bundle aggregations if the timeline is unlimited, + # as clients will have all the necessary information. + # bundle_aggregations=room.timeline.limited, + # + # richvdh 2021-12-15: disable this temporarily as it has too high an + # overhead for initialsyncs. We need to figure out a way that the + # bundling can be done *before* the events are stored in the + # SyncResponseCache so that this part can be synchronous. + # + # Ensure to re-enable the test at tests/rest/client/test_relations.py::RelationsTestCase.test_bundled_aggregations. 
bundle_aggregations=False, token_id=token_id, event_format=event_formatter, @@ -507,7 +557,7 @@ def serialize(events): account_data = room.account_data - result = { + result: JsonDict = { "timeline": { "events": serialized_timeline, "prev_batch": await room.timeline.prev_batch.to_string(self.store), @@ -518,6 +568,7 @@ def serialize(events): } if joined: + assert isinstance(room, JoinedSyncResult) ephemeral_events = room.ephemeral result["ephemeral"] = {"events": ephemeral_events} result["unread_notifications"] = room.unread_notifications @@ -527,5 +578,5 @@ def serialize(events): return result -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: SyncRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/tags.py similarity index 76% rename from synapse/rest/client/v2_alpha/tags.py rename to synapse/rest/client/tags.py index c14f83be1878..c88cb9367c5f 100644 --- a/synapse/rest/client/v2_alpha/tags.py +++ b/synapse/rest/client/tags.py @@ -13,12 +13,19 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import AuthError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -29,12 +36,14 @@ class TagListServlet(RestServlet): PATTERNS = client_patterns("/user/(?P[^/]*)/rooms/(?P[^/]*)/tags") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() - async def on_GET(self, request, user_id, room_id): + async def on_GET( + self, request: SynapseRequest, user_id: str, room_id: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get tags for other users.") @@ -54,12 +63,14 @@ class TagServlet(RestServlet): "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags/(?P[^/]*)" ) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.handler = hs.get_account_data_handler() - async def on_PUT(self, request, user_id, room_id, tag): + async def on_PUT( + self, request: SynapseRequest, user_id: str, room_id: str, tag: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add tags for other users.") @@ -70,7 +81,9 @@ async def on_PUT(self, request, user_id, room_id, tag): return 200, {} - async def on_DELETE(self, request, user_id, room_id, tag): + async def on_DELETE( + self, request: SynapseRequest, user_id: str, room_id: str, tag: str + ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add tags for other users.") @@ -80,6 +93,6 @@ async def on_DELETE(self, request, user_id, room_id, tag): return 200, {} -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: TagListServlet(hs).register(http_server) TagServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/thirdparty.py b/synapse/rest/client/thirdparty.py similarity index 74% rename from 
synapse/rest/client/v2_alpha/thirdparty.py rename to synapse/rest/client/thirdparty.py index b5c67c9bb67e..b895c73acf2c 100644 --- a/synapse/rest/client/v2_alpha/thirdparty.py +++ b/synapse/rest/client/thirdparty.py @@ -12,27 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License. - import logging +from typing import TYPE_CHECKING, Dict, List, Tuple from synapse.api.constants import ThirdPartyEntityKind +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class ThirdPartyProtocolsServlet(RestServlet): PATTERNS = client_patterns("/thirdparty/protocols") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.appservice_handler = hs.get_application_service_handler() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await self.auth.get_user_by_req(request, allow_guest=True) protocols = await self.appservice_handler.get_3pe_protocols() @@ -42,13 +48,15 @@ async def on_GET(self, request): class ThirdPartyProtocolServlet(RestServlet): PATTERNS = client_patterns("/thirdparty/protocol/(?P[^/]+)$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.appservice_handler = hs.get_application_service_handler() - async def on_GET(self, request, protocol): + async def on_GET( + self, request: SynapseRequest, protocol: str + ) -> Tuple[int, JsonDict]: await self.auth.get_user_by_req(request, allow_guest=True) protocols = await self.appservice_handler.get_3pe_protocols( @@ -63,16 +71,18 @@ async def on_GET(self, request, protocol): class ThirdPartyUserServlet(RestServlet): PATTERNS = client_patterns("/thirdparty/user(/(?P[^/]+))?$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.appservice_handler = hs.get_application_service_handler() - async def on_GET(self, request, protocol): + async def on_GET( + self, request: SynapseRequest, protocol: str + ) -> Tuple[int, List[JsonDict]]: await self.auth.get_user_by_req(request, allow_guest=True) - fields = request.args + fields: Dict[bytes, List[bytes]] = request.args # type: ignore[assignment] fields.pop(b"access_token", None) results = await self.appservice_handler.query_3pe( @@ -85,16 +95,18 @@ async def on_GET(self, request, protocol): class ThirdPartyLocationServlet(RestServlet): PATTERNS = client_patterns("/thirdparty/location(/(?P[^/]+))?$") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.appservice_handler = hs.get_application_service_handler() - async def on_GET(self, request, protocol): + async def on_GET( + self, request: SynapseRequest, protocol: str + ) -> Tuple[int, List[JsonDict]]: await self.auth.get_user_by_req(request, allow_guest=True) - fields = request.args + fields: Dict[bytes, List[bytes]] = request.args # type: ignore[assignment] fields.pop(b"access_token", None) results = await self.appservice_handler.query_3pe( @@ -104,7 +116,7 @@ async def on_GET(self, request, protocol): return 200, results -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: 
ThirdPartyProtocolsServlet(hs).register(http_server) ThirdPartyProtocolServlet(hs).register(http_server) ThirdPartyUserServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/tokenrefresh.py b/synapse/rest/client/tokenrefresh.py similarity index 75% rename from synapse/rest/client/v2_alpha/tokenrefresh.py rename to synapse/rest/client/tokenrefresh.py index b2f858545cbe..c8c3b25bd36f 100644 --- a/synapse/rest/client/v2_alpha/tokenrefresh.py +++ b/synapse/rest/client/tokenrefresh.py @@ -12,11 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import TYPE_CHECKING + +from twisted.web.server import Request + from synapse.api.errors import AuthError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + class TokenRefreshRestServlet(RestServlet): """ @@ -26,12 +34,12 @@ class TokenRefreshRestServlet(RestServlet): PATTERNS = client_patterns("/tokenrefresh") - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() - async def on_POST(self, request): + async def on_POST(self, request: Request) -> None: raise AuthError(403, "tokenrefresh is no longer supported.") -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: TokenRefreshRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py index 94ff3719ce88..914fb3acf5aa 100644 --- a/synapse/rest/client/transactions.py +++ b/synapse/rest/client/transactions.py @@ -15,28 +15,37 @@ """This module contains logic for storing HTTP PUT transactions. This is used to ensure idempotency when performing PUTs using the REST API.""" import logging +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, Tuple + +from twisted.python.failure import Failure +from twisted.web.server import Request from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.types import JsonDict from synapse.util.async_helpers import ObservableDeferred +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) CLEANUP_PERIOD_MS = 1000 * 60 * 30 # 30 mins class HttpTransactionCache: - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = self.hs.get_auth() self.clock = self.hs.get_clock() - self.transactions = { - # $txn_key: (ObservableDeferred<(res_code, res_json_body)>, timestamp) - } + # $txn_key: (ObservableDeferred<(res_code, res_json_body)>, timestamp) + self.transactions: Dict[ + str, Tuple[ObservableDeferred[Tuple[int, JsonDict]], int] + ] = {} # Try to clean entries every 30 mins. This means entries will exist # for at *LEAST* 30 mins, and at *MOST* 60 mins. self.cleaner = self.clock.looping_call(self._cleanup, CLEANUP_PERIOD_MS) - def _get_transaction_key(self, request): + def _get_transaction_key(self, request: Request) -> str: """A helper function which returns a transaction key that can be used with TransactionCache for idempotent requests. @@ -45,15 +54,21 @@ def _get_transaction_key(self, request): path and the access_token for the requesting user. Args: - request (twisted.web.http.Request): The incoming request. Must - contain an access_token. + request: The incoming request. Must contain an access_token. 
Returns: - str: A transaction key + A transaction key """ + assert request.path is not None token = self.auth.get_access_token_from_request(request) return request.path.decode("utf8") + "/" + token - def fetch_or_execute_request(self, request, fn, *args, **kwargs): + def fetch_or_execute_request( + self, + request: Request, + fn: Callable[..., Awaitable[Tuple[int, JsonDict]]], + *args: Any, + **kwargs: Any, + ) -> Awaitable[Tuple[int, JsonDict]]: """A helper function for fetch_or_execute which extracts a transaction key from the given request. @@ -64,15 +79,20 @@ def fetch_or_execute_request(self, request, fn, *args, **kwargs): self._get_transaction_key(request), fn, *args, **kwargs ) - def fetch_or_execute(self, txn_key, fn, *args, **kwargs): + def fetch_or_execute( + self, + txn_key: str, + fn: Callable[..., Awaitable[Tuple[int, JsonDict]]], + *args: Any, + **kwargs: Any, + ) -> Awaitable[Tuple[int, JsonDict]]: """Fetches the response for this transaction, or executes the given function to produce a response for this transaction. Args: - txn_key (str): A key to ensure idempotency should fetch_or_execute be - called again at a later point in time. - fn (function): A function which returns a tuple of - (response_code, response_dict). + txn_key: A key to ensure idempotency should fetch_or_execute be + called again at a later point in time. + fn: A function which returns a tuple of (response_code, response_dict). *args: Arguments to pass to fn. **kwargs: Keyword arguments to pass to fn. Returns: @@ -90,7 +110,7 @@ def fetch_or_execute(self, txn_key, fn, *args, **kwargs): # if the request fails with an exception, remove it # from the transaction map. This is done to ensure that we don't # cache transient errors like rate-limiting errors, etc. - def remove_from_map(err): + def remove_from_map(err: Failure) -> None: self.transactions.pop(txn_key, None) # we deliberately do not propagate the error any further, as we # expect the observers to have reported it. @@ -99,7 +119,7 @@ def remove_from_map(err): return make_deferred_yieldable(observable.observe()) - def _cleanup(self): + def _cleanup(self) -> None: now = self.clock.time_msec() for key in list(self.transactions): ts = self.transactions[key][1] diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/user_directory.py similarity index 82% rename from synapse/rest/client/v2_alpha/user_directory.py rename to synapse/rest/client/user_directory.py index 958d9ef21025..6e96eddacef9 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/user_directory.py @@ -13,29 +13,32 @@ # limitations under the License. 
import logging +from typing import TYPE_CHECKING, Tuple from synapse.api.errors import SynapseError +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from ._base import client_patterns +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class UserDirectorySearchRestServlet(RestServlet): PATTERNS = client_patterns("/user_directory/search$") - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() self.user_directory_handler = hs.get_user_directory_handler() - async def on_POST(self, request): + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """Searches for users in directory Returns: @@ -55,7 +58,7 @@ async def on_POST(self, request): requester = await self.auth.get_user_by_req(request, allow_guest=False, allow_limited=False) user_id = requester.user.to_string() - if not self.hs.config.user_directory_search_enabled: + if not self.hs.config.userdirectory.user_directory_search_enabled: return 200, {"limited": False, "results": []} body = parse_json_object_from_request(request) @@ -75,5 +78,5 @@ async def on_POST(self, request): return 200, results -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: UserDirectorySearchRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/__init__.py b/synapse/rest/client/v1/__init__.py deleted file mode 100644 index 5e83dba2ed6f..000000000000 --- a/synapse/rest/client/v1/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/synapse/rest/client/v2_alpha/__init__.py b/synapse/rest/client/v2_alpha/__init__.py deleted file mode 100644 index 5e83dba2ed6f..000000000000 --- a/synapse/rest/client/v2_alpha/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
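All of the servlet conversions above follow the same typing recipe: `HomeServer` is imported only under `TYPE_CHECKING` and referenced as the string literal "HomeServer", which gives mypy the full type without creating a runtime import cycle with `synapse.server`. A minimal sketch of that recipe, using a hypothetical servlet in place of the real ones:

from typing import TYPE_CHECKING, Tuple

from synapse.http.server import HttpServer
from synapse.http.servlet import RestServlet
from synapse.http.site import SynapseRequest
from synapse.rest.client._base import client_patterns
from synapse.types import JsonDict

if TYPE_CHECKING:
    # Only evaluated by type checkers; never imported at runtime.
    from synapse.server import HomeServer


class ExampleRestServlet(RestServlet):
    """Hypothetical servlet, standing in for the ones converted above."""

    PATTERNS = client_patterns("/example$")

    def __init__(self, hs: "HomeServer"):  # forward reference as a string
        super().__init__()
        self.auth = hs.get_auth()

    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
        await self.auth.get_user_by_req(request)
        return 200, {}


def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
    ExampleRestServlet(hs).register(http_server)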
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index fa2e4e9cba48..8d888f456520 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -17,9 +17,17 @@ import logging import re +from typing import TYPE_CHECKING, Tuple + +from twisted.web.server import Request from synapse.api.constants import RoomCreationPreset +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -27,25 +35,25 @@ class VersionsRestServlet(RestServlet): PATTERNS = [re.compile("^/_matrix/client/versions$")] - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.config = hs.config # Calculate these once since they shouldn't change after start-up. self.e2ee_forced_public = ( RoomCreationPreset.PUBLIC_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) self.e2ee_forced_private = ( RoomCreationPreset.PRIVATE_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) self.e2ee_forced_trusted_private = ( RoomCreationPreset.TRUSTED_PRIVATE_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) - def on_GET(self, request): + def on_GET(self, request: Request) -> Tuple[int, JsonDict]: return ( 200, { @@ -64,6 +72,7 @@ def on_GET(self, request): "r0.4.0", "r0.5.0", "r0.6.0", + "r0.6.1", ], # as per MSC1497: "unstable_features": { @@ -89,5 +98,5 @@ def on_GET(self, request): ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: VersionsRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/voip.py b/synapse/rest/client/voip.py similarity index 69% rename from synapse/rest/client/v1/voip.py rename to synapse/rest/client/voip.py index c780ffded5e9..ea7e025156da 100644 --- a/synapse/rest/client/v1/voip.py +++ b/synapse/rest/client/voip.py @@ -15,29 +15,36 @@ import base64 import hashlib import hmac +from typing import TYPE_CHECKING, Tuple +from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet -from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer class VoipRestServlet(RestServlet): PATTERNS = client_patterns("/voip/turnServer$", v1=True) - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() - async def on_GET(self, request): + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req( - request, self.hs.config.turn_allow_guests + request, self.hs.config.voip.turn_allow_guests ) - turnUris = self.hs.config.turn_uris - turnSecret = self.hs.config.turn_shared_secret - turnUsername = self.hs.config.turn_username - turnPassword = self.hs.config.turn_password - userLifetime = self.hs.config.turn_user_lifetime + turnUris = self.hs.config.voip.turn_uris + turnSecret = self.hs.config.voip.turn_shared_secret + turnUsername = self.hs.config.voip.turn_username + turnPassword = 
self.hs.config.voip.turn_password + userLifetime = self.hs.config.voip.turn_user_lifetime if turnUris and turnSecret and userLifetime: expiry = (self.hs.get_clock().time_msec() + userLifetime) / 1000 @@ -63,11 +70,11 @@ async def on_GET(self, request): { "username": username, "password": password, - "ttl": userLifetime / 1000, + "ttl": userLifetime // 1000, "uris": turnUris, }, ) -def register_servlets(hs, http_server): +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: VoipRestServlet(hs).register(http_server) diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 11f73208321b..3d2afacc50d9 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -17,17 +17,22 @@ from hashlib import sha256 from http import HTTPStatus from os import path -from typing import Dict, List +from typing import TYPE_CHECKING, Any, Dict, List import jinja2 from jinja2 import TemplateNotFound +from twisted.web.server import Request + from synapse.api.errors import NotFoundError, StoreError, SynapseError from synapse.config import ConfigError from synapse.http.server import DirectServeHtmlResource, respond_with_html from synapse.http.servlet import parse_bytes_from_args, parse_string from synapse.types import UserID +if TYPE_CHECKING: + from synapse.server import HomeServer + # language to use for the templates. TODO: figure this out from Accept-Language TEMPLATE_LANGUAGE = "en" @@ -69,11 +74,7 @@ class ConsentResource(DirectServeHtmlResource): against the user. """ - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): homeserver - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs @@ -83,14 +84,15 @@ def __init__(self, hs): # this is required by the request_handler wrapper self.clock = hs.get_clock() - self._default_consent_version = hs.config.user_consent_version - if self._default_consent_version is None: + # Consent must be configured to create this resource. + default_consent_version = hs.config.consent.user_consent_version + consent_template_directory = hs.config.consent.user_consent_template_dir + if default_consent_version is None or consent_template_directory is None: raise ConfigError( "Consent resource is enabled but user_consent section is " "missing in config file." ) - - consent_template_directory = hs.config.user_consent_template_dir + self._default_consent_version = default_consent_version # TODO: switch to synapse.util.templates.build_jinja_env loader = jinja2.FileSystemLoader(consent_template_directory) @@ -98,26 +100,22 @@ def __init__(self, hs): loader=loader, autoescape=jinja2.select_autoescape(["html", "htm", "xml"]) ) - if hs.config.form_secret is None: + if hs.config.key.form_secret is None: raise ConfigError( "Consent resource is enabled but form_secret is not set in " "config file. It should be set to an arbitrary secret string." 
) - self._hmac_secret = hs.config.form_secret.encode("utf-8") + self._hmac_secret = hs.config.key.form_secret.encode("utf-8") - async def _async_render_GET(self, request): - """ - Args: - request (twisted.web.http.Request): - """ + async def _async_render_GET(self, request: Request) -> None: version = parse_string(request, "v", default=self._default_consent_version) username = parse_string(request, "u", default="") userhmac = None has_consented = False public_version = username == "" if not public_version: - args: Dict[bytes, List[bytes]] = request.args + args: Dict[bytes, List[bytes]] = request.args # type: ignore userhmac_bytes = parse_bytes_from_args(args, "h", required=True) self._check_hash(username, userhmac_bytes) @@ -147,14 +145,10 @@ async def _async_render_GET(self, request): except TemplateNotFound: raise NotFoundError("Unknown policy version") - async def _async_render_POST(self, request): - """ - Args: - request (twisted.web.http.Request): - """ + async def _async_render_POST(self, request: Request) -> None: version = parse_string(request, "v", required=True) username = parse_string(request, "u", required=True) - args: Dict[bytes, List[bytes]] = request.args + args: Dict[bytes, List[bytes]] = request.args # type: ignore userhmac = parse_bytes_from_args(args, "h", required=True) self._check_hash(username, userhmac) @@ -177,7 +171,9 @@ async def _async_render_POST(self, request): except TemplateNotFound: raise NotFoundError("success.html not found") - def _render_template(self, request, template_name, **template_args): + def _render_template( + self, request: Request, template_name: str, **template_args: Any + ) -> None: # get_template checks for ".." so we don't need to worry too much # about path traversal here. template_html = self._jinja_env.get_template( @@ -186,11 +182,11 @@ def _render_template(self, request, template_name, **template_args): html = template_html.render(**template_args) respond_with_html(request, 200, html) - def _check_hash(self, userid, userhmac): + def _check_hash(self, userid: str, userhmac: bytes) -> None: """ Args: - userid (unicode): - userhmac (bytes): + userid: + userhmac: Raises: SynapseError if the hash doesn't match diff --git a/synapse/rest/health.py b/synapse/rest/health.py index 4487b54abf39..78df7af2cf83 100644 --- a/synapse/rest/health.py +++ b/synapse/rest/health.py @@ -13,6 +13,7 @@ # limitations under the License. from twisted.web.resource import Resource +from twisted.web.server import Request class HealthResource(Resource): @@ -25,6 +26,6 @@ class HealthResource(Resource): isLeaf = 1 - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: request.setHeader(b"Content-Type", b"text/plain") return b"OK" diff --git a/synapse/rest/key/v2/__init__.py b/synapse/rest/key/v2/__init__.py index c6c63073eac0..7f8c1de1ff50 100644 --- a/synapse/rest/key/v2/__init__.py +++ b/synapse/rest/key/v2/__init__.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import TYPE_CHECKING + from twisted.web.resource import Resource from .local_key_resource import LocalKey from .remote_key_resource import RemoteKey +if TYPE_CHECKING: + from synapse.server import HomeServer + class KeyApiV2Resource(Resource): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self.putChild(b"server", LocalKey(hs)) self.putChild(b"query", RemoteKey(hs)) diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py index a5fcd15e3af4..12b3ae120cdb 100644 --- a/synapse/rest/key/v2/local_key_resource.py +++ b/synapse/rest/key/v2/local_key_resource.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - import logging +from typing import TYPE_CHECKING from canonicaljson import encode_canonical_json from signedjson.sign import sign_json from unpaddedbase64 import encode_base64 from twisted.web.resource import Resource +from twisted.web.server import Request from synapse.http.server import respond_with_json_bytes +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -58,26 +63,26 @@ class LocalKey(Resource): isLeaf = True - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.config = hs.config self.clock = hs.get_clock() self.update_response_body(self.clock.time_msec()) Resource.__init__(self) - def update_response_body(self, time_now_msec): - refresh_interval = self.config.key_refresh_interval + def update_response_body(self, time_now_msec: int) -> None: + refresh_interval = self.config.key.key_refresh_interval self.valid_until_ts = int(time_now_msec + refresh_interval) self.response_body = encode_canonical_json(self.response_json_object()) - def response_json_object(self): + def response_json_object(self) -> JsonDict: verify_keys = {} - for key in self.config.signing_key: + for key in self.config.key.signing_key: verify_key_bytes = key.verify_key.encode() key_id = "%s:%s" % (key.alg, key.version) verify_keys[key_id] = {"key": encode_base64(verify_key_bytes)} old_verify_keys = {} - for key_id, key in self.config.old_signing_keys.items(): + for key_id, key in self.config.key.old_signing_keys.items(): verify_key_bytes = key.encode() old_verify_keys[key_id] = { "key": encode_base64(verify_key_bytes), @@ -86,17 +91,17 @@ def response_json_object(self): json_object = { "valid_until_ts": self.valid_until_ts, - "server_name": self.config.server_name, + "server_name": self.config.server.server_name, "verify_keys": verify_keys, "old_verify_keys": old_verify_keys, } - for key in self.config.signing_key: - json_object = sign_json(json_object, self.config.server_name, key) + for key in self.config.key.signing_key: + json_object = sign_json(json_object, self.config.server.server_name, key) return json_object - def render_GET(self, request): + def render_GET(self, request: Request) -> int: time_now = self.clock.time_msec() # Update the expiry time if less than half the interval remains. 
- if time_now + self.config.key_refresh_interval / 2 > self.valid_until_ts: + if time_now + self.config.key.key_refresh_interval / 2 > self.valid_until_ts: self.update_response_body(time_now) return respond_with_json_bytes(request, 200, self.response_body) diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 63a40b18528f..3923ba8439b0 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import Dict +from typing import TYPE_CHECKING, Dict from signedjson.sign import sign_json @@ -21,9 +21,14 @@ from synapse.crypto.keyring import ServerKeyFetcher from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_integer, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import yieldable_gather_results +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -85,16 +90,19 @@ class RemoteKey(DirectServeJsonResource): isLeaf = True - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.fetcher = ServerKeyFetcher(hs) self.store = hs.get_datastore() self.clock = hs.get_clock() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) self.config = hs.config - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: SynapseRequest) -> None: + assert request.postpath is not None if len(request.postpath) == 1: (server,) = request.postpath query: dict = {server.decode("ascii"): {}} @@ -110,14 +118,19 @@ async def _async_render_GET(self, request): await self.query_keys(request, query, query_remote_on_cache_miss=True) - async def _async_render_POST(self, request): + async def _async_render_POST(self, request: SynapseRequest) -> None: content = parse_json_object_from_request(request) query = content["server_keys"] await self.query_keys(request, query, query_remote_on_cache_miss=True) - async def query_keys(self, request, query, query_remote_on_cache_miss=False): + async def query_keys( + self, + request: SynapseRequest, + query: JsonDict, + query_remote_on_cache_miss: bool = False, + ) -> None: logger.info("Handling query for keys %r", query) store_queries = [] @@ -142,8 +155,8 @@ async def query_keys(self, request, query, query_remote_on_cache_miss=False): # Note that the value is unused. 
cache_misses: Dict[str, Dict[str, int]] = {} - for (server_name, key_id, _), results in cached.items(): - results = [(result["ts_added_ms"], result) for result in results] + for (server_name, key_id, _), key_results in cached.items(): + results = [(result["ts_added_ms"], result) for result in key_results] if not results and key_id is not None: cache_misses.setdefault(server_name, {})[key_id] = 0 @@ -223,11 +236,13 @@ async def query_keys(self, request, query, query_remote_on_cache_miss=False): signed_keys = [] for key_json in json_results: key_json = json_decoder.decode(key_json.decode("utf-8")) - for signing_key in self.config.key_server_signing_keys: - key_json = sign_json(key_json, self.config.server_name, signing_key) + for signing_key in self.config.key.key_server_signing_keys: + key_json = sign_json( + key_json, self.config.server.server_name, signing_key + ) signed_keys.append(key_json) - results = {"server_keys": signed_keys} + response = {"server_keys": signed_keys} - respond_with_json(request, 200, results, canonical_json=True) + respond_with_json(request, 200, response, canonical_json=True) diff --git a/synapse/rest/media/v1/__init__.py b/synapse/rest/media/v1/__init__.py index 3dd16d4bb542..d5b74cddf125 100644 --- a/synapse/rest/media/v1/__init__.py +++ b/synapse/rest/media/v1/__init__.py @@ -12,33 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import PIL.Image +from PIL.features import check_codec # check for JPEG support. -try: - PIL.Image._getdecoder("rgb", "jpeg", None) -except OSError as e: - if str(e).startswith("decoder jpeg not available"): - raise Exception( - "FATAL: jpeg codec not supported. Install pillow correctly! " - " 'sudo apt-get install libjpeg-dev' then 'pip uninstall pillow &&" - " pip install pillow --user'" - ) -except Exception: - # any other exception is fine - pass +if not check_codec("jpg"): + raise Exception( + "FATAL: jpeg codec not supported. Install pillow correctly! " + " 'sudo apt-get install libjpeg-dev' then 'pip uninstall pillow &&" + " pip install pillow --user'" + ) # check for PNG support. -try: - PIL.Image._getdecoder("rgb", "zip", None) -except OSError as e: - if str(e).startswith("decoder zip not available"): - raise Exception( - "FATAL: zip codec not supported. Install pillow correctly! " - " 'sudo apt-get install libjpeg-dev' then 'pip uninstall pillow &&" - " pip install pillow --user'" - ) -except Exception: - # any other exception is fine - pass +if not check_codec("zlib"): + raise Exception( + "FATAL: zip codec not supported. Install pillow correctly! 
" + " 'sudo apt-get install libjpeg-dev' then 'pip uninstall pillow &&" + " pip install pillow --user'" + ) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 90364ebcf70d..9b40fd8a6c23 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,7 +16,10 @@ import logging import os import urllib -from typing import Awaitable, Dict, Generator, List, Optional, Tuple +from types import TracebackType +from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type + +import attr from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender @@ -24,8 +27,9 @@ from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json +from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable -from synapse.util.stringutils import is_ascii +from synapse.util.stringutils import is_ascii, parse_and_validate_server_name logger = logging.getLogger(__name__) @@ -47,6 +51,19 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: + """Parses the server name, media ID and optional file name from the request URI + + Also performs some rough validation on the server name. + + Args: + request: The `Request`. + + Returns: + A tuple containing the parsed server name, media ID and optional file name. + + Raises: + SynapseError(404): if parsing or validation fail for any reason + """ try: # The type on postpath seems incorrect in Twisted 21.2.0. postpath: List[bytes] = request.postpath # type: ignore @@ -58,6 +75,9 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: server_name = server_name_bytes.decode("utf-8") media_id = media_id_bytes.decode("utf8") + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + file_name = None if len(postpath) > 2: try: @@ -71,7 +91,7 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: ) -def respond_404(request: Request) -> None: +def respond_404(request: SynapseRequest) -> None: respond_with_json( request, 404, @@ -81,7 +101,7 @@ def respond_404(request: Request) -> None: async def respond_with_file( - request: Request, + request: SynapseRequest, media_type: str, file_path: str, file_size: Optional[int] = None, @@ -120,7 +140,7 @@ def add_file_headers( upload_name: The name of the requested file, if any. """ - def _quote(x): + def _quote(x: str) -> str: return urllib.parse.quote(x.encode("utf-8")) # Default to a UTF-8 charset for text content types. @@ -218,7 +238,7 @@ def _can_encode_filename_as_token(x: str) -> bool: async def respond_with_responder( - request: Request, + request: SynapseRequest, responder: "Optional[Responder]", media_type: str, file_size: Optional[int], @@ -280,51 +300,74 @@ def write_to_consumer(self, consumer: IConsumer) -> Awaitable: """ pass - def __enter__(self): + def __enter__(self) -> None: pass - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: pass -class FileInfo: - """Details about a requested/uploaded file. - - Attributes: - server_name (str): The server name where the media originated from, - or None if local. - file_id (str): The local ID of the file. 
For local files this is the - same as the media_id - url_cache (bool): If the file is for the url preview cache - thumbnail (bool): Whether the file is a thumbnail or not. - thumbnail_width (int) - thumbnail_height (int) - thumbnail_method (str) - thumbnail_type (str): Content type of thumbnail, e.g. image/png - thumbnail_length (int): The size of the media file, in bytes. - """ +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThumbnailInfo: + """Details about a generated thumbnail.""" - def __init__( - self, - server_name, - file_id, - url_cache=False, - thumbnail=False, - thumbnail_width=None, - thumbnail_height=None, - thumbnail_method=None, - thumbnail_type=None, - thumbnail_length=None, - ): - self.server_name = server_name - self.file_id = file_id - self.url_cache = url_cache - self.thumbnail = thumbnail - self.thumbnail_width = thumbnail_width - self.thumbnail_height = thumbnail_height - self.thumbnail_method = thumbnail_method - self.thumbnail_type = thumbnail_type - self.thumbnail_length = thumbnail_length + width: int + height: int + method: str + # Content type of thumbnail, e.g. image/png + type: str + # The size of the media file, in bytes. + length: Optional[int] = None + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class FileInfo: + """Details about a requested/uploaded file.""" + + # The server name where the media originated from, or None if local. + server_name: Optional[str] + # The local ID of the file. For local files this is the same as the media_id + file_id: str + # If the file is for the url preview cache + url_cache: bool = False + # Whether the file is a thumbnail or not. + thumbnail: Optional[ThumbnailInfo] = None + + # The below properties exist to maintain compatibility with third-party modules. + @property + def thumbnail_width(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.width + + @property + def thumbnail_height(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.height + + @property + def thumbnail_method(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.method + + @property + def thumbnail_type(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.type + + @property + def thumbnail_length(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.length def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]: diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py index a1d36e5cf189..a95804d32775 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/v1/config_resource.py @@ -16,8 +16,6 @@ from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.site import SynapseRequest @@ -33,11 +31,11 @@ def __init__(self, hs: "HomeServer"): config = hs.config self.clock = hs.get_clock() self.auth = hs.get_auth() - self.limits_dict = {"m.upload.size": config.max_upload_size} + self.limits_dict = {"m.upload.size": config.media.max_upload_size} async def _async_render_GET(self, request: SynapseRequest) -> None: await self.auth.get_user_by_req(request) respond_with_json(request, 200, self.limits_dict, send_cors=True) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: 
respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index d6d938953e44..6180fa575ebc 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -15,10 +15,9 @@ import logging from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_boolean +from synapse.http.site import SynapseRequest from ._base import parse_media_id, respond_404 @@ -37,7 +36,7 @@ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): self.media_repo = media_repo self.server_name = hs.hostname - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) request.setHeader( b"Content-Security-Policy", diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 09531ebf548d..1f6441c412d0 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -16,22 +16,133 @@ import functools import os import re -from typing import Callable, List +import string +from typing import Any, Callable, List, TypeVar, Union, cast NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") -def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: +F = TypeVar("F", bound=Callable[..., str]) + + +def _wrap_in_base_path(func: F) -> F: """Takes a function that returns a relative path and turns it into an absolute path based on the location of the primary media store """ @functools.wraps(func) - def _wrapped(self, *args, **kwargs): + def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str: path = func(self, *args, **kwargs) return os.path.join(self.base_path, path) - return _wrapped + return cast(F, _wrapped) + + +GetPathMethod = TypeVar( + "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]] +) + + +def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]: + """Wraps a path-returning method to check that the returned path(s) do not escape + the media store directory. + + The path-returning method may return either a single path, or a list of paths. + + The check is not expected to ever fail, unless `func` is missing a call to + `_validate_path_component`, or `_validate_path_component` is buggy. + + Args: + relative: A boolean indicating whether the wrapped method returns paths relative + to the media store directory. + + Returns: + A method which will wrap a path-returning method, adding a check to ensure that + the returned path(s) lie within the media store directory. The check will raise + a `ValueError` if it fails. + """ + + def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod: + @functools.wraps(func) + def _wrapped( + self: "MediaFilePaths", *args: Any, **kwargs: Any + ) -> Union[str, List[str]]: + path_or_paths = func(self, *args, **kwargs) + + if isinstance(path_or_paths, list): + paths_to_check = path_or_paths + else: + paths_to_check = [path_or_paths] + + for path in paths_to_check: + # Construct the path that will ultimately be used. + # We cannot guess whether `path` is relative to the media store + # directory, since the media store directory may itself be a relative + # path. 
+ if relative: + path = os.path.join(self.base_path, path) + normalized_path = os.path.normpath(path) + + # Now that `normpath` has eliminated `../`s and `./`s from the path, + # `os.path.commonpath` can be used to check whether it lies within the + # media store directory. + if ( + os.path.commonpath([normalized_path, self.normalized_base_path]) + != self.normalized_base_path + ): + # The path resolves to outside the media store directory, + # or `self.base_path` is `.`, which is an unlikely configuration. + raise ValueError(f"Invalid media store path: {path!r}") + + # Note that `os.path.normpath`/`abspath` has a subtle caveat: + # `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a + # different path if `a/b/c` is a symlink. That is, the check above is + # not perfect and may allow a certain restricted subset of untrustworthy + # paths through. Since the check above is secondary to the main + # `_validate_path_component` checks, it's less important for it to be + # perfect. + # + # As an alternative, `os.path.realpath` will resolve symlinks, but + # proves problematic if there are symlinks inside the media store. + # eg. if `url_store/` is symlinked to elsewhere, its canonical path + # won't match that of the main media store directory. + + return path_or_paths + + return cast(GetPathMethod, _wrapped) + + return _wrap_with_jail_check_inner + + +ALLOWED_CHARACTERS = set( + string.ascii_letters + + string.digits + + "_-" + + ".[]:" # Domain names, IPv6 addresses and ports in server names +) +FORBIDDEN_NAMES = { + "", + os.path.curdir, # "." for the current platform + os.path.pardir, # ".." for the current platform +} + + +def _validate_path_component(name: str) -> str: + """Checks that the given string can be safely used as a path component + + Args: + name: The path component to check. + + Returns: + The path component if valid. + + Raises: + ValueError: If `name` cannot be safely used as a path component. + """ + if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES: + raise ValueError(f"Invalid path component: {name!r}") + + return name class MediaFilePaths: @@ -44,40 +155,45 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path - - def default_thumbnail_rel( - self, - default_top_level: str, - default_sub_type: str, - width: int, - height: int, - content_type: str, - method: str, - ) -> str: - top_level_type, sub_type = content_type.split("/") - file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) + self.normalized_base_path = os.path.normpath(self.base_path) + + # Refuse to initialize if paths cannot be validated correctly for the current + # platform. + assert os.path.sep not in ALLOWED_CHARACTERS + assert os.path.altsep not in ALLOWED_CHARACTERS + # On Windows, paths have all sorts of weirdness which `_validate_path_component` + # does not consider. In any case, the remote media store can't work correctly + # for certain homeservers there, since ":"s aren't allowed in paths. 
+ assert os.name == "posix" + + @_wrap_with_jail_check(relative=True) + def local_media_filepath_rel(self, media_id: str) -> str: return os.path.join( - "default_thumbnails", default_top_level, default_sub_type, file_name + "local_content", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ) - default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) - - def local_media_filepath_rel(self, media_id: str) -> str: - return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:]) - local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + @_wrap_with_jail_check(relative=True) def local_media_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> str: top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) return os.path.join( - "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name + "local_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), ) local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + @_wrap_with_jail_check(relative=False) def local_media_thumbnail_dir(self, media_id: str) -> str: """ Retrieve the local store path of thumbnails of a given media_id @@ -90,18 +206,24 @@ def local_media_thumbnail_dir(self, media_id: str) -> str: return os.path.join( self.base_path, "local_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ) + @_wrap_with_jail_check(relative=True) def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: return os.path.join( - "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] + "remote_content", + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), ) remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + @_wrap_with_jail_check(relative=True) def remote_media_thumbnail_rel( self, server_name: str, @@ -115,11 +237,11 @@ def remote_media_thumbnail_rel( file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) return os.path.join( "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], - file_name, + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), ) remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) @@ -127,50 +249,75 @@ def remote_media_thumbnail_rel( # Legacy path that was used to store thumbnails previously. # Should be removed after some time, when most of the thumbnails are stored # using the new path. 
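# The jail check above boils down to a normpath/commonpath comparison. A
# self-contained sketch of that check (the _demo name is illustrative and
# not part of Synapse):
import os

def _path_is_jailed_demo(base_path: str, rel_path: str) -> bool:
    # Normalize away "./" and "../" segments, then require that the common
    # prefix of the candidate path and the base path is the base itself.
    base = os.path.normpath(base_path)
    candidate = os.path.normpath(os.path.join(base_path, rel_path))
    return os.path.commonpath([candidate, base]) == base

assert _path_is_jailed_demo("/media_store", "local_content/ab/cd/ef")
assert not _path_is_jailed_demo("/media_store", "../../etc/passwd")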
+ @_wrap_with_jail_check(relative=True) def remote_media_thumbnail_rel_legacy( self, server_name: str, file_id: str, width: int, height: int, content_type: str - ): + ) -> str: top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], - file_name, + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), ) + @_wrap_with_jail_check(relative=False) def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: return os.path.join( self.base_path, "remote_thumbnail", - server_name, - file_id[0:2], - file_id[2:4], - file_id[4:], + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), ) + @_wrap_with_jail_check(relative=True) def url_cache_filepath_rel(self, media_id: str) -> str: if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf - return os.path.join("url_cache", media_id[:10], media_id[11:]) + return os.path.join( + "url_cache", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ) else: - return os.path.join("url_cache", media_id[0:2], media_id[2:4], media_id[4:]) + return os.path.join( + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + @_wrap_with_jail_check(relative=False) def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): - return [os.path.join(self.base_path, "url_cache", media_id[:10])] + return [ + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[:10]) + ) + ] else: return [ - os.path.join(self.base_path, "url_cache", media_id[0:2], media_id[2:4]), - os.path.join(self.base_path, "url_cache", media_id[0:2]), + os.path.join( + self.base_path, + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[0:2]) + ), ] + @_wrap_with_jail_check(relative=True) def url_cache_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> str: @@ -182,36 +329,46 @@ def url_cache_thumbnail_rel( if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - "url_cache_thumbnails", media_id[:10], media_id[11:], file_name + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + _validate_path_component(file_name), ) else: return os.path.join( "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], - file_name, + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), ) url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) - def url_cache_thumbnail_directory(self, media_id: str) -> str: + @_wrap_with_jail_check(relative=True) + def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form # E.g.: 
2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ) + url_cache_thumbnail_directory = _wrap_in_base_path( + url_cache_thumbnail_directory_rel + ) + + @_wrap_with_jail_check(relative=False) def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form @@ -219,21 +376,35 @@ def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), ), - os.path.join(self.base_path, "url_cache_thumbnails", media_id[:10]), ] else: return [ os.path.join( self.base_path, "url_cache_thumbnails", - media_id[0:2], - media_id[2:4], - media_id[4:], + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), ), os.path.join( - self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4] + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), ), - os.path.join(self.base_path, "url_cache_thumbnails", media_id[0:2]), ] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 4f702f890c1c..244ba261bbc4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -21,8 +21,8 @@ import twisted.internet.error import twisted.web.http +from twisted.internet.defer import Deferred from twisted.web.resource import Resource -from twisted.web.server import Request from synapse.api.errors import ( FederationDeniedError, @@ -32,6 +32,8 @@ SynapseError, ) from synapse.config._base import ConfigError +from synapse.config.repository import ThumbnailRequirement +from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID @@ -42,6 +44,7 @@ from ._base import ( FileInfo, Responder, + ThumbnailInfo, get_filename_from_headers, respond_404, respond_with_responder, @@ -73,29 +76,35 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size - self.max_image_pixels = hs.config.max_image_pixels + self.max_upload_size = hs.config.media.max_upload_size + self.max_image_pixels = hs.config.media.max_image_pixels Thumbnailer.set_limits(self.max_image_pixels) - self.primary_base_path: str = hs.config.media_store_path + self.primary_base_path: str = hs.config.media.media_store_path self.filepaths: 
MediaFilePaths = MediaFilePaths(self.primary_base_path) - self.dynamic_thumbnails = hs.config.dynamic_thumbnails - self.thumbnail_requirements = hs.config.thumbnail_requirements + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails + self.thumbnail_requirements = hs.config.media.thumbnail_requirements self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes: Set[Tuple[str, str]] = set() self.recently_accessed_locals: Set[str] = set() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) # List of StorageProviders where we should search for media and # potentially upload to. storage_providers = [] - for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + for ( + clz, + provider_config, + wrapper_config, + ) in hs.config.media.media_storage_providers: backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, @@ -113,7 +122,7 @@ def __init__(self, hs: "HomeServer"): self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS ) - def _start_update_recently_accessed(self): + def _start_update_recently_accessed(self) -> Deferred: return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed ) @@ -184,7 +193,7 @@ async def create_content( return "mxc://%s/%s" % (self.server_name, media_id) async def get_local_media( - self, request: Request, media_id: str, name: Optional[str] + self, request: SynapseRequest, media_id: str, name: Optional[str] ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -206,11 +215,13 @@ async def get_local_media( self.mark_recently_accessed(None, media_id) media_type = media_info["media_type"] + if not media_type: + media_type = "application/octet-stream" media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] url_cache = media_info["url_cache"] - file_info = FileInfo(None, media_id, url_cache=url_cache) + file_info = FileInfo(None, media_id, url_cache=bool(url_cache)) responder = await self.media_storage.fetch_media(file_info) await respond_with_responder( @@ -218,7 +229,11 @@ async def get_local_media( ) async def get_remote_media( - self, request: Request, server_name: str, media_id: str, name: Optional[str] + self, + request: SynapseRequest, + server_name: str, + media_id: str, + name: Optional[str], ) -> None: """Respond to requests for remote media. 
@@ -320,6 +335,9 @@ async def _get_remote_media_impl( logger.info("Media is quarantined") raise NotFoundError() + if not media_info["media_type"]: + media_info["media_type"] = "application/octet-stream" + responder = await self.media_storage.fetch_media(file_info) if responder: return responder, media_info @@ -341,6 +359,8 @@ async def _get_remote_media_impl( raise e file_id = media_info["filesystem_id"] + if not media_info["media_type"]: + media_info["media_type"] = "application/octet-stream" file_info = FileInfo(server_name, file_id) # We generate thumbnails even if another process downloaded the media @@ -432,7 +452,10 @@ async def _download_remote_file( await finish() - media_type = headers[b"Content-Type"][0].decode("ascii") + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() @@ -468,7 +491,9 @@ async def _download_remote_file( return media_info - def _get_thumbnail_requirements(self, media_type): + def _get_thumbnail_requirements( + self, media_type: str + ) -> Tuple[ThumbnailRequirement, ...]: scpos = media_type.find(";") if scpos > 0: media_type = media_type[:scpos] @@ -514,7 +539,7 @@ async def generate_local_exact_thumbnail( t_height: int, t_method: str, t_type: str, - url_cache: Optional[str], + url_cache: bool, ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(None, media_id, url_cache=url_cache) @@ -548,11 +573,12 @@ async def generate_local_exact_thumbnail( server_name=None, file_id=media_id, url_cache=url_cache, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) output_path = await self.media_storage.store_file( @@ -585,7 +611,7 @@ async def generate_remote_exact_thumbnail( t_type: str, ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( - FileInfo(server_name, file_id, url_cache=False) + FileInfo(server_name, file_id) ) try: @@ -616,11 +642,12 @@ async def generate_remote_exact_thumbnail( file_info = FileInfo( server_name=server_name, file_id=file_id, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) output_path = await self.media_storage.store_file( @@ -742,12 +769,13 @@ async def _generate_thumbnails( file_info = FileInfo( server_name=server_name, file_id=file_id, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) with self.media_storage.store_into_file(file_info) as (f, fname, finish): @@ -836,7 +864,9 @@ async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: return {"deleted": deleted} - async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: + async def delete_local_media_ids( + self, media_ids: List[str] + ) -> Tuple[List[str], int]: """ Delete the given local or remote media ID from this server @@ -845,7 +875,7 @@ async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: Returns: A tuple of (list 
of deleted media IDs, total deleted media IDs). """ - return await self._remove_local_media_from_disk([media_id]) + return await self._remove_local_media_from_disk(media_ids) async def delete_old_local_media( self, @@ -959,7 +989,7 @@ class MediaRepositoryResource(Resource): def __init__(self, hs: "HomeServer"): # If we're not configured to use it, raise if we somehow got here. - if not hs.config.can_load_media_repo: + if not hs.config.media.can_load_media_repo: raise ConfigError("Synapse is not configured to use a media repo.") super().__init__() @@ -970,7 +1000,7 @@ def __init__(self, hs: "HomeServer"): self.putChild( b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) ) - if hs.config.url_preview_enabled: + if hs.config.media.url_preview_enabled: self.putChild( b"preview_url", PreviewUrlResource(hs, media_repo, media_repo.media_storage), diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 56cdc1b4edd3..fca239d8c7ec 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,7 +15,20 @@ import logging import os import shutil -from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Sequence +from types import TracebackType +from typing import ( + IO, + TYPE_CHECKING, + Any, + Awaitable, + BinaryIO, + Callable, + Generator, + Optional, + Sequence, + Tuple, + Type, +) import attr @@ -83,12 +96,14 @@ async def store_file(self, source: IO, file_info: FileInfo) -> str: return fname - async def write_to_file(self, source: IO, output: IO): + async def write_to_file(self, source: IO, output: IO) -> None: """Asynchronously write the `source` to `output`.""" await defer_to_thread(self.reactor, _write_file_synchronously, source, output) @contextlib.contextmanager - def store_into_file(self, file_info: FileInfo): + def store_into_file( + self, file_info: FileInfo + ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]: """Context manager used to get a file like object to write into, as described by file_info. @@ -117,15 +132,14 @@ def store_into_file(self, file_info: FileInfo): fname = os.path.join(self.local_media_directory, path) dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) finished_called = [False] try: with open(fname, "wb") as f: - async def finish(): + async def finish() -> None: # Ensure that all writes have been flushed and close the # file. 
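The `media_storage.py` hunk above also swaps the old check-then-create directory dance for `os.makedirs(..., exist_ok=True)`, which removes the race where a concurrent writer creates the directory between the `exists()` check and the `makedirs()` call. The idiom in isolation (a sketch, not the patch's code):

```python
import os
import tempfile

def ensure_parent_dir(path: str) -> None:
    """Create the parent directory tree for `path`, tolerating concurrent creation."""
    # exist_ok=True makes this safe even if another writer just created the
    # same directory; the old `if not os.path.exists(...)` pattern could
    # still raise FileExistsError in that window.
    os.makedirs(os.path.dirname(path), exist_ok=True)

with tempfile.TemporaryDirectory() as tmp:
    target = os.path.join(tmp, "a", "b", "file.bin")
    ensure_parent_dir(target)
    ensure_parent_dir(target)  # idempotent: the second call is a no-op
    assert os.path.isdir(os.path.dirname(target))
```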
f.flush() @@ -176,9 +190,9 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: self.filepaths.remote_media_thumbnail_rel_legacy( server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, ) ) @@ -220,17 +234,16 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy( server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, ) legacy_local_path = os.path.join(self.local_media_directory, legacy_path) if os.path.exists(legacy_local_path): return legacy_local_path dirname = os.path.dirname(local_path) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) for provider in self.storage_providers: res: Any = await provider.fetch(path, file_info) @@ -255,10 +268,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: if file_info.thumbnail: return self.filepaths.url_cache_thumbnail_rel( media_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, ) return self.filepaths.url_cache_filepath_rel(file_info.file_id) @@ -267,10 +280,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: return self.filepaths.remote_media_thumbnail_rel( server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, ) return self.filepaths.remote_media_filepath_rel( file_info.server_name, file_info.file_id @@ -279,10 +292,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: if file_info.thumbnail: return self.filepaths.local_media_thumbnail_rel( media_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, ) return self.filepaths.local_media_filepath_rel(file_info.file_id) @@ -315,7 +328,12 @@ def write_to_consumer(self, consumer: IConsumer) -> Deferred: FileSender().beginFileTransfer(self.open_file, consumer) ) - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: self.open_file.close() @@ -339,7 +357,7 @@ class ReadableFileWrapper: clock = attr.ib(type=Clock) path = attr.ib(type=str) - async def write_chunks_to(self, callback: Callable[[bytes], None]): + async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None: 
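The call sites above replace five loose `thumbnail_*` keyword arguments on `FileInfo` with a single nested `ThumbnailInfo`. The real classes live in `synapse/rest/media/v1/_base.py`, which this diff does not show; the sketch below only illustrates the shape implied by the new call sites, with the field set inferred rather than copied:

```python
from typing import Optional

import attr

@attr.s(slots=True, frozen=True, auto_attribs=True)
class ThumbnailInfo:
    width: int
    height: int
    method: str  # e.g. "crop" or "scale"
    type: str    # the thumbnail's media type, e.g. "image/jpeg"

@attr.s(slots=True, frozen=True, auto_attribs=True)
class FileInfo:
    server_name: Optional[str]
    file_id: str
    url_cache: bool = False
    thumbnail: Optional[ThumbnailInfo] = None

info = FileInfo(None, "abc123", thumbnail=ThumbnailInfo(32, 32, "crop", "image/jpeg"))
assert info.thumbnail is not None and info.thumbnail.width == 32
```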
"""Reads the file in chunks and calls the callback with each chunk.""" with open(self.path, "rb") as file: diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py new file mode 100644 index 000000000000..2a59552c20a3 --- /dev/null +++ b/synapse/rest/media/v1/oembed.py @@ -0,0 +1,252 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import urllib.parse +from typing import TYPE_CHECKING, List, Optional + +import attr + +from synapse.types import JsonDict +from synapse.util import json_decoder + +if TYPE_CHECKING: + from lxml import etree + + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class OEmbedResult: + # The Open Graph result (converted from the oEmbed result). + open_graph_result: JsonDict + # Number of milliseconds to cache the content, according to the oEmbed response. + # + # This will be None if no cache-age is provided in the oEmbed response (or + # if the oEmbed response cannot be turned into an Open Graph response). + cache_age: Optional[int] + + +class OEmbedProvider: + """ + A helper for accessing oEmbed content. + + It can be used to check if a URL should be accessed via oEmbed and for + requesting/parsing oEmbed content. + """ + + def __init__(self, hs: "HomeServer"): + self._oembed_patterns = {} + for oembed_endpoint in hs.config.oembed.oembed_patterns: + api_endpoint = oembed_endpoint.api_endpoint + + # Only JSON is supported at the moment. This could be declared in + # the formats field. Otherwise, if the endpoint ends in .xml assume + # it doesn't support JSON. + if ( + oembed_endpoint.formats is not None + and "json" not in oembed_endpoint.formats + ) or api_endpoint.endswith(".xml"): + logger.info( + "Ignoring oEmbed endpoint due to not supporting JSON: %s", + api_endpoint, + ) + continue + + # Iterate through each URL pattern and point it to the endpoint. + for pattern in oembed_endpoint.url_patterns: + self._oembed_patterns[pattern] = api_endpoint + + def get_oembed_url(self, url: str) -> Optional[str]: + """ + Check whether the URL should be downloaded as oEmbed content instead. + + Args: + url: The URL to check. + + Returns: + A URL to use instead or None if the original URL should be used. + """ + for url_pattern, endpoint in self._oembed_patterns.items(): + if url_pattern.fullmatch(url): + # TODO Specify max height / width. + + # Note that only the JSON format is supported, some endpoints want + # this in the URL, others want it as an argument. + endpoint = endpoint.replace("{format}", "json") + + args = {"url": url, "format": "json"} + query_str = urllib.parse.urlencode(args, True) + return f"{endpoint}?{query_str}" + + # No match. + return None + + def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]: + """ + Search an HTML document for oEmbed autodiscovery information. + + Args: + tree: The parsed HTML body. 
+
+        Returns:
+            The URL to use for oEmbed information, or None if no URL was found.
+        """
+        # Search for link elements with the proper rel and type attributes.
+        for tag in tree.xpath(
+            "//link[@rel='alternate'][@type='application/json+oembed']"
+        ):
+            if "href" in tag.attrib:
+                return tag.attrib["href"]
+
+        # Some providers (e.g. Flickr) use alternative instead of alternate.
+        for tag in tree.xpath(
+            "//link[@rel='alternative'][@type='application/json+oembed']"
+        ):
+            if "href" in tag.attrib:
+                return tag.attrib["href"]
+
+        return None
+
+    def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
+        """
+        Parse the oEmbed response into an Open Graph response.
+
+        Args:
+            url: The URL which is being previewed (not the one which was
+                requested).
+            raw_body: The oEmbed response as JSON encoded as bytes.
+
+        Returns:
+            An OEmbedResult wrapping the Open Graph data and any cache age.
+        """
+
+        try:
+            # oEmbed responses *must* be UTF-8 according to the spec.
+            oembed = json_decoder.decode(raw_body.decode("utf-8"))
+
+            # The version is a required string field, but not always provided,
+            # or sometimes provided as a float. Be lenient.
+            oembed_version = oembed.get("version", "1.0")
+            if oembed_version != "1.0" and oembed_version != 1:
+                raise RuntimeError(f"Invalid oEmbed version: {oembed_version}")
+
+            # Ensure the cache age is None or an int (and convert it to ms).
+            cache_age = oembed.get("cache_age")
+            if cache_age:
+                cache_age = int(cache_age) * 1000
+
+            # The results.
+            open_graph_response = {
+                "og:url": url,
+            }
+
+            # Use either title or author's name as the title.
+            title = oembed.get("title") or oembed.get("author_name")
+            if title:
+                open_graph_response["og:title"] = title
+
+            # Use the provider name as the site name.
+            provider_name = oembed.get("provider_name")
+            if provider_name:
+                open_graph_response["og:site_name"] = provider_name
+
+            # If a thumbnail exists, use it. Note that dimensions will be calculated later.
+            if "thumbnail_url" in oembed:
+                open_graph_response["og:image"] = oembed["thumbnail_url"]
+
+            # Process each type separately.
+            oembed_type = oembed["type"]
+            if oembed_type == "rich":
+                calc_description_and_urls(open_graph_response, oembed["html"])
+
+            elif oembed_type == "photo":
+                # If this is a photo, use the full image, not the thumbnail.
+                open_graph_response["og:image"] = oembed["url"]
+
+            elif oembed_type == "video":
+                open_graph_response["og:type"] = "video.other"
+                calc_description_and_urls(open_graph_response, oembed["html"])
+                open_graph_response["og:video:width"] = oembed["width"]
+                open_graph_response["og:video:height"] = oembed["height"]
+
+            elif oembed_type == "link":
+                open_graph_response["og:type"] = "website"
+
+            else:
+                raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
+
+        except Exception as e:
+            # Trap any exception and let the code follow as usual.
+            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
+            open_graph_response = {}
+            cache_age = None
+
+        return OEmbedResult(open_graph_response, cache_age)
+
+
+def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
+    results = []
+    for tag in tree.xpath("//*/" + tag_name):
+        if "src" in tag.attrib:
+            results.append(tag.attrib["src"])
+    return results
+
+
+def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
+    """
+    Calculate description for an HTML document.
+
+    This uses lxml to convert the HTML document into plaintext. If errors
+    occur during processing of the document, an empty response is returned.
+
+    Args:
+        open_graph_response: The current Open Graph summary.
+            This is updated with additional fields.
+        html_body: The HTML document, as a string.
+
+    Returns:
+        None; open_graph_response is updated in place.
+    """
+    # If there's no body, nothing useful is going to be found.
+    if not html_body:
+        return
+
+    from lxml import etree
+
+    # Create an HTML parser. If this fails, log and return no metadata.
+    parser = etree.HTMLParser(recover=True, encoding="utf-8")
+
+    # Attempt to parse the body. If this fails, log and return no metadata.
+    tree = etree.fromstring(html_body, parser)
+
+    # The data was successfully parsed, but no tree was found.
+    if tree is None:
+        return
+
+    # Attempt to find interesting URLs (images, videos, embeds).
+    if "og:image" not in open_graph_response:
+        image_urls = _fetch_urls(tree, "img")
+        if image_urls:
+            open_graph_response["og:image"] = image_urls[0]
+
+    video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed")
+    if video_urls:
+        open_graph_response["og:video"] = video_urls[0]
+
+    from synapse.rest.media.v1.preview_url_resource import _calc_description
+
+    description = _calc_description(tree)
+    if description:
+        open_graph_response["og:description"] = description
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0f051d4041ef..054f3c296da2 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import codecs
 import datetime
 import errno
 import fnmatch
@@ -22,13 +23,13 @@
 import shutil
 import sys
 import traceback
-from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, Optional, Union
+from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Tuple, Union
 from urllib import parse as urlparse
 
 import attr
 
+from twisted.internet.defer import Deferred
 from twisted.internet.error import DNSLookupError
-from twisted.web.server import Request
 
 from synapse.api.errors import Codes, SynapseError
 from synapse.http.client import SimpleHttpClient
@@ -43,6 +44,8 @@
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.media.v1._base import get_filename_from_headers
 from synapse.rest.media.v1.media_storage import MediaStorage
+from synapse.rest.media.v1.oembed import OEmbedProvider
+from synapse.types import JsonDict, UserID
 from synapse.util import json_encoder
 from synapse.util.async_helpers import ObservableDeferred
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -70,66 +73,47 @@
 OG_TAG_VALUE_MAXLEN = 1000
 
 ONE_HOUR = 60 * 60 * 1000
+ONE_DAY = 24 * ONE_HOUR
+IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
 
-# A map of globs to API endpoints.
-_oembed_globs = {
-    # Twitter.
-    "https://publish.twitter.com/oembed": [
-        "https://twitter.com/*/status/*",
-        "https://*.twitter.com/*/status/*",
-        "https://twitter.com/*/moments/*",
-        "https://*.twitter.com/*/moments/*",
-        # Include the HTTP versions too.
-        "http://twitter.com/*/status/*",
-        "http://*.twitter.com/*/status/*",
-        "http://twitter.com/*/moments/*",
-        "http://*.twitter.com/*/moments/*",
-    ],
-}
-# Convert the globs to regular expressions.
-_oembed_patterns = {}
-for endpoint, globs in _oembed_globs.items():
-    for glob in globs:
-        # Convert the glob into a sane regular expression to match against. The
-        # rules followed will be slightly different for the domain portion vs.
-        # the rest.
-        #
-        # 1. The scheme must be one of HTTP / HTTPS (and have no globs).
-        # 2. The domain can have globs, but we limit it to characters that can
-        #    reasonably be a domain part.
-        #    TODO: This does not attempt to handle Unicode domain names.
-        # 3. Other parts allow a glob to be any one, or more, characters.
-        results = urlparse.urlparse(glob)
-
-        # Ensure the scheme does not have wildcards (and is a sane scheme).
-        if results.scheme not in {"http", "https"}:
-            raise ValueError("Insecure oEmbed glob scheme: %s" % (results.scheme,))
-
-        pattern = urlparse.urlunparse(
-            [
-                results.scheme,
-                re.escape(results.netloc).replace("\\*", "[a-zA-Z0-9_-]+"),
-            ]
-            + [re.escape(part).replace("\\*", ".+") for part in results[2:]]
-        )
-        _oembed_patterns[re.compile(pattern)] = endpoint
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class MediaInfo:
+    """
+    Information parsed from downloading media being previewed.
+    """
-@attr.s(slots=True)
-class OEmbedResult:
-    # Either HTML content or URL must be provided.
-    html = attr.ib(type=Optional[str])
-    url = attr.ib(type=Optional[str])
-    title = attr.ib(type=Optional[str])
-    # Number of seconds to cache the content.
-    cache_age = attr.ib(type=int)
+    # The Content-Type header of the response.
+    media_type: str
+    # The length (in bytes) of the downloaded media.
+    media_length: int
+    # The media filename, according to the server. This is parsed from the
+    # returned headers, if possible.
+    download_name: Optional[str]
+    # The time of the preview.
+    created_ts_ms: int
+    # Information from the media storage provider about where the file is stored
+    # on disk.
+    filesystem_id: str
+    filename: str
+    # The URI being previewed.
+    uri: str
+    # The HTTP response code.
+    response_code: int
+    # The timestamp (in milliseconds) of when this preview expires.
+    expires: int
+    # The ETag header of the response.
+    etag: Optional[str]
 
 
-class OEmbedError(Exception):
-    """An error occurred processing the oEmbed object."""
+class PreviewUrlResource(DirectServeJsonResource):
+    """
+    Generating URL previews is a complicated task with many potential pitfalls.
+    See docs/development/url_previews.md for discussion of the design and
+    algorithm followed in this module.
+    """
 
-class PreviewUrlResource(DirectServeJsonResource):
     isLeaf = True
 
     def __init__(
@@ -143,20 +127,22 @@ def __init__(
         self.auth = hs.get_auth()
         self.clock = hs.get_clock()
         self.filepaths = media_repo.filepaths
-        self.max_spider_size = hs.config.max_spider_size
+        self.max_spider_size = hs.config.media.max_spider_size
         self.server_name = hs.hostname
         self.store = hs.get_datastore()
         self.client = SimpleHttpClient(
             hs,
             treq_args={"browser_like_redirects": True},
-            ip_whitelist=hs.config.url_preview_ip_range_whitelist,
-            ip_blacklist=hs.config.url_preview_ip_range_blacklist,
+            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
+            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
             use_proxy=True,
         )
         self.media_repo = media_repo
         self.primary_base_path = media_repo.primary_base_path
         self.media_storage = media_storage
 
+        self._oembed = OEmbedProvider(hs)
+
         # We run the background jobs if we're the instance specified (or no
         # instance is specified, where we assume there is only one instance
         # serving media).
@@ -166,8 +152,8 @@ def __init__( or instance_running_jobs == hs.get_instance_name() ) - self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist - self.url_preview_accept_language = hs.config.url_preview_accept_language + self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist + self.url_preview_accept_language = hs.config.media.url_preview_accept_language # memory cache mapping urls to an ObservableDeferred returning # JSON-encoded OG metadata @@ -183,7 +169,7 @@ def __init__( self._start_expire_url_cache_data, 10 * 1000 ) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: request.setHeader(b"Allow", b"OPTIONS, GET") respond_with_json(request, 200, {}, send_cors=True) @@ -245,7 +231,7 @@ async def _async_render_GET(self, request: SynapseRequest) -> None: og = await make_deferred_yieldable(observable.observe()) respond_with_json_bytes(request, 200, og, send_cors=True) - async def _do_preview(self, url: str, user: str, ts: int) -> bytes: + async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: """Check the db, and download the URL and build a preview Args: @@ -271,22 +257,30 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: og = og.encode("utf8") return og - media_info = await self._download_url(url, user) + # If this URL can be accessed via oEmbed, use that instead. + url_to_download = url + oembed_url = self._oembed.get_oembed_url(url) + if oembed_url: + url_to_download = oembed_url + + media_info = await self._download_url(url_to_download, user) logger.debug("got media_info of '%s'", media_info) - if _is_media(media_info["media_type"]): - file_id = media_info["filesystem_id"] + # The number of milliseconds that the response should be considered valid. + expiration_ms = media_info.expires + + if _is_media(media_info.media_type): + file_id = media_info.filesystem_id dims = await self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info["media_type"], url_cache=True + None, file_id, file_id, media_info.media_type, url_cache=True ) og = { - "og:description": media_info["download_name"], - "og:image": "mxc://%s/%s" - % (self.server_name, media_info["filesystem_id"]), - "og:image:type": media_info["media_type"], - "matrix:image:size": media_info["media_length"], + "og:description": media_info.download_name, + "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}", + "og:image:type": media_info.media_type, + "matrix:image:size": media_info.media_length, } if dims: @@ -296,44 +290,40 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: logger.warning("Couldn't get dims for %s" % url) # define our OG response for this media - elif _is_html(media_info["media_type"]): + elif _is_html(media_info.media_type): # TODO: somehow stop a big HTML tree from exploding synapse's RAM - with open(media_info["filename"], "rb") as file: + with open(media_info.filename, "rb") as file: body = file.read() - encoding = get_html_media_encoding(body, media_info["media_type"]) - og = decode_and_calc_og(body, media_info["uri"], encoding) + tree = decode_body(body, media_info.uri, media_info.media_type) + if tree is not None: + # Check if this HTML document points to oEmbed information and + # defer to that. 
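The reworked `_do_preview` above decides up front whether to fetch the page itself or a matching provider's oEmbed endpoint. Reduced to its essence (names here are illustrative, not from the patch):

```python
from typing import Callable, Optional

def choose_download_url(url: str, get_oembed_url: Callable[[str], Optional[str]]) -> str:
    # Mirrors the new logic in _do_preview: prefer the provider's oEmbed API
    # endpoint over the page itself whenever a URL pattern matches.
    return get_oembed_url(url) or url

def fake_provider(url: str) -> Optional[str]:
    return "https://publish.twitter.com/oembed?url=" + url if "twitter.com" in url else None

assert choose_download_url("https://twitter.com/a/status/1", fake_provider).startswith("https://publish.twitter.com/")
assert choose_download_url("https://example.com/", fake_provider) == "https://example.com/"
```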
+ oembed_url = self._oembed.autodiscover_from_html(tree) + og = {} + if oembed_url: + oembed_info = await self._download_url(oembed_url, user) + og, expiration_ms = await self._handle_oembed_response( + url, oembed_info, expiration_ms + ) - # pre-cache the image for posterity - # FIXME: it might be cleaner to use the same flow as the main /preview_url - # request itself and benefit from the same caching etc. But for now we - # just rely on the caching on the master request to speed things up. - if "og:image" in og and og["og:image"]: - image_info = await self._download_url( - _rebase_url(og["og:image"], media_info["uri"]), user - ) + # If there was no oEmbed URL (or oEmbed parsing failed), attempt + # to generate the Open Graph information from the HTML. + if not oembed_url or not og: + og = _calc_og(tree, media_info.uri) + + await self._precache_image_url(user, media_info, og) + else: + og = {} + + elif oembed_url: + # Handle the oEmbed information. + og, expiration_ms = await self._handle_oembed_response( + url, media_info, expiration_ms + ) + await self._precache_image_url(user, media_info, og) - if _is_media(image_info["media_type"]): - # TODO: make sure we don't choke on white-on-transparent images - file_id = image_info["filesystem_id"] - dims = await self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info["media_type"], url_cache=True - ) - if dims: - og["og:image:width"] = dims["width"] - og["og:image:height"] = dims["height"] - else: - logger.warning("Couldn't get dims for %s", og["og:image"]) - - og["og:image"] = "mxc://%s/%s" % ( - self.server_name, - image_info["filesystem_id"], - ) - og["og:image:type"] = image_info["media_type"] - og["matrix:image:size"] = image_info["media_length"] - else: - del og["og:image"] else: logger.warning("Failed to find any OG data in %s", url) og = {} @@ -354,101 +344,23 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: jsonog = json_encoder.encode(og) + # Cap the amount of time to consider a response valid. + expiration_ms = min(expiration_ms, ONE_DAY) + # store OG in history-aware DB cache await self.store.store_url_cache( url, - media_info["response_code"], - media_info["etag"], - media_info["expires"] + media_info["created_ts"], + media_info.response_code, + media_info.etag, + media_info.created_ts_ms + expiration_ms, jsonog, - media_info["filesystem_id"], - media_info["created_ts"], + media_info.filesystem_id, + media_info.created_ts_ms, ) return jsonog.encode("utf8") - def _get_oembed_url(self, url: str) -> Optional[str]: - """ - Check whether the URL should be downloaded as oEmbed content instead. - - Args: - url: The URL to check. - - Returns: - A URL to use instead or None if the original URL should be used. - """ - for url_pattern, endpoint in _oembed_patterns.items(): - if url_pattern.fullmatch(url): - return endpoint - - # No match. - return None - - async def _get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult: - """ - Request content from an oEmbed endpoint. - - Args: - endpoint: The oEmbed API endpoint. - url: The URL to pass to the API. - - Returns: - An object representing the metadata returned. - - Raises: - OEmbedError if fetching or parsing of the oEmbed information fails. - """ - try: - logger.debug("Trying to get oEmbed content for url '%s'", url) - result = await self.client.get_json( - endpoint, - # TODO Specify max height / width. - # Note that only the JSON format is supported. - args={"url": url}, - ) - - # Ensure there's a version of 1.0. 
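Whatever lifetime the oEmbed response claims, the code above clamps it to a day before the entry is written to the URL cache, so a misbehaving provider cannot pin a stale preview for a month. The constants match those introduced earlier in this diff; the wrapper function is illustrative:

```python
ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR

def clamp_expiration_ms(expiration_ms: int) -> int:
    """Never trust a cache lifetime longer than a day."""
    return min(expiration_ms, ONE_DAY)

assert clamp_expiration_ms(ONE_HOUR) == ONE_HOUR     # modest values pass through
assert clamp_expiration_ms(30 * ONE_DAY) == ONE_DAY  # a 30-day cache_age gets capped
```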
- if result.get("version") != "1.0": - raise OEmbedError("Invalid version: %s" % (result.get("version"),)) - - oembed_type = result.get("type") - - # Ensure the cache age is None or an int. - cache_age = result.get("cache_age") - if cache_age: - cache_age = int(cache_age) - - oembed_result = OEmbedResult(None, None, result.get("title"), cache_age) - - # HTML content. - if oembed_type == "rich": - oembed_result.html = result.get("html") - return oembed_result - - if oembed_type == "photo": - oembed_result.url = result.get("url") - return oembed_result - - # TODO Handle link and video types. - - if "thumbnail_url" in result: - oembed_result.url = result.get("thumbnail_url") - return oembed_result - - raise OEmbedError("Incompatible oEmbed information.") - - except OEmbedError as e: - # Trap OEmbedErrors first so we can directly re-raise them. - logger.warning("Error parsing oEmbed metadata from %s: %r", url, e) - raise - - except Exception as e: - # Trap any exception and let the code follow as usual. - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading oEmbed metadata from %s: %r", url, e) - raise OEmbedError() from e - - async def _download_url(self, url: str, user: str) -> Dict[str, Any]: + async def _download_url(self, url: str, user: UserID) -> MediaInfo: # TODO: we should probably honour robots.txt... except in practice # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? @@ -457,88 +369,52 @@ async def _download_url(self, url: str, user: str) -> Dict[str, Any]: file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True) - # If this URL can be accessed via oEmbed, use that instead. - url_to_download: Optional[str] = url - oembed_url = self._get_oembed_url(url) - if oembed_url: - # The result might be a new URL to download, or it might be HTML content. + with self.media_storage.store_into_file(file_info) as (f, fname, finish): try: - oembed_result = await self._get_oembed_content(oembed_url, url) - if oembed_result.url: - url_to_download = oembed_result.url - elif oembed_result.html: - url_to_download = None - except OEmbedError: - # If an error occurs, try doing a normal preview. - pass - - if url_to_download: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - logger.debug("Trying to get preview for url '%s'", url_to_download) - length, headers, uri, code = await self.client.get_file( - url_to_download, - output_stream=f, - max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, - ) - except SynapseError: - # Pass SynapseErrors through directly, so that the servlet - # handler will return a SynapseError to the client instead of - # blank data or a 500. 
- raise - except DNSLookupError: - # DNS lookup returned no results - # Note: This will also be the case if one of the resolved IP - # addresses is blacklisted - raise SynapseError( - 502, - "DNS resolution failure during URL preview generation", - Codes.UNKNOWN, - ) - except Exception as e: - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading %s: %r", url_to_download, e) - - raise SynapseError( - 500, - "Failed to download content: %s" - % (traceback.format_exception_only(sys.exc_info()[0], e),), - Codes.UNKNOWN, - ) - await finish() + logger.debug("Trying to get preview for url '%s'", url) + length, headers, uri, code = await self.client.get_file( + url, + output_stream=f, + max_size=self.max_spider_size, + headers={"Accept-Language": self.url_preview_accept_language}, + ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. + raise + except DNSLookupError: + # DNS lookup returned no results + # Note: This will also be the case if one of the resolved IP + # addresses is blacklisted + raise SynapseError( + 502, + "DNS resolution failure during URL preview generation", + Codes.UNKNOWN, + ) + except Exception as e: + # FIXME: pass through 404s and other error messages nicely + logger.warning("Error downloading %s: %r", url, e) - if b"Content-Type" in headers: - media_type = headers[b"Content-Type"][0].decode("ascii") - else: - media_type = "application/octet-stream" + raise SynapseError( + 500, + "Failed to download content: %s" + % (traceback.format_exception_only(sys.exc_info()[0], e),), + Codes.UNKNOWN, + ) + await finish() - download_name = get_filename_from_headers(headers) + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" - # FIXME: we should calculate a proper expiration based on the - # Cache-Control and Expire headers. But for now, assume 1 hour. - expires = ONE_HOUR - etag = ( - headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None - ) - else: - # we can only get here if we did an oembed request and have an oembed_result.html - assert oembed_result.html is not None - assert oembed_url is not None - - html_bytes = oembed_result.html.encode("utf-8") - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - f.write(html_bytes) - await finish() - - media_type = "text/html" - download_name = oembed_result.title - length = len(html_bytes) - # If a specific cache age was not given, assume 1 hour. - expires = oembed_result.cache_age or ONE_HOUR - uri = oembed_url - code = 200 - etag = None + download_name = get_filename_from_headers(headers) + + # FIXME: we should calculate a proper expiration based on the + # Cache-Control and Expire headers. But for now, assume 1 hour. + expires = ONE_HOUR + etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None try: time_now_ms = self.clock.time_msec() @@ -560,27 +436,99 @@ async def _download_url(self, url: str, user: str) -> Dict[str, Any]: # therefore not expire it. 
raise - return { - "media_type": media_type, - "media_length": length, - "download_name": download_name, - "created_ts": time_now_ms, - "filesystem_id": file_id, - "filename": fname, - "uri": uri, - "response_code": code, - "expires": expires, - "etag": etag, - } - - def _start_expire_url_cache_data(self): + return MediaInfo( + media_type=media_type, + media_length=length, + download_name=download_name, + created_ts_ms=time_now_ms, + filesystem_id=file_id, + filename=fname, + uri=uri, + response_code=code, + expires=expires, + etag=etag, + ) + + async def _precache_image_url( + self, user: UserID, media_info: MediaInfo, og: JsonDict + ) -> None: + """ + Pre-cache the image (if one exists) for posterity + + Args: + user: The user requesting the preview. + media_info: The media being previewed. + og: The Open Graph dictionary. This is modified with image information. + """ + # If there's no image or it is blank, there's nothing to do. + if "og:image" not in og or not og["og:image"]: + return + + # FIXME: it might be cleaner to use the same flow as the main /preview_url + # request itself and benefit from the same caching etc. But for now we + # just rely on the caching on the master request to speed things up. + image_info = await self._download_url( + _rebase_url(og["og:image"], media_info.uri), user + ) + + if _is_media(image_info.media_type): + # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info.filesystem_id + dims = await self.media_repo._generate_thumbnails( + None, file_id, file_id, image_info.media_type, url_cache=True + ) + if dims: + og["og:image:width"] = dims["width"] + og["og:image:height"] = dims["height"] + else: + logger.warning("Couldn't get dims for %s", og["og:image"]) + + og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}" + og["og:image:type"] = image_info.media_type + og["matrix:image:size"] = image_info.media_length + else: + del og["og:image"] + + async def _handle_oembed_response( + self, url: str, media_info: MediaInfo, expiration_ms: int + ) -> Tuple[JsonDict, int]: + """ + Parse the downloaded oEmbed info. + + Args: + url: The URL which is being previewed (not the one which was + requested). + media_info: The media being previewed. + expiration_ms: The length of time, in milliseconds, the media is valid for. + + Returns: + A tuple of: + The Open Graph dictionary, if the oEmbed info can be parsed. + The (possibly updated) length of time, in milliseconds, the media is valid for. + """ + # If JSON was not returned, there's nothing to do. + if not _is_json(media_info.media_type): + return {}, expiration_ms + + with open(media_info.filename, "rb") as file: + body = file.read() + + oembed_response = self._oembed.parse_oembed_response(url, body) + open_graph_result = oembed_response.open_graph_result + + # Use the cache age from the oEmbed result, if one was given. 
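`_precache_image_url` above resolves a possibly-relative `og:image` against the page that supplied it via `_rebase_url`, whose implementation is outside this diff. Assuming it behaves like standard relative-reference resolution, `urllib.parse.urljoin` gives the rough idea:

```python
from urllib.parse import urljoin

def rebase_url(url: str, base: str) -> str:
    # Rough stand-in for _rebase_url (not shown in this diff): resolve a
    # possibly-relative og:image URL against the page that supplied it.
    return urljoin(base, url)

assert rebase_url("/images/logo.png", "https://example.com/a/page") == "https://example.com/images/logo.png"
assert rebase_url("https://cdn.example.com/x.png", "https://example.com/") == "https://cdn.example.com/x.png"
```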
+ if open_graph_result and oembed_response.cache_age is not None: + expiration_ms = oembed_response.cache_age + + return open_graph_result, expiration_ms + + def _start_expire_url_cache_data(self) -> Deferred: return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data ) async def _expire_url_cache_data(self) -> None: """Clean up expired url cache content, media and thumbnails.""" - # TODO: Delete from backup media store assert self._worker_run_media_background_jobs @@ -592,6 +540,27 @@ async def _expire_url_cache_data(self) -> None: logger.info("Still running DB updates; skipping expiry") return + def try_remove_parent_dirs(dirs: Iterable[str]) -> None: + """Attempt to remove the given chain of parent directories + + Args: + dirs: The list of directory paths to delete, with children appearing + before their parents. + """ + for dir in dirs: + try: + os.rmdir(dir) + except FileNotFoundError: + # Already deleted, continue with deleting the rest + pass + except OSError as e: + # Failed, skip deleting the rest of the parent dirs + if e.errno != errno.ENOTEMPTY: + logger.warning( + "Failed to remove media directory: %r: %s", dir, e + ) + break + # First we delete expired url cache entries media_ids = await self.store.get_expired_url_cache(now) @@ -600,20 +569,16 @@ async def _expire_url_cache_data(self) -> None: fname = self.filepaths.url_cache_filepath(media_id) try: os.remove(fname) + except FileNotFoundError: + pass # If the path doesn't exist, meh except OSError as e: - # If the path doesn't exist, meh - if e.errno != errno.ENOENT: - logger.warning("Failed to remove media: %r: %s", media_id, e) - continue + logger.warning("Failed to remove media: %r: %s", media_id, e) + continue removed_media.append(media_id) - try: - dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) - for dir in dirs: - os.rmdir(dir) - except Exception: - pass + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + try_remove_parent_dirs(dirs) await self.store.delete_url_cache(removed_media) @@ -626,7 +591,7 @@ async def _expire_url_cache_data(self) -> None: # These may be cached for a bit on the client (i.e., they # may have a room open with a preview url thing open). # So we wait a couple of days before deleting, just in case. 
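`try_remove_parent_dirs` above only ever deletes empty directories: it walks the chain child-first and stops at the first directory that is still in use, so sibling media is never touched. The same logic, runnable in isolation:

```python
import errno
import os
import tempfile
from typing import Iterable

def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
    """Remove each directory in turn, children first, stopping at the first non-empty one."""
    for d in dirs:
        try:
            os.rmdir(d)  # only succeeds on empty directories
        except FileNotFoundError:
            pass  # already gone; keep walking up
        except OSError as e:
            if e.errno != errno.ENOTEMPTY:
                print(f"failed to remove {d}: {e}")
            break  # a non-empty (or otherwise stuck) parent ends the walk

with tempfile.TemporaryDirectory() as tmp:
    leaf = os.path.join(tmp, "a", "b", "c")
    os.makedirs(leaf)
    # Children before parents, mirroring the *_dirs_to_delete() ordering.
    try_remove_parent_dirs([leaf, os.path.dirname(leaf), os.path.join(tmp, "a")])
    assert not os.path.exists(os.path.join(tmp, "a"))
```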
-        expire_before = now - 2 * 24 * ONE_HOUR
+        expire_before = now - IMAGE_CACHE_EXPIRY_MS
         media_ids = await self.store.get_url_cache_media_before(expire_before)
 
         removed_media = []
@@ -634,36 +599,30 @@ async def _expire_url_cache_data(self) -> None:
             fname = self.filepaths.url_cache_filepath(media_id)
             try:
                 os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
             except OSError as e:
-                # If the path doesn't exist, meh
-                if e.errno != errno.ENOENT:
-                    logger.warning("Failed to remove media: %r: %s", media_id, e)
-                    continue
+                logger.warning("Failed to remove media: %r: %s", media_id, e)
+                continue
 
-            try:
-                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-                for dir in dirs:
-                    os.rmdir(dir)
-            except Exception:
-                pass
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
 
             thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
             try:
                 shutil.rmtree(thumbnail_dir)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
             except OSError as e:
-                # If the path doesn't exist, meh
-                if e.errno != errno.ENOENT:
-                    logger.warning("Failed to remove media: %r: %s", media_id, e)
-                    continue
+                logger.warning("Failed to remove media: %r: %s", media_id, e)
+                continue
 
             removed_media.append(media_id)
 
-            try:
-                dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
-                for dir in dirs:
-                    os.rmdir(dir)
-            except Exception:
-                pass
+            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+            # Note that one of the directories to be deleted has already been
+            # removed by the `rmtree` above.
+            try_remove_parent_dirs(dirs)
 
         await self.store.delete_url_cache_media(removed_media)
 
@@ -673,16 +632,27 @@ async def _expire_url_cache_data(self) -> None:
             logger.debug("No media removed from url cache")
 
 
-def get_html_media_encoding(body: bytes, content_type: str) -> str:
+def _normalise_encoding(encoding: str) -> Optional[str]:
+    """Use the Python codec's name as the normalised entry."""
+    try:
+        return codecs.lookup(encoding).name
+    except LookupError:
+        return None
+
+
+def get_html_media_encodings(body: bytes, content_type: Optional[str]) -> Iterable[str]:
     """
-    Get the encoding of the body based on the (presumably) HTML body or media_type.
+    Get the potential encodings of the body based on the (presumably) HTML body or the content-type header.
 
     The precedence used for finding a character encoding is:
 
-    1. meta tag with a charset declared.
+    1. <meta> tag with a charset declared.
     2. The XML document's character encoding attribute.
     3. The Content-Type header.
-    4. Fallback to UTF-8.
+    4. Fallback to utf-8.
+    5. Fallback to windows-1252.
+
+    This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector.
 
     Args:
        body: The HTML document, as bytes.
@@ -691,84 +661,100 @@ def get_html_media_encoding(body: bytes, content_type: str) -> str:
 
     Returns:
         The character encoding of the body, as a string.
     """
+    # There's no point in returning an encoding more than once.
+    attempted_encodings: Set[str] = set()
+
     # Limit searches to the first 1kb, since it ought to be at the top.
     body_start = body[:1024]
 
-    # Let's try and figure out if it has an encoding set in a meta tag.
+    # Check if it has an encoding set in a meta tag.
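`_normalise_encoding` leans on `codecs.lookup` so that different spellings of one codec collapse to a single canonical name before deduplication; without it, `get_html_media_encodings` could yield the same codec twice under two aliases. For example:

```python
import codecs
from typing import Optional

def normalise_encoding(encoding: str) -> Optional[str]:
    # Same approach as _normalise_encoding above: the codec's canonical
    # name is the deduplication key; unknown charsets map to None.
    try:
        return codecs.lookup(encoding).name
    except LookupError:
        return None

assert normalise_encoding("UTF8") == "utf-8"         # alias of utf-8
assert normalise_encoding("latin-1") == "iso8859-1"  # alias of ISO-8859-1
assert normalise_encoding("no-such-charset") is None
```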
     match = _charset_match.search(body_start)
     if match:
-        return match.group(1).decode("ascii")
+        encoding = _normalise_encoding(match.group(1).decode("ascii"))
+        if encoding:
+            attempted_encodings.add(encoding)
+            yield encoding
 
     # TODO Support <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
 
-    # If we didn't find a match, see if it an XML document with an encoding.
+    # Check if it is an XML document with a declared encoding.
     match = _xml_encoding_match.match(body_start)
     if match:
-        return match.group(1).decode("ascii")
-
-    # If we don't find a match, we'll look at the HTTP Content-Type, and
-    # if that doesn't exist, we'll fall back to UTF-8.
-    content_match = _content_type_match.match(content_type)
-    if content_match:
-        return content_match.group(1)
-
-    return "utf-8"
-
-
-def decode_and_calc_og(
-    body: bytes, media_uri: str, request_encoding: Optional[str] = None
-) -> Dict[str, Optional[str]]:
+        encoding = _normalise_encoding(match.group(1).decode("ascii"))
+        if encoding and encoding not in attempted_encodings:
+            attempted_encodings.add(encoding)
+            yield encoding
+
+    # Check the HTTP Content-Type header for a character set.
+    if content_type:
+        content_match = _content_type_match.match(content_type)
+        if content_match:
+            encoding = _normalise_encoding(content_match.group(1))
+            if encoding and encoding not in attempted_encodings:
+                attempted_encodings.add(encoding)
+                yield encoding
+
+    # Finally, fall back to UTF-8, then windows-1252.
+    for fallback in ("utf-8", "cp1252"):
+        if fallback not in attempted_encodings:
+            yield fallback
+
+
+def decode_body(
+    body: bytes, uri: str, content_type: Optional[str] = None
+) -> Optional["etree.Element"]:
     """
-    Calculate metadata for an HTML document.
-
-    This uses lxml to parse the HTML document into the OG response. If errors
-    occur during processing of the document, an empty response is returned.
+    This uses lxml to parse the HTML document.
 
     Args:
         body: The HTML document, as bytes.
-        media_url: The URI used to download the body.
-        request_encoding: The character encoding of the body, as a string.
+        uri: The URI used to download the body.
+        content_type: The Content-Type header.
 
     Returns:
-        The OG response as a dictionary.
+        The parsed HTML body, or None if an error occurred during processing.
     """
     # If there's no body, nothing useful is going to be found.
     if not body:
-        return {}
+        return None
+
+    # The idea here is that multiple encodings are tried until one works.
+    # Unfortunately the decoded result is thrown away and lxml will decode
+    # the body again with whichever encoding worked.
+    for encoding in get_html_media_encodings(body, content_type):
+        try:
+            body.decode(encoding)
+        except Exception:
+            pass
+        else:
+            break
+    else:
+        logger.warning("Unable to decode HTML body for %s", uri)
+        return None
 
     from lxml import etree
 
-    # Create an HTML parser. If this fails, log and return no metadata.
-    try:
-        parser = etree.HTMLParser(recover=True, encoding=request_encoding)
-    except LookupError:
-        # blindly consider the encoding as utf-8.
-        parser = etree.HTMLParser(recover=True, encoding="utf-8")
-    except Exception as e:
-        logger.warning("Unable to create HTML parser: %s" % (e,))
-        return {}
+    # Create an HTML parser.
+    parser = etree.HTMLParser(recover=True, encoding=encoding)
 
-    def _attempt_calc_og(body_attempt: Union[bytes, str]) -> Dict[str, Optional[str]]:
-        # Attempt to parse the body. If this fails, log and return no metadata.
-        tree = etree.fromstring(body_attempt, parser)
+    # Attempt to parse the body. Returns None if the body was successfully
+    # parsed, but no tree was found.
+    return etree.fromstring(body, parser)
 
-        # The data was successfully parsed, but no tree was found.
-        if tree is None:
-            return {}
-        return _calc_og(tree, media_uri)
+
+def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
+    """
+    Calculate metadata for an HTML document.
 
-    # Attempt to parse the body. If this fails, log and return no metadata.
-    try:
-        return _attempt_calc_og(body)
-    except UnicodeDecodeError:
-        # blindly try decoding the body as utf-8, which seems to fix
-        # the charset mismatches on https://google.com
-        return _attempt_calc_og(body.decode("utf-8", "ignore"))
+    This uses lxml to search the HTML document for Open Graph data.
 
+    Args:
+        tree: The parsed HTML document.
+        media_uri: The URI used to download the body.
 
-def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
-    # suck our tree into lxml and define our OG response.
+    Returns:
+        The Open Graph response as a dictionary.
+    """
 
     # if we see any image URLs in the OG response, then spider them
     # (although the client could choose to do this by asking for previews of those
@@ -842,35 +828,7 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
 
         if meta_description:
             og["og:description"] = meta_description[0]
         else:
-            # grab any text nodes which are inside the <body/> tag...
-            # unless they are within an HTML5 semantic markup tag...
-            # <header/>, <nav/>, <aside/>, <footer/>
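`decode_body` above tries each candidate encoding until one decodes cleanly, keeping only the encoding's name so lxml can be configured with it. The pattern in isolation:

```python
from typing import Iterable, Optional

def first_decodable_encoding(body: bytes, candidates: Iterable[str]) -> Optional[str]:
    # As in decode_body: the decoded string is thrown away, we only want to
    # learn which encoding the HTML parser should be told to use.
    for encoding in candidates:
        try:
            body.decode(encoding)
        except Exception:
            continue
        return encoding
    return None

assert first_decodable_encoding("caf\u00e9".encode("utf-8"), ["ascii", "utf-8"]) == "utf-8"
assert first_decodable_encoding(b"plain ascii", ["ascii"]) == "ascii"
```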