From 1537ad73253d568bd0a1b3c08f8cdcb71b0b3c90 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 12:34:03 +0300 Subject: [PATCH 01/13] chore: Added cookie secret to template files. --- misc/docker.env.template | 1 + misc/secrets.toml.template | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/misc/docker.env.template b/misc/docker.env.template index 30432e31..eb739d2b 100644 --- a/misc/docker.env.template +++ b/misc/docker.env.template @@ -2,3 +2,4 @@ STREAMLIT_SERVER_ADDRESS=0.0.0.0 STREAMLIT_SERVER_PORT=8501 #default DOCQ_DATA=./.persisted/ OPENAI_API_KEY # ideally set value on shell, don't insert a value here becuase it's a secret. +COOKIE_SECRET_KEY=cookie_password \ No newline at end of file diff --git a/misc/secrets.toml.template b/misc/secrets.toml.template index 194aa400..5bb3007a 100644 --- a/misc/secrets.toml.template +++ b/misc/secrets.toml.template @@ -1,2 +1,3 @@ DOCQ_DATA = "./.persisted/" -OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" \ No newline at end of file +OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" +COOKIE_SECRET_KEY = "cookies_password" \ No newline at end of file From b1f0bb57fd681a3c3ba0d75038de1fc90f10c618 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 12:34:55 +0300 Subject: [PATCH 02/13] chore: Added cryptography to lock file. --- poetry.lock | 191 +++++++++++++------------------------------------ pyproject.toml | 1 + 2 files changed, 50 insertions(+), 142 deletions(-) diff --git a/poetry.lock b/poetry.lock index 56ef0b9e..b0481f2c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -113,7 +112,6 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -128,7 +126,6 @@ frozenlist = ">=1.1.0" name = "altair" version = "4.2.2" description = "Altair: A declarative statistical visualization library for Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -151,7 +148,6 @@ dev = ["black", "docutils", "flake8", "ipython", "m2r", "mistune (<2.0.0)", "pyt name = "argon2-cffi" version = "21.3.0" description = "The secure Argon2 password hashing algorithm." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -171,7 +167,6 @@ tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"] name = "argon2-cffi-bindings" version = "21.2.0" description = "Low-level CFFI bindings for Argon2" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -209,7 +204,6 @@ tests = ["pytest"] name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -221,7 +215,6 @@ files = [ name = "atomicwrites" version = "1.4.1" description = "Atomic file writes." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -232,7 +225,6 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -251,7 +243,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "azure-core" version = "1.27.1" description = "Microsoft Azure Core Library for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -271,7 +262,6 @@ aio = ["aiohttp (>=3.0)"] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -290,7 +280,6 @@ lxml = ["lxml"] name = "black" version = "22.12.0" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -325,7 +314,6 @@ uvloop = ["uvloop (>=0.15.2)"] name = "blinker" version = "1.6.2" description = "Fast, simple object-to-object and broadcast signaling" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -337,7 +325,6 @@ files = [ name = "bracex" version = "2.3.post1" description = "Bash style brace expander." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -349,7 +336,6 @@ files = [ name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -361,7 +347,6 @@ files = [ name = "cairocffi" version = "1.6.0" description = "cffi-based cairo bindings for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -381,7 +366,6 @@ xcb = ["xcffib (>=1.4.0)"] name = "cairosvg" version = "2.7.0" description = "A Simple SVG Converter based on Cairo" -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -404,7 +388,6 @@ test = ["flake8", "isort", "pytest"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -416,7 +399,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = "*" files = [ @@ -493,7 +475,6 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." -category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -505,7 +486,6 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -590,7 +570,6 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -605,7 +584,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -617,7 +595,6 @@ files = [ name = "coverage" version = "7.2.7" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -689,11 +666,55 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "41.0.4" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, + {file = "cryptography-41.0.4-cp37-abi3-win32.whl", hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, + {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, + {file = "cryptography-41.0.4.tar.gz", hash = "sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +nox = ["nox"] +pep8test = ["black", "check-sdist", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "cssselect2" version = "0.7.0" description = "CSS selectors for Python ElementTree" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -713,7 +734,6 @@ test = ["flake8", "isort", "pytest"] name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -733,7 +753,6 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -745,7 +764,6 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -757,7 +775,6 @@ files = [ name = "distlib" version = "0.3.6" description = "Distribution utilities" -category = "dev" optional = false python-versions = "*" files = [ @@ -769,7 +786,6 @@ files = [ name = "docx2txt" version = "0.8" description = "A pure python-based utility to extract text and images from docx files." -category = "main" optional = false python-versions = "*" files = [ @@ -780,7 +796,6 @@ files = [ name = "entrypoints" version = "0.4" description = "Discover and load entry points from installed packages." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -792,7 +807,6 @@ files = [ name = "filelock" version = "3.12.0" description = "A platform independent file lock." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -808,7 +822,6 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "p name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -879,7 +892,6 @@ files = [ name = "fsspec" version = "2023.9.0" description = "File-system specification" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -915,7 +927,6 @@ tqdm = ["tqdm"] name = "ghp-import" version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." -category = "dev" optional = false python-versions = "*" files = [ @@ -933,7 +944,6 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -948,7 +958,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.35" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -963,7 +972,6 @@ gitdb = ">=4.0.1,<5" name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" -category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -1037,7 +1045,6 @@ test = ["objgraph", "psutil"] name = "identify" version = "2.5.24" description = "File identification library for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1052,7 +1059,6 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1064,7 +1070,6 @@ files = [ name = "importlib-metadata" version = "6.6.0" description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1084,7 +1089,6 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1096,7 +1100,6 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,7 +1117,6 @@ i18n = ["Babel (>=2.7)"] name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1126,7 +1128,6 @@ files = [ name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1146,7 +1147,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "langchain" version = "0.0.288" description = "Building applications with LLMs through composability" -category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1185,7 +1185,6 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] name = "langsmith" version = "0.0.37" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1201,7 +1200,6 @@ requests = ">=2,<3" name = "llama-index" version = "0.8.27" description = "Interface between LLMs and your data" -category = "main" optional = false python-versions = "*" files = [ @@ -1230,7 +1228,6 @@ urllib3 = "<2" name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1250,7 +1247,6 @@ testing = ["pytest"] name = "markdown" version = "3.3.7" description = "Python implementation of Markdown." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1265,7 +1261,6 @@ testing = ["coverage", "pyyaml"] name = "markdown-it-py" version = "2.2.0" description = "Python port of markdown-it. Markdown parsing, done right!" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1290,7 +1285,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.2" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1350,7 +1344,6 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1371,7 +1364,6 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" -category = "main" optional = false python-versions = "*" files = [ @@ -1386,7 +1378,6 @@ marshmallow = ">=2.0.0" name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1398,7 +1389,6 @@ files = [ name = "mergedeep" version = "1.3.4" description = "A deep merge function for 🐍." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1410,7 +1400,6 @@ files = [ name = "mkapi" version = "1.0.14" description = "An Auto API Documentation tool." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1426,7 +1415,6 @@ markdown = "*" name = "mkdocs" version = "1.4.3" description = "Project documentation with Markdown." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1454,7 +1442,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp name = "mkdocs-awesome-pages-plugin" version = "2.9.1" description = "An MkDocs plugin that simplifies configuring page titles and their order" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1471,7 +1458,6 @@ wcmatch = ">=7" name = "mkdocs-gen-files" version = "0.4.0" description = "MkDocs plugin to programmatically generate documentation pages during the build" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1486,7 +1472,6 @@ mkdocs = ">=1.0.3,<2.0.0" name = "mkdocs-material" version = "8.5.11" description = "Documentation that simply works" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1507,7 +1492,6 @@ requests = ">=2.26" name = "mkdocs-material-extensions" version = "1.1.1" description = "Extension pack for Python Markdown and MkDocs Material." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1519,7 +1503,6 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1603,7 +1586,6 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1615,7 +1597,6 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1631,7 +1612,6 @@ icu = ["PyICU (>=1.0.0)"] name = "nest-asyncio" version = "1.5.7" description = "Patch asyncio to allow nested event loops" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1643,7 +1623,6 @@ files = [ name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1669,7 +1648,6 @@ twitter = ["twython"] name = "nodeenv" version = "1.8.0" description = "Node.js virtual environment builder" -category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -1684,7 +1662,6 @@ setuptools = "*" name = "numexpr" version = "2.8.6" description = "Fast numerical expression evaluator for NumPy" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1727,7 +1704,6 @@ numpy = ">=1.13.3" name = "numpy" version = "1.24.3" description = "Fundamental package for array computing in Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1765,7 +1741,6 @@ files = [ name = "openai" version = "0.28.0" description = "Python client library for the OpenAI API" -category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -1780,7 +1755,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -1788,7 +1763,6 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "opendal" version = "0.38.1" description = "OpenDAL Python Binding" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1807,7 +1781,6 @@ test = ["behave"] name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1819,7 +1792,6 @@ files = [ name = "pandas" version = "2.0.2" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1886,7 +1858,6 @@ xml = ["lxml (>=4.6.3)"] name = "parse" version = "1.19.0" description = "parse() is the opposite of format()" -category = "dev" optional = false python-versions = "*" files = [ @@ -1897,7 +1868,6 @@ files = [ name = "parse-type" version = "0.6.0" description = "Simplifies to build parse types based on the parse module" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*" files = [ @@ -1917,7 +1887,6 @@ docs = ["sphinx (>=1.2)"] name = "pastel" version = "0.2.1" description = "Bring colors to your terminal." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1929,7 +1898,6 @@ files = [ name = "pathspec" version = "0.11.1" description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1941,7 +1909,6 @@ files = [ name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2021,7 +1988,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "3.5.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2037,7 +2003,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2053,7 +2018,6 @@ testing = ["pytest", "pytest-benchmark"] name = "poethepoet" version = "0.16.5" description = "A task runner that works well with poetry." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2072,7 +2036,6 @@ poetry-plugin = ["poetry (>=1.0,<2.0)"] name = "pre-commit" version = "2.21.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2091,7 +2054,6 @@ virtualenv = ">=20.10.0" name = "protobuf" version = "3.20.3" description = "Protocol Buffers" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2123,7 +2085,6 @@ files = [ name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2135,7 +2096,6 @@ files = [ name = "pyarrow" version = "12.0.0" description = "Python library for Apache Arrow" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2173,7 +2133,6 @@ numpy = ">=1.16.6" name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2185,7 +2144,6 @@ files = [ name = "pydantic" version = "1.10.12" description = "Data validation and settings management using python type hints" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2238,7 +2196,6 @@ email = ["email-validator (>=1.0.3)"] name = "pydeck" version = "0.8.0" description = "Widget for deck.gl maps" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2258,7 +2215,6 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2273,7 +2229,6 @@ plugins = ["importlib-metadata"] name = "pymdown-extensions" version = "10.0.1" description = "Extension pack for Python Markdown." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2289,7 +2244,6 @@ pyyaml = "*" name = "pympler" version = "1.0.1" description = "A development tool to measure, monitor and analyze the memory behavior of Python objects." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2301,7 +2255,6 @@ files = [ name = "pypdf" version = "3.9.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2320,7 +2273,6 @@ image = ["Pillow"] name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2357,7 +2309,6 @@ files = [ name = "pytest" version = "7.1.1" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2382,7 +2333,6 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2. name = "pytest-bdd" version = "6.1.1" description = "BDD for pytest" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -2401,7 +2351,6 @@ typing-extensions = "*" name = "pytest-cov" version = "3.0.0" description = "Pytest plugin for measuring coverage." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2420,7 +2369,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-html" version = "3.2.0" description = "pytest plugin for generating HTML reports" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2437,7 +2385,6 @@ pytest-metadata = "*" name = "pytest-metadata" version = "3.0.0" description = "pytest plugin for test session metadata" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2455,7 +2402,6 @@ test = ["black (>=22.1.0)", "flake8 (>=4.0.1)", "pre-commit (>=2.17.0)", "tox (> name = "pytest-reverse" version = "1.5.0" description = "Pytest plugin to reverse test order." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2470,7 +2416,6 @@ pytest = "*" name = "pytest-sugar" version = "0.9.7" description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." -category = "dev" optional = false python-versions = "*" files = [ @@ -2490,7 +2435,6 @@ dev = ["black", "flake8", "pre-commit"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2505,7 +2449,6 @@ six = ">=1.5" name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -2517,7 +2460,6 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2532,7 +2474,6 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2582,7 +2523,6 @@ files = [ name = "pyyaml-env-tag" version = "0.1" description = "A custom YAML tag for referencing environment variables in YAML files. " -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2597,7 +2537,6 @@ pyyaml = "*" name = "regex" version = "2023.8.8" description = "Alternative regular expression module, to replace re." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2695,7 +2634,6 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2717,7 +2655,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rich" version = "13.4.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -2736,7 +2673,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "ruff" version = "0.0.253" description = "An extremely fast Python linter, written in Rust." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2763,7 +2699,6 @@ files = [ name = "setuptools" version = "67.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2780,7 +2715,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2792,7 +2726,6 @@ files = [ name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2804,7 +2737,6 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2816,7 +2748,6 @@ files = [ name = "sqlalchemy" version = "2.0.20" description = "Database Abstraction Library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2864,7 +2795,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} typing-extensions = ">=4.2.0" [package.extras] @@ -2895,7 +2826,6 @@ sqlcipher = ["sqlcipher3-binary"] name = "st-pages" version = "0.4.1" description = "An experimental version of Streamlit Multi-Page Apps" -category = "main" optional = false python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*" files = [ @@ -2911,7 +2841,6 @@ streamlit = ">=1.10.0" name = "streamlit" version = "1.24.0" description = "A faster way to build and share data apps" -category = "main" optional = false python-versions = ">=3.8, !=3.9.7" files = [ @@ -2952,7 +2881,6 @@ snowflake = ["snowflake-snowpark-python"] name = "tenacity" version = "8.2.2" description = "Retry code until it succeeds" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2967,7 +2895,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "termcolor" version = "2.3.0" description = "ANSI color formatting for output in terminal" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2982,7 +2909,6 @@ tests = ["pytest", "pytest-cov"] name = "tiktoken" version = "0.5.1" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3028,7 +2954,6 @@ blobfile = ["blobfile (>=2)"] name = "tinycss2" version = "1.2.1" description = "A tiny CSS parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3047,7 +2972,6 @@ test = ["flake8", "isort", "pytest"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3059,7 +2983,6 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3071,7 +2994,6 @@ files = [ name = "toolz" version = "0.12.0" description = "List processing tools and functional utilities" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3083,7 +3005,6 @@ files = [ name = "tornado" version = "6.3.3" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "main" optional = false python-versions = ">= 3.8" files = [ @@ -3104,7 +3025,6 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3125,7 +3045,6 @@ telegram = ["requests"] name = "typing-extensions" version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3137,7 +3056,6 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." -category = "main" optional = false python-versions = "*" files = [ @@ -3153,7 +3071,6 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -3165,7 +3082,6 @@ files = [ name = "tzlocal" version = "4.3.1" description = "tzinfo object for the local timezone" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3184,7 +3100,6 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte name = "urllib3" version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3201,7 +3116,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "validators" version = "0.20.0" description = "Python Data Validation for Humans™." -category = "main" optional = false python-versions = ">=3.4" files = [ @@ -3218,7 +3132,6 @@ test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] name = "virtualenv" version = "20.23.0" description = "Virtual Python Environment builder" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3239,7 +3152,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess name = "watchdog" version = "3.0.0" description = "Filesystem events monitoring" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3279,7 +3191,6 @@ watchmedo = ["PyYAML (>=3.10)"] name = "wcmatch" version = "8.4.1" description = "Wildcard/glob file name matcher." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3294,7 +3205,6 @@ bracex = ">=2.1.1" name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -category = "dev" optional = false python-versions = "*" files = [ @@ -3306,7 +3216,6 @@ files = [ name = "xdoctest" version = "1.1.1" description = "A rewrite of the builtin doctest module" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3334,7 +3243,6 @@ tests-strict = ["codecov (==2.0.15)", "pytest (==4.6.0)", "pytest (==4.6.0)", "p name = "yarl" version = "1.9.2" description = "Yet another URL library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3422,7 +3330,6 @@ multidict = ">=4.0" name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3437,4 +3344,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e25a66ab0535aa105f6f617627aeb1f7056d08d8792ed318d631b25f3cb883f9" +content-hash = "5d9ac6c0559c579362251b09f48ec9458cb0fb697562679477ed01959c7ba646" diff --git a/pyproject.toml b/pyproject.toml index f3d52a23..034a144c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ argon2-cffi = "^21.3.0" azure-core = "^1.27.1" opendal = "^0.38.1" llama-index = "0.8.27" +cryptography = "^41.0.3" [tool.poetry.group.dev.dependencies] pre-commit = "^2.18.1" From a9387f2e8c9369e8dc916a87b05407a1406267a2 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 13:10:43 +0300 Subject: [PATCH 03/13] chore: Add setup configurations for auth_utils. --- source/docq/config.py | 3 +++ source/docq/setup.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/source/docq/config.py b/source/docq/config.py index 9bbeba18..05beecfb 100644 --- a/source/docq/config.py +++ b/source/docq/config.py @@ -5,6 +5,8 @@ ENV_VAR_DOCQ_DATA = "DOCQ_DATA" ENV_VAR_DOCQ_DEMO = "DOCQ_DEMO" ENV_VAR_OPENAI_API_KEY = "OPENAI_API_KEY" +ENV_VAR_COOKIE_SECRET_KEY = "COOKIE_SECRET_KEY" +COOKIE_NAME = "docqai/_docq" class SpaceType(Enum): @@ -22,6 +24,7 @@ class FeatureType(Enum): ASK_SHARED = "Ask Shared Documents" ASK_PUBLIC = "Ask Public Documents" CHAT_PRIVATE = "General Chat" + AUTO_LOGIN = "Auto Login" class LogType(Enum): diff --git a/source/docq/setup.py b/source/docq/setup.py index bb6fae93..dd73378a 100644 --- a/source/docq/setup.py +++ b/source/docq/setup.py @@ -9,7 +9,7 @@ manage_user_groups, manage_users, ) -from .support import store +from .support import auth_utils, store def _config_logging() -> None: @@ -29,4 +29,5 @@ def init() -> None: store._init() manage_organisations._init_default_org_if_necessary() manage_users._init_admin_if_necessary() + auth_utils.init_session_cache() logging.info("Docq initialized") From e13009076bb7ee7a3ca6c3323dd03607830386a5 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 13:33:03 +0300 Subject: [PATCH 04/13] feat!: Added auth_utils to allow setting auth state without UI interaction. --- source/docq/support/auth_utils.py | 224 ++++++++++++++++++++++++++ tests/docq/support/auth_utils_test.py | 143 ++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100644 source/docq/support/auth_utils.py create mode 100644 tests/docq/support/auth_utils_test.py diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py new file mode 100644 index 00000000..40e15068 --- /dev/null +++ b/source/docq/support/auth_utils.py @@ -0,0 +1,224 @@ +"""Cache user sessions.""" +import hashlib +import hmac +import json +import logging as log +import os +from datetime import datetime, timedelta +from secrets import token_hex +from typing import Callable, Dict, Optional + +from cachetools import TTLCache +from cryptography.fernet import Fernet +from streamlit.components.v1 import html +from streamlit.web.server.websocket_headers import _get_websocket_headers + +from ..config import COOKIE_NAME, ENV_VAR_COOKIE_SECRET_KEY, FeatureType +from ..manage_settings import SystemSettingsKey, get_organisation_settings + +EXPIRY_HOURS = 4 +CACHE_CONFIG = (1024 * 1, 60 * 60 * EXPIRY_HOURS) +AUTH_KEY = Fernet.generate_key() +AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_COOKIE_SECRET_KEY) + +# Session Cache. +cached_sessions:TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) +session_data:TTLCache[str, str]= TTLCache(*CACHE_CONFIG) + +def init_session_cache() -> None: + """Initialize session cache.""" + if AUTH_SESSION_SECRET_KEY is None: + log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY not set") + raise ValueError("COOKIE_SECRET_KEY must be set") + if len(AUTH_SESSION_SECRET_KEY) < 16: + log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY must be 16 or more characters") + raise ValueError("COOKIE_SECRET_KEY must be 16 or more characters") + + +def _set_cookie(cookie: str) -> None: + """Set client cookie for authentication.""" + try: + expiry = datetime.now() + timedelta(hours=EXPIRY_HOURS) + html(f""" + + """, width=0, height=0) + except Exception as e: + log.error("Failed to set cookie: %s", e) + + +def _clear_cookie() -> None: + """Clear client cookie.""" + html(f""" + + """, width=0, height=0) + + +def _get_cookies() -> Optional[Dict[str, str]]: + """Return client cookies.""" + try: + headers = _get_websocket_headers() + if headers is None: + return None + cookie_str = str(headers.get("Cookie")) + cookies: Dict[str, str] = {} + for cookie in cookie_str.split(";"): + key, val = cookie.split("=") + cookies[key.strip()] = val.strip() + return cookies + except Exception as e: + log.error("Failed to get cookies: %s", e) + return None + + +def _create_hmac( msg: str) -> str: + """Create a HMAC hash.""" + return hmac.new( + AUTH_SESSION_SECRET_KEY.encode(), + msg.encode(), + hashlib.sha256 + ).hexdigest() + + +def _verify_hmac(msg: str, digest: str) -> bool: + """Verify credibility of HMAC hash.""" + return hmac.compare_digest( + _create_hmac(msg), + digest + ) + + +def generate_session_id(length: int = 32) -> str: + """Generate a secure and unique session_id.""" + id_ = token_hex(length // 2) + hmac_ = _create_hmac(id_) + session_data[hmac_] = id_ + return hmac_ + + +def _set_session_id(session_id: str) -> None: + """Set the session_id in the cookie.""" + _set_cookie(session_id) + + +def _get_session_id() -> Optional[str]: + """Return the session_id from the cookie.""" + try: + cookies = _get_cookies() + if cookies is not None: + cookie = cookies.get(COOKIE_NAME) + if cookie is None: + return None + if cookie not in cached_sessions: + return None + if not _verify_hmac(session_data[cookie], cookie): + log.warning("Session ID not verified: %s", cookie) + return None + return cookie + except Exception as e: + log.error("Failed to get session id: %s", e) + return None + + +def _encrypt_auth(*args: tuple, **kwargs: dict) -> bytes: + """Encrypt the auth data.""" + try: + data = json.dumps([args, kwargs]).encode() + cipher = Fernet(AUTH_KEY) + return cipher.encrypt(data) + except Exception as e: + log.error("Failed to encrypt auth data: %s", e) + return None + + +def _decrypt_auth(configs: bytes) -> tuple[tuple, dict]: + """Decrypt the auth data.""" + try: + cipher = Fernet(AUTH_KEY) + data = cipher.decrypt(configs) + data_ = list(json.loads(data)) + return tuple(data_[0]), data_[1] + except Exception as e: + log.error("Failed to decrypt auth data: %s", e) + return None + + +def _update_auth_expiry(session_id: str) -> None: + """Update the auth expiry time.""" + try: + cached_sessions[session_id] = cached_sessions[session_id] + session_data[session_id] = session_data[session_id] + _set_session_id(session_id) + except Exception as e: + log.error("Failed to update auth expiry: %s", e) + + +def _auto_login_enabled(org_id: int) -> bool: + """Check if auto login feature is enabled.""" + try: + system_settings = get_organisation_settings(org_id=org_id, key=SystemSettingsKey.ENABLED_FEATURES) + if system_settings: # Only enable feature when explicitly enabled (dafault to Disabled) + return FeatureType.AUTO_LOGIN.name in system_settings + return False + except Exception as e: + log.error("Failed to check if auto login is enabled: %s", e) + return False + + +def cache_session_state(set_configs: Callable) -> Callable: + """Cache the auth session value to remember credentials on page reload.""" + + def wrapper(*args: tuple, **kwargs: dict) -> tuple: + """Auth wrapper.""" + try: + if "anonymous" in kwargs and kwargs["anonymous"]: + return set_configs(*args, **kwargs) + + session_id = _get_session_id() + if not session_id: + session_id = generate_session_id() + _set_session_id(session_id) + if session_id: + set_configs(*args, **kwargs) + cached_sessions[session_id] = _encrypt_auth(*args, **kwargs) + _update_auth_expiry(session_id) + except Exception as e: + log.error("Error caching auth session state: %s", e) + return None + + return wrapper + + +def get_auth_configs() -> Optional[tuple[tuple, dict]]: + """Get cached session state configs for auth.""" + try: + session_id = _get_session_id() + if session_id in cached_sessions: + configs = cached_sessions[session_id] + args, kwargs = _decrypt_auth(configs) + selected_org_id = kwargs.get("selected_org_id") or args[1] + if not _auto_login_enabled(selected_org_id): + return None + return _decrypt_auth(configs) + else: + return None + except Exception as e: + log.error("Failed to get auth result: %s", e) + return None + + +def session_logout() -> None: + """Clear all the data used to remember user session.""" + try: + session_id = _get_session_id() + if session_id in cached_sessions: + del cached_sessions[session_id] + if session_id in session_data: + del session_data[session_id] + _clear_cookie() + except Exception as e: + log.error("Failed to logout: %s", e) diff --git a/tests/docq/support/auth_utils_test.py b/tests/docq/support/auth_utils_test.py new file mode 100644 index 00000000..d162bf48 --- /dev/null +++ b/tests/docq/support/auth_utils_test.py @@ -0,0 +1,143 @@ +"""Test auth utils.""" +import unittest +from secrets import token_hex +from typing import Self +from unittest.mock import Mock, patch + +from docq.config import FeatureType +from docq.support import auth_utils +from docq.support.auth_utils import ( + _auto_login_enabled, + _clear_cookie, + _create_hmac, + _decrypt_auth, + _encrypt_auth, + _get_cookies, + _get_session_id, + _set_cookie, + _set_session_id, + _verify_hmac, + cache_session_state, + cached_sessions, + generate_session_id, + get_auth_configs, + session_data, + session_logout, +) + + +class TestAuthUtils(unittest.TestCase): + """Test auth utils.""" + + def setUp(self: Self) -> None: + """Setup module.""" + auth_utils.AUTH_SESSION_SECRET_KEY = token_hex(32) + + @patch("docq.support.auth_utils.html") + def test_set_cookie(self: Self, mock_html: Mock) -> None: + """Test set cookie.""" + _set_cookie("cookie") + mock_html.assert_called_once() + + @patch("docq.support.auth_utils.html") + def test_clear_cookie(self: Self, mock_html: Mock) -> None: + """Test clear cookie.""" + _clear_cookie() + mock_html.assert_called_once() + + @patch("docq.support.auth_utils._get_websocket_headers") + def test_get_cookies(self: Self, mock_headers: Mock) -> None: + """Test get cookies.""" + mock_headers.return_value = {"Cookie": "key=value"} + result = _get_cookies() + assert result == {"key": "value"} + + def test_create_hmac(self: Self) -> None: + """Test create hmac.""" + msg = "test" + digest = _create_hmac(msg) + assert isinstance(digest, str) + + def test_verify_hmac(self: Self) -> None: + """Test verify hmac.""" + msg = "test" + digest = _create_hmac(msg) + result = _verify_hmac(msg, digest) + assert result + + def test_generate_session_id(self: Self) -> None: + """Test generate session id.""" + id_ = generate_session_id() + assert isinstance(id_, str) + assert len(id_) == 64 + + @patch("docq.support.auth_utils._set_cookie") + def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: + """Test set session id.""" + session_id = "test" + _set_session_id(session_id) + mock_set_cookie.assert_called_once_with(session_id) + + @patch("docq.support.auth_utils._get_cookies") + def test_get_session_id(self: Self, mock_get_cookies: Mock) -> None: + """Test get session id.""" + session_id = generate_session_id() + cached_sessions[session_id] = _encrypt_auth(("9999", "user", 1)) + mock_get_cookies.return_value = {"docqai/_docq": session_id} + result = _get_session_id() + assert result == session_id + + def test_encrypt_decrypt_auth(self: Self) -> None: + """Test encrypt decrypt auth.""" + auth, kwargs = ("9999", "user", 1), {} + encrypted_auth = _encrypt_auth(*auth, **kwargs) + decrypted_auth = _decrypt_auth(encrypted_auth) + assert (auth, kwargs) == decrypted_auth + + @patch("docq.support.auth_utils._get_session_id") + @patch("docq.support.auth_utils._auto_login_enabled") + def test_cache_auth( + self: Self, + mock_auto_login_enabled: Mock, + mock_get_session_id: Mock + ) -> None: + """Test cache auth.""" + mock_func = Mock(return_value=("9999", "user", 1)) + session_id = generate_session_id() + mock_get_session_id.return_value = session_id + mock_auto_login_enabled.return_value = True + cache_session_state(mock_func)() + assert session_id in cached_sessions + assert mock_func.call_count == 1 + + @patch("docq.support.auth_utils._auto_login_enabled") + @patch("docq.support.auth_utils._get_session_id") + def test_auth_result(self: Self, mock_get_session_id: Mock, mock_auto_login_enabled: Mock) -> None: + """Test auth result.""" + args, kwargs = (("9999", "user", 1), {}) + mock_func = Mock() + session_id = generate_session_id() + mock_get_session_id.return_value = session_id + mock_auto_login_enabled.return_value = True + cache_session_state(mock_func)(*args, **kwargs) + result = get_auth_configs() + assert result == (args, kwargs), "Auth result should be same as input" + + @patch("docq.support.auth_utils._get_session_id") + def test_session_logout(self: Self, mock_get_session_id: Mock) -> None: + """Test session logout.""" + session_id = generate_session_id() + cached_sessions[session_id] = _encrypt_auth(("9999", "user", 1)) + session_data[session_id] = session_id + mock_get_session_id.return_value = session_id + session_logout() + assert session_id not in cached_sessions , "Cached session should be deleted on logout" + assert session_id not in session_data, "Session data should be deleted on logout" + + @patch("docq.support.auth_utils.get_organisation_settings") + def test_auto_login_enabled(self: Self, mock_get_system_settings: Mock) -> None: + """Test auto login enabled.""" + mock_get_system_settings.return_value = [FeatureType.AUTO_LOGIN.name] + result = _auto_login_enabled(9999) + assert mock_get_system_settings.call_count == 1 + assert result, "Auto login should be enabled" From 61053f76df30c82e3017d42c77491b3fcd848568 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 13:35:15 +0300 Subject: [PATCH 05/13] chore: Added handlers for auto login feature. --- web/utils/handlers.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/web/utils/handlers.py b/web/utils/handlers.py index de6a1ee0..629f4223 100644 --- a/web/utils/handlers.py +++ b/web/utils/handlers.py @@ -6,7 +6,7 @@ import math import random from datetime import datetime -from typing import Any, List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple import streamlit as st from docq import ( @@ -24,6 +24,7 @@ from docq.access_control.main import SpaceAccessor, SpaceAccessType from docq.data_source.list import SpaceDataSources from docq.domain import DocumentListItem, SpaceKey +from docq.support.auth_utils import cache_session_state, get_auth_configs, session_logout from .constants import ( MAX_NUMBER_OF_PERSONAL_DOCS, @@ -50,6 +51,7 @@ ) +@cache_session_state def _set_session_state_configs( user_id: int, selected_org_id: int, @@ -136,8 +138,22 @@ def handle_login(username: str, password: str) -> bool: return False +def handle_auto_login(auth_layout: Callable) -> Callable: + """Authenticate automatically without UI interaction.""" + def _auth_wrapper(*args: tuple, **kwargs: dict) -> tuple: + auth_configs = get_auth_configs() + if auth_configs and len(auth_configs) == 2: + _args, _kwargs = auth_configs + _set_session_state_configs(*_args, **_kwargs) + return auth_layout(*args, **kwargs) + return auth_layout(*args, **kwargs) + return _auth_wrapper + + def handle_logout() -> None: + """Handle logout.""" reset_session_state() + session_logout() def handle_create_user() -> int: From 3478faa34ed237d45ad9248e67efba5bb3548035 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Wed, 27 Sep 2023 13:36:36 +0300 Subject: [PATCH 06/13] chore: Added auto login decorator to auth_required method. --- web/utils/layout.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/web/utils/layout.py b/web/utils/layout.py index 1f5650e5..bf6c2536 100644 --- a/web/utils/layout.py +++ b/web/utils/layout.py @@ -23,6 +23,7 @@ get_space_data_source_choice_by_type, get_system_settings, handle_archive_org, + handle_auto_login, handle_chat_input, handle_create_new_chat, handle_create_org, @@ -264,6 +265,7 @@ def public_access() -> None: __always_hidden_pages() +@handle_auto_login def auth_required(show_login_form: bool = True, requiring_admin: bool = False, show_logout_button: bool = True) -> bool: """Decide layout based on current user's access.""" auth = get_auth_session() From 47d65741752ad42155c0a2371fd841855fb28d9c Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Thu, 28 Sep 2023 00:53:47 +0300 Subject: [PATCH 07/13] chore: Replace usage of decorators from auth utils. --- source/docq/support/auth_utils.py | 48 +++++++++++++-------------- tests/docq/support/auth_utils_test.py | 23 +++++++++---- web/utils/handlers.py | 29 ++++++++-------- web/utils/layout.py | 5 ++- 4 files changed, 58 insertions(+), 47 deletions(-) diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py index 40e15068..9f9e359b 100644 --- a/source/docq/support/auth_utils.py +++ b/source/docq/support/auth_utils.py @@ -6,7 +6,7 @@ import os from datetime import datetime, timedelta from secrets import token_hex -from typing import Callable, Dict, Optional +from typing import Any, Dict, Optional from cachetools import TTLCache from cryptography.fernet import Fernet @@ -30,9 +30,9 @@ def init_session_cache() -> None: if AUTH_SESSION_SECRET_KEY is None: log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY not set") raise ValueError("COOKIE_SECRET_KEY must be set") - if len(AUTH_SESSION_SECRET_KEY) < 16: - log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY must be 16 or more characters") - raise ValueError("COOKIE_SECRET_KEY must be 16 or more characters") + if len(AUTH_SESSION_SECRET_KEY) < 32: + log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY must be 32 or more characters") + raise ValueError("COOKIE_SECRET_KEY must be 32 or more characters") def _set_cookie(cookie: str) -> None: @@ -169,28 +169,26 @@ def _auto_login_enabled(org_id: int) -> bool: return False -def cache_session_state(set_configs: Callable) -> Callable: - """Cache the auth session value to remember credentials on page reload.""" - - def wrapper(*args: tuple, **kwargs: dict) -> tuple: - """Auth wrapper.""" - try: - if "anonymous" in kwargs and kwargs["anonymous"]: - return set_configs(*args, **kwargs) - - session_id = _get_session_id() - if not session_id: - session_id = generate_session_id() - _set_session_id(session_id) - if session_id: - set_configs(*args, **kwargs) - cached_sessions[session_id] = _encrypt_auth(*args, **kwargs) - _update_auth_expiry(session_id) - except Exception as e: - log.error("Error caching auth session state: %s", e) - return None +def cache_session_state_configs(*args: tuple, **kwargs: dict[str, Any]) -> None: + """Caches the session state configs for auth. + + This will cache any arguments and keyword arguments passed to it and can be retrived + by calling the get_auth_configs function: + >>> docq.support.auth_utils.get_auth_configs() - return wrapper + Args: + args: Arguments to be passed to the auth function. + kwargs: Keyword arguments to be passed to the auth function. + """ + try: + session_id = _get_session_id() + if not session_id: + session_id = generate_session_id() + _set_session_id(session_id) + cached_sessions[session_id] = _encrypt_auth(*args, **kwargs) + _update_auth_expiry(session_id) + except Exception as e: + log.error("Error caching auth session state: %s", e) def get_auth_configs() -> Optional[tuple[tuple, dict]]: diff --git a/tests/docq/support/auth_utils_test.py b/tests/docq/support/auth_utils_test.py index d162bf48..6b4cbf47 100644 --- a/tests/docq/support/auth_utils_test.py +++ b/tests/docq/support/auth_utils_test.py @@ -17,7 +17,7 @@ _set_cookie, _set_session_id, _verify_hmac, - cache_session_state, + cache_session_state_configs, cached_sessions, generate_session_id, get_auth_configs, @@ -33,18 +33,21 @@ def setUp(self: Self) -> None: """Setup module.""" auth_utils.AUTH_SESSION_SECRET_KEY = token_hex(32) + @patch("docq.support.auth_utils.html") def test_set_cookie(self: Self, mock_html: Mock) -> None: """Test set cookie.""" _set_cookie("cookie") mock_html.assert_called_once() + @patch("docq.support.auth_utils.html") def test_clear_cookie(self: Self, mock_html: Mock) -> None: """Test clear cookie.""" _clear_cookie() mock_html.assert_called_once() + @patch("docq.support.auth_utils._get_websocket_headers") def test_get_cookies(self: Self, mock_headers: Mock) -> None: """Test get cookies.""" @@ -52,12 +55,14 @@ def test_get_cookies(self: Self, mock_headers: Mock) -> None: result = _get_cookies() assert result == {"key": "value"} + def test_create_hmac(self: Self) -> None: """Test create hmac.""" msg = "test" digest = _create_hmac(msg) assert isinstance(digest, str) + def test_verify_hmac(self: Self) -> None: """Test verify hmac.""" msg = "test" @@ -65,12 +70,14 @@ def test_verify_hmac(self: Self) -> None: result = _verify_hmac(msg, digest) assert result + def test_generate_session_id(self: Self) -> None: """Test generate session id.""" id_ = generate_session_id() assert isinstance(id_, str) assert len(id_) == 64 + @patch("docq.support.auth_utils._set_cookie") def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: """Test set session id.""" @@ -78,6 +85,7 @@ def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: _set_session_id(session_id) mock_set_cookie.assert_called_once_with(session_id) + @patch("docq.support.auth_utils._get_cookies") def test_get_session_id(self: Self, mock_get_cookies: Mock) -> None: """Test get session id.""" @@ -87,6 +95,7 @@ def test_get_session_id(self: Self, mock_get_cookies: Mock) -> None: result = _get_session_id() assert result == session_id + def test_encrypt_decrypt_auth(self: Self) -> None: """Test encrypt decrypt auth.""" auth, kwargs = ("9999", "user", 1), {} @@ -94,6 +103,7 @@ def test_encrypt_decrypt_auth(self: Self) -> None: decrypted_auth = _decrypt_auth(encrypted_auth) assert (auth, kwargs) == decrypted_auth + @patch("docq.support.auth_utils._get_session_id") @patch("docq.support.auth_utils._auto_login_enabled") def test_cache_auth( @@ -102,27 +112,27 @@ def test_cache_auth( mock_get_session_id: Mock ) -> None: """Test cache auth.""" - mock_func = Mock(return_value=("9999", "user", 1)) + args, kwargs = ("9999", "user", 1), {} session_id = generate_session_id() mock_get_session_id.return_value = session_id mock_auto_login_enabled.return_value = True - cache_session_state(mock_func)() + cache_session_state_configs(*args, **kwargs) assert session_id in cached_sessions - assert mock_func.call_count == 1 + @patch("docq.support.auth_utils._auto_login_enabled") @patch("docq.support.auth_utils._get_session_id") def test_auth_result(self: Self, mock_get_session_id: Mock, mock_auto_login_enabled: Mock) -> None: """Test auth result.""" args, kwargs = (("9999", "user", 1), {}) - mock_func = Mock() session_id = generate_session_id() mock_get_session_id.return_value = session_id mock_auto_login_enabled.return_value = True - cache_session_state(mock_func)(*args, **kwargs) + cache_session_state_configs(*args, **kwargs) result = get_auth_configs() assert result == (args, kwargs), "Auth result should be same as input" + @patch("docq.support.auth_utils._get_session_id") def test_session_logout(self: Self, mock_get_session_id: Mock) -> None: """Test session logout.""" @@ -134,6 +144,7 @@ def test_session_logout(self: Self, mock_get_session_id: Mock) -> None: assert session_id not in cached_sessions , "Cached session should be deleted on logout" assert session_id not in session_data, "Session data should be deleted on logout" + @patch("docq.support.auth_utils.get_organisation_settings") def test_auto_login_enabled(self: Self, mock_get_system_settings: Mock) -> None: """Test auto login enabled.""" diff --git a/web/utils/handlers.py b/web/utils/handlers.py index 629f4223..a88c6310 100644 --- a/web/utils/handlers.py +++ b/web/utils/handlers.py @@ -6,7 +6,7 @@ import math import random from datetime import datetime -from typing import Any, Callable, List, Optional, Tuple +from typing import Any, List, Optional, Tuple import streamlit as st from docq import ( @@ -24,7 +24,7 @@ from docq.access_control.main import SpaceAccessor, SpaceAccessType from docq.data_source.list import SpaceDataSources from docq.domain import DocumentListItem, SpaceKey -from docq.support.auth_utils import cache_session_state, get_auth_configs, session_logout +from docq.support.auth_utils import cache_session_state_configs, get_auth_configs, session_logout from .constants import ( MAX_NUMBER_OF_PERSONAL_DOCS, @@ -51,7 +51,6 @@ ) -@cache_session_state def _set_session_state_configs( user_id: int, selected_org_id: int, @@ -90,6 +89,14 @@ def _set_session_state_configs( } ) else: + cache_session_state_configs( + user_id=user_id, + selected_org_id=selected_org_id, + name=name, + username=username, + super_admin=super_admin, + selected_org_admin=selected_org_admin, + ) set_auth_session( { SessionKeyNameForAuth.ID.name: user_id, @@ -138,16 +145,12 @@ def handle_login(username: str, password: str) -> bool: return False -def handle_auto_login(auth_layout: Callable) -> Callable: - """Authenticate automatically without UI interaction.""" - def _auth_wrapper(*args: tuple, **kwargs: dict) -> tuple: - auth_configs = get_auth_configs() - if auth_configs and len(auth_configs) == 2: - _args, _kwargs = auth_configs - _set_session_state_configs(*_args, **_kwargs) - return auth_layout(*args, **kwargs) - return auth_layout(*args, **kwargs) - return _auth_wrapper +def handle_set_cached_session_configs() -> None: + """Set cached auth configs.""" + auth_configs = get_auth_configs() + if auth_configs and len(auth_configs) == 2: + _args, _kwargs = auth_configs + _set_session_state_configs(*_args, **_kwargs) def handle_logout() -> None: diff --git a/web/utils/layout.py b/web/utils/layout.py index bf6c2536..3fd7b363 100644 --- a/web/utils/layout.py +++ b/web/utils/layout.py @@ -23,7 +23,6 @@ get_space_data_source_choice_by_type, get_system_settings, handle_archive_org, - handle_auto_login, handle_chat_input, handle_create_new_chat, handle_create_org, @@ -44,6 +43,7 @@ handle_org_selection_change, handle_public_session, handle_reindex_space, + handle_set_cached_session_configs, handle_update_org, handle_update_space_details, handle_update_space_group, @@ -265,9 +265,9 @@ def public_access() -> None: __always_hidden_pages() -@handle_auto_login def auth_required(show_login_form: bool = True, requiring_admin: bool = False, show_logout_button: bool = True) -> bool: """Decide layout based on current user's access.""" + handle_set_cached_session_configs() auth = get_auth_session() __always_hidden_pages() if auth: @@ -287,7 +287,6 @@ def auth_required(show_login_form: bool = True, requiring_admin: bool = False, s return False - def public_session_setup() -> None: """Initialize session state for the public pages.""" handle_public_session() From 2f5c0afdf273af2eeeccf8a40e9f759c22cb3f58 Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Thu, 28 Sep 2023 01:21:29 +0300 Subject: [PATCH 08/13] chore: Added a UI to handle errors without leaking information to end user. --- web/index.py | 5 ++--- web/utils/layout.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/web/index.py b/web/index.py index 2f12c244..794c0e35 100644 --- a/web/index.py +++ b/web/index.py @@ -1,11 +1,10 @@ """Page: Home (no auth required).""" import streamlit as st -from docq import setup from st_pages import Page, Section, add_page_title, show_pages -from utils.layout import org_selection_ui, production_layout, public_access +from utils.layout import load_setup_ui, org_selection_ui, production_layout, public_access -setup.init() +load_setup_ui() production_layout() diff --git a/web/utils/layout.py b/web/utils/layout.py index 892d3eb4..f74e2083 100644 --- a/web/utils/layout.py +++ b/web/utils/layout.py @@ -11,6 +11,7 @@ from st_pages import hide_pages from streamlit.components.v1 import html from streamlit.delta_generator import DeltaGenerator +from docq import setup from .constants import ALLOWED_DOC_EXTS, SessionKeyNameForAuth, SessionKeyNameForChat from .formatters import format_archived, format_datetime, format_filesize, format_timestamp @@ -878,3 +879,14 @@ def org_selection_ui() -> None: ) if selected: handle_org_selection_change(selected[0]) + + + +def load_setup_ui() -> None: + """UI to run setup and prevent showing errors to the user.""" + try: + setup.init() + except Exception as e: + st.error("Error while setting up the app please refer to logs for more details.") + log.exception("Error while setting up the app: %s", e) + st.stop() From cc91247ffd50b1fd8ff9ce786e95419bc46387dd Mon Sep 17 00:00:00 2001 From: Jashon Osala Date: Thu, 28 Sep 2023 01:33:00 +0300 Subject: [PATCH 09/13] chore: Update page load failed message. --- web/utils/layout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/utils/layout.py b/web/utils/layout.py index f74e2083..0e8c3dfb 100644 --- a/web/utils/layout.py +++ b/web/utils/layout.py @@ -4,6 +4,7 @@ from typing import List, Tuple import streamlit as st +from docq import setup from docq.access_control.main import SpaceAccessType from docq.config import FeatureType, LogType, SpaceType, SystemSettingsKey from docq.domain import DocumentListItem, FeatureKey, SpaceKey @@ -11,7 +12,6 @@ from st_pages import hide_pages from streamlit.components.v1 import html from streamlit.delta_generator import DeltaGenerator -from docq import setup from .constants import ALLOWED_DOC_EXTS, SessionKeyNameForAuth, SessionKeyNameForChat from .formatters import format_archived, format_datetime, format_filesize, format_timestamp @@ -887,6 +887,6 @@ def load_setup_ui() -> None: try: setup.init() except Exception as e: - st.error("Error while setting up the app please refer to logs for more details.") + st.error("Docq encountered an error while initializing please refer to logs for more details.") log.exception("Error while setting up the app: %s", e) st.stop() From 2ec4826f575cde8e2de6ab9a0bb43d2e4ce6147a Mon Sep 17 00:00:00 2001 From: Janaka Abeywardhana Date: Sun, 1 Oct 2023 15:37:16 +0100 Subject: [PATCH 10/13] Janaka/update/ux reloading web pages (#113) * refactor: auth token based logic * fix cookie setting * refactor: remove AUTO_LOGIN as feature type adding ability to toggle token based auth per org is making it more brittle and risky * refactor: rename env vars to prefix "DOCQ_" * refactor: rename session cookie var to be more specific * tests: fix auth utils tests after refactor * fix: remove _auto_login_feature_enabled * chore: tweak debug logs in auth utils --- .gitignore | 2 +- misc/docker.env.template | 4 +- misc/secrets.toml.template | 4 +- source/docq/config.py | 7 +- source/docq/manage_settings.py | 2 +- source/docq/support/auth_utils.py | 218 ++++++++++++++------------ tests/docq/support/auth_utils_test.py | 120 ++++++-------- web/index.py | 4 +- web/utils/handlers.py | 71 +++++---- web/utils/layout.py | 53 +++++-- web/utils/sessions.py | 21 ++- 11 files changed, 269 insertions(+), 237 deletions(-) diff --git a/.gitignore b/.gitignore index e24be176..fa6f90f1 100644 --- a/.gitignore +++ b/.gitignore @@ -176,7 +176,7 @@ cython_debug/ # Exported from Poetry, used in docker build only requirements.txt # Used for file storage in local development only -.persisted/ +.persisted*/ # Used for running Streamlit by storing configs and secrets locally .streamlit/ # Used by GitHub Pages local build before uploading to GitHub diff --git a/misc/docker.env.template b/misc/docker.env.template index eb739d2b..efddb56b 100644 --- a/misc/docker.env.template +++ b/misc/docker.env.template @@ -1,5 +1,5 @@ STREAMLIT_SERVER_ADDRESS=0.0.0.0 STREAMLIT_SERVER_PORT=8501 #default DOCQ_DATA=./.persisted/ -OPENAI_API_KEY # ideally set value on shell, don't insert a value here becuase it's a secret. -COOKIE_SECRET_KEY=cookie_password \ No newline at end of file +DOCQ_OPENAI_API_KEY # ideally set value on shell, don't insert a value here becuase it's a secret. +DOCQ_COOKIE_HMAC_SECRET_KEY=cookie_password \ No newline at end of file diff --git a/misc/secrets.toml.template b/misc/secrets.toml.template index 5bb3007a..9b6b7136 100644 --- a/misc/secrets.toml.template +++ b/misc/secrets.toml.template @@ -1,3 +1,3 @@ DOCQ_DATA = "./.persisted/" -OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" -COOKIE_SECRET_KEY = "cookies_password" \ No newline at end of file +DOCQ_OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" +DOCQ_COOKIE_HMAC_SECRET_KEY = "32_char_secret_used_to_encrypt" \ No newline at end of file diff --git a/source/docq/config.py b/source/docq/config.py index 05beecfb..1568a48e 100644 --- a/source/docq/config.py +++ b/source/docq/config.py @@ -4,9 +4,9 @@ ENV_VAR_DOCQ_DATA = "DOCQ_DATA" ENV_VAR_DOCQ_DEMO = "DOCQ_DEMO" -ENV_VAR_OPENAI_API_KEY = "OPENAI_API_KEY" -ENV_VAR_COOKIE_SECRET_KEY = "COOKIE_SECRET_KEY" -COOKIE_NAME = "docqai/_docq" +ENV_VAR_OPENAI_API_KEY = "DOCQ_OPENAI_API_KEY" +ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY = "DOCQ_COOKIE_HMAC_SECRET_KEY" +SESSION_COOKIE_NAME = "docqai/_docq" class SpaceType(Enum): @@ -24,7 +24,6 @@ class FeatureType(Enum): ASK_SHARED = "Ask Shared Documents" ASK_PUBLIC = "Ask Public Documents" CHAT_PRIVATE = "General Chat" - AUTO_LOGIN = "Auto Login" class LogType(Enum): diff --git a/source/docq/manage_settings.py b/source/docq/manage_settings.py index 7c365dda..0eaf5ade 100644 --- a/source/docq/manage_settings.py +++ b/source/docq/manage_settings.py @@ -59,11 +59,11 @@ def _get_settings(org_id: int, user_id: int = None) -> dict: def _update_settings(settings: dict, org_id: int, user_id: int = None) -> bool: - log.debug("Updating settings for user %d", user_id) with closing( sqlite3.connect(_get_sqlite_file(user_id), detect_types=sqlite3.PARSE_DECLTYPES) ) as connection, closing(connection.cursor()) as cursor: user_id = user_id or USER_ID_AS_SYSTEM + log.debug("Updating settings for user %d", user_id) cursor.executemany( "INSERT OR REPLACE INTO settings (user_id, org_id, key, val) VALUES (?, ?, ?, ?)", [(user_id, org_id, key, json.dumps(val)) for key, val in settings.items()], diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py index 9f9e359b..90856860 100644 --- a/source/docq/support/auth_utils.py +++ b/source/docq/support/auth_utils.py @@ -6,56 +6,70 @@ import os from datetime import datetime, timedelta from secrets import token_hex -from typing import Any, Dict, Optional +from typing import Dict, Optional from cachetools import TTLCache from cryptography.fernet import Fernet from streamlit.components.v1 import html from streamlit.web.server.websocket_headers import _get_websocket_headers -from ..config import COOKIE_NAME, ENV_VAR_COOKIE_SECRET_KEY, FeatureType -from ..manage_settings import SystemSettingsKey, get_organisation_settings +from ..config import SESSION_COOKIE_NAME, ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY EXPIRY_HOURS = 4 CACHE_CONFIG = (1024 * 1, 60 * 60 * EXPIRY_HOURS) AUTH_KEY = Fernet.generate_key() -AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_COOKIE_SECRET_KEY) +AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY) + +# Chase of session data keyed by session id +cached_sessions: TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) + +# Cache of session id's keyed by hmac hash +session_data: TTLCache[str, str] = TTLCache(*CACHE_CONFIG) + + +# TODO: the code that handles the cookie should move to the web side. session state tracking is in the backend but not a public API as it's just cross cutting. -# Session Cache. -cached_sessions:TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) -session_data:TTLCache[str, str]= TTLCache(*CACHE_CONFIG) def init_session_cache() -> None: """Initialize session cache.""" if AUTH_SESSION_SECRET_KEY is None: - log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY not set") - raise ValueError("COOKIE_SECRET_KEY must be set") + log.fatal("Failed to initialize session cache: DOCQ_COOKIE_HMAC_SECRET_KEY not set") + raise ValueError("DOCQ_COOKIE_HMAC_SECRET_KEY must be set") if len(AUTH_SESSION_SECRET_KEY) < 32: - log.fatal("Failed to initialize session cache: COOKIE_SECRET_KEY must be 32 or more characters") - raise ValueError("COOKIE_SECRET_KEY must be 32 or more characters") + log.fatal("Failed to initialize session cache: DOCQ_COOKIE_HMAC_SECRET_KEY must be 32 or more characters") + raise ValueError("DOCQ_COOKIE_HMAC_SECRET_KEY must be 32 or more characters") def _set_cookie(cookie: str) -> None: """Set client cookie for authentication.""" try: expiry = datetime.now() + timedelta(hours=EXPIRY_HOURS) - html(f""" + html( + f""" - """, width=0, height=0) + """, + width=0, + height=0, + ) except Exception as e: log.error("Failed to set cookie: %s", e) -def _clear_cookie() -> None: +def _clear_cookie(cookie_name: str) -> None: """Clear client cookie.""" - html(f""" + html( + f""" - """, width=0, height=0) + """, + width=0, + height=0, + ) + log.debug("Clear client cookie: %s", cookie_name) def _get_cookies() -> Optional[Dict[str, str]]: @@ -75,59 +89,70 @@ def _get_cookies() -> Optional[Dict[str, str]]: return None -def _create_hmac( msg: str) -> str: +def _create_hmac(msg: str) -> str: """Create a HMAC hash.""" - return hmac.new( - AUTH_SESSION_SECRET_KEY.encode(), - msg.encode(), - hashlib.sha256 - ).hexdigest() + return hmac.new(AUTH_SESSION_SECRET_KEY.encode(), msg.encode(), hashlib.sha256).hexdigest() def _verify_hmac(msg: str, digest: str) -> bool: """Verify credibility of HMAC hash.""" - return hmac.compare_digest( - _create_hmac(msg), - digest - ) + return hmac.compare_digest(_create_hmac(msg), digest) -def generate_session_id(length: int = 32) -> str: - """Generate a secure and unique session_id.""" +def generate_hmac_session_id(length: int = 32) -> str: + """Generate a secure (HMAC) and unique session_id then track in session cache.""" id_ = token_hex(length // 2) hmac_ = _create_hmac(id_) session_data[hmac_] = id_ + log.debug("Generated new hmac session id: %s", hmac_) return hmac_ -def _set_session_id(session_id: str) -> None: - """Set the session_id in the cookie.""" - _set_cookie(session_id) +def _set_cookie_session_id(hmac_session_id: str) -> None: + """Set the encrypted session_id in the cookie.""" + _set_cookie(hmac_session_id) + log.debug("_set_cookie_session_id() - hmac session id: %s", hmac_session_id) -def _get_session_id() -> Optional[str]: - """Return the session_id from the cookie.""" +def _get_cookie_session_id() -> str | None: + """Return the Docq encrypted HMAC session_id from the cookie.""" try: + hmac_session_id = None cookies = _get_cookies() if cookies is not None: - cookie = cookies.get(COOKIE_NAME) - if cookie is None: - return None - if cookie not in cached_sessions: - return None - if not _verify_hmac(session_data[cookie], cookie): - log.warning("Session ID not verified: %s", cookie) - return None - return cookie + hmac_session_id = cookies.get(SESSION_COOKIE_NAME) + return hmac_session_id except Exception as e: log.error("Failed to get session id: %s", e) return None -def _encrypt_auth(*args: tuple, **kwargs: dict) -> bytes: - """Encrypt the auth data.""" +def verify_cookie_hmac_session_id() -> str | None: + """Verify the encrypted session_id from the cookie. + + Return: + str: The hmac_session_id if verified. + None: If not verified. + """ + hmac_session_id = None + hmac_session_id = _get_cookie_session_id() + if hmac_session_id is None: + log.debug("No session id in cookie found") + elif hmac_session_id not in cached_sessions: + log.debug( + "verify_cookie_hmac_session_id(): HMAC Session ID not found in cache. Session expired or was explicitly removed: %s" + ) + hmac_session_id = None + elif hmac_session_id not in session_data or not _verify_hmac(session_data[hmac_session_id], hmac_session_id): + log.warning("verify_cookie_hmac_session_id(): HMAC Session ID failed verification: %s") + hmac_session_id = None + return hmac_session_id + + +def _encrypt(payload: dict) -> bytes: + """Encrypt some data.""" try: - data = json.dumps([args, kwargs]).encode() + data = json.dumps(payload).encode() cipher = Fernet(AUTH_KEY) return cipher.encrypt(data) except Exception as e: @@ -135,88 +160,75 @@ def _encrypt_auth(*args: tuple, **kwargs: dict) -> bytes: return None -def _decrypt_auth(configs: bytes) -> tuple[tuple, dict]: - """Decrypt the auth data.""" +def _decrypt(encrypted_payload: bytes) -> dict: + """Decrypt some data.""" try: cipher = Fernet(AUTH_KEY) - data = cipher.decrypt(configs) - data_ = list(json.loads(data)) - return tuple(data_[0]), data_[1] + data = cipher.decrypt(encrypted_payload) + result = json.loads(data) + return result except Exception as e: log.error("Failed to decrypt auth data: %s", e) return None -def _update_auth_expiry(session_id: str) -> None: +def _reset_expiry_cache_auth_session(session_id: str) -> None: """Update the auth expiry time.""" try: cached_sessions[session_id] = cached_sessions[session_id] session_data[session_id] = session_data[session_id] - _set_session_id(session_id) + # _set_cookie_session_id(session_id) except Exception as e: log.error("Failed to update auth expiry: %s", e) -def _auto_login_enabled(org_id: int) -> bool: - """Check if auto login feature is enabled.""" - try: - system_settings = get_organisation_settings(org_id=org_id, key=SystemSettingsKey.ENABLED_FEATURES) - if system_settings: # Only enable feature when explicitly enabled (dafault to Disabled) - return FeatureType.AUTO_LOGIN.name in system_settings - return False - except Exception as e: - log.error("Failed to check if auto login is enabled: %s", e) - return False - - -def cache_session_state_configs(*args: tuple, **kwargs: dict[str, Any]) -> None: - """Caches the session state configs for auth. - - This will cache any arguments and keyword arguments passed to it and can be retrived - by calling the get_auth_configs function: - >>> docq.support.auth_utils.get_auth_configs() +def set_cache_auth_session(val: dict) -> None: + """Caches the session state configs for auth, persisting across connections. Args: - args: Arguments to be passed to the auth function. - kwargs: Keyword arguments to be passed to the auth function. + val (dict): The session state for auth. """ try: - session_id = _get_session_id() - if not session_id: - session_id = generate_session_id() - _set_session_id(session_id) - cached_sessions[session_id] = _encrypt_auth(*args, **kwargs) - _update_auth_expiry(session_id) + hmac_session_id = _get_cookie_session_id() + if hmac_session_id is None: + hmac_session_id = generate_hmac_session_id() + _set_cookie_session_id(hmac_session_id) + cached_sessions[hmac_session_id] = _encrypt(val) + _reset_expiry_cache_auth_session(hmac_session_id) except Exception as e: - log.error("Error caching auth session state: %s", e) + log.error("Error caching auth session: %s", e) -def get_auth_configs() -> Optional[tuple[tuple, dict]]: - """Get cached session state configs for auth.""" +def get_cache_auth_session() -> dict | None: + """Verify the session auth token and get the cached session state for the current session. The current session is identified by a session_id wrapped in a auth token in a browser session cookie.""" try: - session_id = _get_session_id() - if session_id in cached_sessions: - configs = cached_sessions[session_id] - args, kwargs = _decrypt_auth(configs) - selected_org_id = kwargs.get("selected_org_id") or args[1] - if not _auto_login_enabled(selected_org_id): - return None - return _decrypt_auth(configs) - else: - return None + decrypted_auth_session_data = None + hmac_session_id = _get_cookie_session_id() + if hmac_session_id in cached_sessions: + encrypted_auth_session_data = cached_sessions[hmac_session_id] + decrypted_auth_session_data = _decrypt(encrypted_auth_session_data) + return decrypted_auth_session_data except Exception as e: - log.error("Failed to get auth result: %s", e) + log.error("Failed to get auth session from cache: %s", e) return None -def session_logout() -> None: - """Clear all the data used to remember user session.""" +def remove_cache_auth_session() -> None: + """Remove the cached session state for the current session. The current session is identified by a session_id in a particular browsersession cookie.""" + try: + hmac_session_id = _get_cookie_session_id() + if hmac_session_id in cached_sessions: + del cached_sessions[hmac_session_id] + if hmac_session_id in session_data: + del session_data[hmac_session_id] + except Exception as e: + log.error("Failed to remove auth session from cache: %s", e) + + +def reset_cache_and_cookie_auth_session() -> None: + """Clear all the data used to remember user session (auth session cache and session cookie). This must be called at login and cookie.""" try: - session_id = _get_session_id() - if session_id in cached_sessions: - del cached_sessions[session_id] - if session_id in session_data: - del session_data[session_id] - _clear_cookie() + remove_cache_auth_session() + _clear_cookie(SESSION_COOKIE_NAME) except Exception as e: - log.error("Failed to logout: %s", e) + log.error("Failed to clear session data caches (hmac, session data, and session cookie ): %s", e) diff --git a/tests/docq/support/auth_utils_test.py b/tests/docq/support/auth_utils_test.py index 6b4cbf47..78b6d91c 100644 --- a/tests/docq/support/auth_utils_test.py +++ b/tests/docq/support/auth_utils_test.py @@ -4,25 +4,24 @@ from typing import Self from unittest.mock import Mock, patch -from docq.config import FeatureType from docq.support import auth_utils from docq.support.auth_utils import ( - _auto_login_enabled, + SESSION_COOKIE_NAME, _clear_cookie, _create_hmac, - _decrypt_auth, - _encrypt_auth, + _decrypt, + _encrypt, + _get_cookie_session_id, _get_cookies, - _get_session_id, _set_cookie, - _set_session_id, + _set_cookie_session_id, _verify_hmac, - cache_session_state_configs, cached_sessions, - generate_session_id, - get_auth_configs, + generate_hmac_session_id, + get_cache_auth_session, + reset_cache_and_cookie_auth_session, session_data, - session_logout, + set_cache_auth_session, ) @@ -33,21 +32,18 @@ def setUp(self: Self) -> None: """Setup module.""" auth_utils.AUTH_SESSION_SECRET_KEY = token_hex(32) - @patch("docq.support.auth_utils.html") def test_set_cookie(self: Self, mock_html: Mock) -> None: """Test set cookie.""" _set_cookie("cookie") mock_html.assert_called_once() - @patch("docq.support.auth_utils.html") def test_clear_cookie(self: Self, mock_html: Mock) -> None: """Test clear cookie.""" - _clear_cookie() + _clear_cookie(SESSION_COOKIE_NAME) mock_html.assert_called_once() - @patch("docq.support.auth_utils._get_websocket_headers") def test_get_cookies(self: Self, mock_headers: Mock) -> None: """Test get cookies.""" @@ -55,14 +51,12 @@ def test_get_cookies(self: Self, mock_headers: Mock) -> None: result = _get_cookies() assert result == {"key": "value"} - def test_create_hmac(self: Self) -> None: """Test create hmac.""" msg = "test" digest = _create_hmac(msg) assert isinstance(digest, str) - def test_verify_hmac(self: Self) -> None: """Test verify hmac.""" msg = "test" @@ -70,85 +64,65 @@ def test_verify_hmac(self: Self) -> None: result = _verify_hmac(msg, digest) assert result - def test_generate_session_id(self: Self) -> None: """Test generate session id.""" - id_ = generate_session_id() + id_ = generate_hmac_session_id() assert isinstance(id_, str) assert len(id_) == 64 - @patch("docq.support.auth_utils._set_cookie") def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: """Test set session id.""" session_id = "test" - _set_session_id(session_id) + _set_cookie_session_id(session_id) mock_set_cookie.assert_called_once_with(session_id) - @patch("docq.support.auth_utils._get_cookies") - def test_get_session_id(self: Self, mock_get_cookies: Mock) -> None: + def test_get_cookie_session_id(self: Self, mock_get_cookies: Mock) -> None: """Test get session id.""" - session_id = generate_session_id() - cached_sessions[session_id] = _encrypt_auth(("9999", "user", 1)) - mock_get_cookies.return_value = {"docqai/_docq": session_id} - result = _get_session_id() + session_id = generate_hmac_session_id() + cached_sessions[session_id] = _encrypt(("9999", "user", 1)) + mock_get_cookies.return_value = {SESSION_COOKIE_NAME: session_id} + result = _get_cookie_session_id() assert result == session_id - def test_encrypt_decrypt_auth(self: Self) -> None: """Test encrypt decrypt auth.""" - auth, kwargs = ("9999", "user", 1), {} - encrypted_auth = _encrypt_auth(*auth, **kwargs) - decrypted_auth = _decrypt_auth(encrypted_auth) - assert (auth, kwargs) == decrypted_auth + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + encrypted_auth = _encrypt(payload) + decrypted_auth = _decrypt(encrypted_auth) + assert payload == decrypted_auth - - @patch("docq.support.auth_utils._get_session_id") - @patch("docq.support.auth_utils._auto_login_enabled") - def test_cache_auth( - self: Self, - mock_auto_login_enabled: Mock, - mock_get_session_id: Mock - ) -> None: + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_cache_auth(self: Self, mock_get_cookie_session_id: Mock) -> None: """Test cache auth.""" - args, kwargs = ("9999", "user", 1), {} - session_id = generate_session_id() - mock_get_session_id.return_value = session_id - mock_auto_login_enabled.return_value = True - cache_session_state_configs(*args, **kwargs) + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + session_id = generate_hmac_session_id() + mock_get_cookie_session_id.return_value = session_id + set_cache_auth_session(payload) assert session_id in cached_sessions - - @patch("docq.support.auth_utils._auto_login_enabled") - @patch("docq.support.auth_utils._get_session_id") - def test_auth_result(self: Self, mock_get_session_id: Mock, mock_auto_login_enabled: Mock) -> None: + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_auth_result( + self: Self, + mock_get_cookie_session_id: Mock, + ) -> None: """Test auth result.""" - args, kwargs = (("9999", "user", 1), {}) - session_id = generate_session_id() - mock_get_session_id.return_value = session_id - mock_auto_login_enabled.return_value = True - cache_session_state_configs(*args, **kwargs) - result = get_auth_configs() - assert result == (args, kwargs), "Auth result should be same as input" - - - @patch("docq.support.auth_utils._get_session_id") - def test_session_logout(self: Self, mock_get_session_id: Mock) -> None: + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + session_id = generate_hmac_session_id() + mock_get_cookie_session_id.return_value = session_id + # mock_auto_login_enabled.return_value = True + set_cache_auth_session(payload) + result = get_cache_auth_session() + assert result == {"org_id": "9999", "username": "user name", "user_id": 1} + + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_session_logout(self: Self, mock_get_cookie_session_id: Mock) -> None: """Test session logout.""" - session_id = generate_session_id() - cached_sessions[session_id] = _encrypt_auth(("9999", "user", 1)) + session_id = generate_hmac_session_id() + cached_sessions[session_id] = _encrypt(("9999", "user", 1)) session_data[session_id] = session_id - mock_get_session_id.return_value = session_id - session_logout() - assert session_id not in cached_sessions , "Cached session should be deleted on logout" + mock_get_cookie_session_id.return_value = session_id + reset_cache_and_cookie_auth_session() + assert session_id not in cached_sessions, "Cached session should be deleted on logout" assert session_id not in session_data, "Session data should be deleted on logout" - - - @patch("docq.support.auth_utils.get_organisation_settings") - def test_auto_login_enabled(self: Self, mock_get_system_settings: Mock) -> None: - """Test auto login enabled.""" - mock_get_system_settings.return_value = [FeatureType.AUTO_LOGIN.name] - result = _auto_login_enabled(9999) - assert mock_get_system_settings.call_count == 1 - assert result, "Auto login should be enabled" diff --git a/web/index.py b/web/index.py index 794c0e35..710fd1a1 100644 --- a/web/index.py +++ b/web/index.py @@ -2,9 +2,9 @@ import streamlit as st from st_pages import Page, Section, add_page_title, show_pages -from utils.layout import load_setup_ui, org_selection_ui, production_layout, public_access +from utils.layout import init_with_pretty_error_ui, org_selection_ui, production_layout, public_access -load_setup_ui() +init_with_pretty_error_ui() production_layout() diff --git a/web/utils/handlers.py b/web/utils/handlers.py index a88c6310..4e6d8229 100644 --- a/web/utils/handlers.py +++ b/web/utils/handlers.py @@ -24,7 +24,7 @@ from docq.access_control.main import SpaceAccessor, SpaceAccessType from docq.data_source.list import SpaceDataSources from docq.domain import DocumentListItem, SpaceKey -from docq.support.auth_utils import cache_session_state_configs, get_auth_configs, session_logout +from docq.support.auth_utils import get_cache_auth_session, reset_cache_and_cookie_auth_session, set_cache_auth_session from .constants import ( MAX_NUMBER_OF_PERSONAL_DOCS, @@ -41,6 +41,7 @@ get_chat_session, get_public_space_group_id, get_selected_org_id, + get_settings_session, get_username, reset_session_state, set_auth_session, @@ -60,7 +61,8 @@ def _set_session_state_configs( super_admin: bool = False, selected_org_admin: bool = False, space_group_id: Optional[int] = None, - public_session_id: Optional[str] = None ) -> None: + public_session_id: Optional[str] = None, +) -> None: """Set the session state for the configs. Args: @@ -86,17 +88,18 @@ def _set_session_state_configs( SessionKeyNameForAuth.PUBLIC_SESSION_ID.name: public_session_id, SessionKeyNameForAuth.PUBLIC_SPACE_GROUP_ID.name: space_group_id, SessionKeyNameForAuth.ANONYMOUS.name: anonymous, - } + }, + True, ) else: - cache_session_state_configs( - user_id=user_id, - selected_org_id=selected_org_id, - name=name, - username=username, - super_admin=super_admin, - selected_org_admin=selected_org_admin, - ) + # cache_session_state_configs( + # user_id=user_id, + # selected_org_id=selected_org_id, + # name=name, + # username=username, + # super_admin=super_admin, + # selected_org_admin=selected_org_admin, + # ) set_auth_session( { SessionKeyNameForAuth.ID.name: user_id, @@ -106,7 +109,8 @@ def _set_session_state_configs( SessionKeyNameForAuth.SELECTED_ORG_ID.name: selected_org_id, SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name: selected_org_admin, SessionKeyNameForAuth.ANONYMOUS.name: anonymous, - } + }, + True, ) set_settings_session( { @@ -121,15 +125,17 @@ def _set_session_state_configs( def handle_login(username: str, password: str) -> bool: """Handle login.""" reset_session_state() + reset_cache_and_cookie_auth_session() result = manage_users.authenticate(username, password) - current_user_id = result[0] - member_orgs = manage_organisations.list_organisations( - user_id=current_user_id - ) # we can't use handle_list_orgs() here - default_org_id = member_orgs[0][0] - selected_org_admin = current_user_id in [x[0] for x in member_orgs[0][2]] - log.info("Login result: %s", result) + if result: + current_user_id = result[0] + member_orgs = manage_organisations.list_organisations( + user_id=current_user_id + ) # we can't use handle_list_orgs() here + default_org_id = member_orgs[0][0] + selected_org_admin = current_user_id in [x[0] for x in member_orgs[0][2]] + log.info("Login result: %s", result) _set_session_state_configs( user_id=current_user_id, selected_org_id=default_org_id, @@ -145,18 +151,11 @@ def handle_login(username: str, password: str) -> bool: return False -def handle_set_cached_session_configs() -> None: - """Set cached auth configs.""" - auth_configs = get_auth_configs() - if auth_configs and len(auth_configs) == 2: - _args, _kwargs = auth_configs - _set_session_state_configs(*_args, **_kwargs) - - def handle_logout() -> None: """Handle logout.""" reset_session_state() - session_logout() + reset_cache_and_cookie_auth_session() + log.info("Logout") def handle_create_user() -> int: @@ -354,10 +353,7 @@ def _get_chat_spaces(feature: domain.FeatureKey) -> tuple[Optional[SpaceKey], Li if feature.type_ == config.FeatureType.ASK_PUBLIC: personal_space = None - shared_spaces = [ - domain.SpaceKey(config.SpaceType.SHARED, s_[0], select_org_id) - for s_ in list_public_spaces() - ] + shared_spaces = [domain.SpaceKey(config.SpaceType.SHARED, s_[0], select_org_id) for s_ in list_public_spaces()] return personal_space, shared_spaces shared_spaces = None @@ -533,6 +529,7 @@ def get_enabled_features() -> list[domain.FeatureKey]: def handle_update_system_settings() -> None: current_org_id = get_selected_org_id() + manage_settings.update_organisation_settings( { config.SystemSettingsKey.ENABLED_FEATURES.name: [ @@ -541,6 +538,14 @@ def handle_update_system_settings() -> None: }, org_id=current_org_id, ) + set_settings_session( + { + config.SystemSettingsKey.ENABLED_FEATURES.name: [ + f.name for f in st.session_state[f"system_settings_{config.SystemSettingsKey.ENABLED_FEATURES.name}"] + ], + }, + SessionKeyNameForSettings.SYSTEM, + ) def get_max_number_of_documents(type_: config.SpaceType): @@ -636,7 +641,7 @@ def handle_public_session() -> None: space_group_id=space_group_id, public_session_id=session_id, ) - else: # if no query params are provided, set space_group_id and public_session_id to -1 to disable ASK_PUBLIC feature + else: # if no query params are provided, set space_group_id and public_session_id to -1 to disable ASK_PUBLIC feature _set_session_state_configs( user_id=None, selected_org_id=None, diff --git a/web/utils/layout.py b/web/utils/layout.py index 0e8c3dfb..a55a182c 100644 --- a/web/utils/layout.py +++ b/web/utils/layout.py @@ -8,7 +8,11 @@ from docq.access_control.main import SpaceAccessType from docq.config import FeatureType, LogType, SpaceType, SystemSettingsKey from docq.domain import DocumentListItem, FeatureKey, SpaceKey -from docq.manage_users import list_users_by_org +from docq.support.auth_utils import ( + get_cache_auth_session, + reset_cache_and_cookie_auth_session, + verify_cookie_hmac_session_id, +) from st_pages import hide_pages from streamlit.components.v1 import html from streamlit.delta_generator import DeltaGenerator @@ -16,6 +20,7 @@ from .constants import ALLOWED_DOC_EXTS, SessionKeyNameForAuth, SessionKeyNameForChat from .formatters import format_archived, format_datetime, format_filesize, format_timestamp from .handlers import ( + _set_session_state_configs, get_enabled_features, get_max_number_of_documents, get_shared_space, @@ -44,7 +49,6 @@ handle_org_selection_change, handle_public_session, handle_reindex_space, - handle_set_cached_session_configs, handle_update_org, handle_update_space_details, handle_update_space_group, @@ -70,7 +74,8 @@ get_public_space_group_id, get_selected_org_id, is_current_user_super_admin, - set_selected_org_id, + reset_session_state, + session_state_exists, ) _chat_ui_script = """ @@ -200,6 +205,7 @@ def __no_admin_menu() -> None: ] ) + def __embed_page_config() -> None: st.markdown( """ @@ -239,7 +245,7 @@ def __login_form() -> None: if handle_login(username, password): st.experimental_rerun() else: - st.error("Invalid username or password.") + st.error("The Username and Password you entered doesn't match what we have.") st.stop() else: st.stop() @@ -268,10 +274,33 @@ def public_access() -> None: def auth_required(show_login_form: bool = True, requiring_admin: bool = False, show_logout_button: bool = True) -> bool: """Decide layout based on current user's access.""" - handle_set_cached_session_configs() - auth = get_auth_session() + log.debug("auth_required() called") + auth = None __always_hidden_pages() + + session_state_existed = session_state_exists() + log.debug("auth_required(): session_state_existed: %s", session_state_existed) + if session_state_existed: + auth = get_auth_session() + elif verify_cookie_hmac_session_id() is not None: + # there's a valid auth session token. Let's get session state from cache. + auth = get_cache_auth_session() + log.debug("auth_required(): Got auth session state from cache: %s", auth) + if auth: + log.debug("auth_required(): Valid auth session found: %s", auth) + if not session_state_existed: + # the user probably refreshed the page resetting Streamlit session state because it's bound to a browser session connection. + _set_session_state_configs( + user_id=auth[SessionKeyNameForAuth.ID.name], + selected_org_id=auth[SessionKeyNameForAuth.SELECTED_ORG_ID.name], + name=auth[SessionKeyNameForAuth.NAME.name], + username=auth[SessionKeyNameForAuth.USERNAME.name], + anonymous=False, + super_admin=auth[SessionKeyNameForAuth.SUPER_ADMIN.name], + selected_org_admin=auth[SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name], + ) + if show_logout_button: __logout_button() @@ -283,6 +312,9 @@ def auth_required(show_login_form: bool = True, requiring_admin: bool = False, s return True else: + log.debug("auth_required(): No valid auth session found. User needs to re-authenticate.") + reset_session_state() + reset_cache_and_cookie_auth_session() if show_login_form: __login_form() return False @@ -314,7 +346,7 @@ def public_space_enabled(feature: FeatureKey) -> None: feature_is_ready, spaces = (space_group_id != -1 or session_id != -1), None if feature_is_ready: spaces = list_public_spaces() - if not feature_is_ready or not spaces: # Stop the app if there are no public spaces. + if not feature_is_ready or not spaces: # Stop the app if there are no public spaces. st.error("This feature is not ready.") st.info("Please contact your administrator to configure this feature.") st.stop() @@ -881,12 +913,11 @@ def org_selection_ui() -> None: handle_org_selection_change(selected[0]) - -def load_setup_ui() -> None: +def init_with_pretty_error_ui() -> None: """UI to run setup and prevent showing errors to the user.""" try: setup.init() except Exception as e: - st.error("Docq encountered an error while initializing please refer to logs for more details.") - log.exception("Error while setting up the app: %s", e) + st.error("Something went wrong starting Docq.") + log.fatal("Error: setup.init() failed with %s", e) st.stop() diff --git a/web/utils/sessions.py b/web/utils/sessions.py index bca975d7..aee05b7d 100644 --- a/web/utils/sessions.py +++ b/web/utils/sessions.py @@ -1,9 +1,11 @@ """Session utilities.""" +import logging from typing import Any import streamlit as st from docq import config, manage_users +from docq.support.auth_utils import set_cache_auth_session from .constants import ( SESSION_KEY_NAME_DOCQ, @@ -25,9 +27,15 @@ def _init_session_state() -> None: st.session_state[SESSION_KEY_NAME_DOCQ][SessionKeySubName.CHAT.name][n.name] = {} +def session_state_exists() -> bool: + """Check if any session state exists.""" + return SESSION_KEY_NAME_DOCQ in st.session_state + + def reset_session_state() -> None: """Reset the session state. This must be called for user login and logout.""" st.session_state[SESSION_KEY_NAME_DOCQ] = {} + logging.debug("called reset_session_state()") def _get_session_value(name: SessionKeySubName, key_: str = None, subkey_: str = None) -> Any | None: @@ -72,6 +80,14 @@ def set_chat_session(val: Any | None, type_: config.FeatureType = None, key_: Se ) +def set_auth_session(val: dict = None, cache: bool = False) -> None: + """Set the auth session value.""" + _set_session_value(val, SessionKeySubName.AUTH) + if cache: + # this persists the auth session across browser session in Streamlit i.e. when the user hits refresh. + set_cache_auth_session(val) + + def get_auth_session() -> dict: """Get the auth session value.""" return _get_session_value(SessionKeySubName.AUTH) @@ -96,11 +112,6 @@ def set_if_current_user_is_selected_org_admin(selected_org_id: int) -> None: _set_session_value(is_org_admin, SessionKeySubName.AUTH, SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name) -def set_auth_session(val: dict = None) -> None: - """Set the auth session value.""" - _set_session_value(val, SessionKeySubName.AUTH) - - def get_authenticated_user_id() -> int | None: """Get the authenticated user id.""" return _get_session_value(SessionKeySubName.AUTH, SessionKeyNameForAuth.ID.name) From f7675b945bbb1b5c866bbbd3f19e0867b739aaae Mon Sep 17 00:00:00 2001 From: Janaka Abeywardhana Date: Mon, 2 Oct 2023 00:52:36 +0100 Subject: [PATCH 11/13] fix: session caching bug refactor: cache var names --- source/docq/support/auth_utils.py | 62 ++++++++++++++++----------- tests/docq/support/auth_utils_test.py | 16 +++---- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py index 90856860..18ccd57b 100644 --- a/source/docq/support/auth_utils.py +++ b/source/docq/support/auth_utils.py @@ -13,18 +13,19 @@ from streamlit.components.v1 import html from streamlit.web.server.websocket_headers import _get_websocket_headers -from ..config import SESSION_COOKIE_NAME, ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY +from ..config import ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY, SESSION_COOKIE_NAME EXPIRY_HOURS = 4 -CACHE_CONFIG = (1024 * 1, 60 * 60 * EXPIRY_HOURS) +TTL = 60 * 60 * EXPIRY_HOURS +CACHE_CONFIG = (1024 * 1, TTL) AUTH_KEY = Fernet.generate_key() AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY) -# Chase of session data keyed by session id -cached_sessions: TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) +# Cache of session data keyed by hmac hash (hmac of session id) +cached_session_data: TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) -# Cache of session id's keyed by hmac hash -session_data: TTLCache[str, str] = TTLCache(*CACHE_CONFIG) +# Cache of session id's keyed by hmac hash (hmac of session id) +cached_session_ids: TTLCache[str, str] = TTLCache(*CACHE_CONFIG) # TODO: the code that handles the cookie should move to the web side. session state tracking is in the backend but not a public API as it's just cross cutting. @@ -103,7 +104,7 @@ def generate_hmac_session_id(length: int = 32) -> str: """Generate a secure (HMAC) and unique session_id then track in session cache.""" id_ = token_hex(length // 2) hmac_ = _create_hmac(id_) - session_data[hmac_] = id_ + cached_session_ids[hmac_] = id_ log.debug("Generated new hmac session id: %s", hmac_) return hmac_ @@ -136,15 +137,18 @@ def verify_cookie_hmac_session_id() -> str | None: """ hmac_session_id = None hmac_session_id = _get_cookie_session_id() + if hmac_session_id is None: - log.debug("No session id in cookie found") - elif hmac_session_id not in cached_sessions: - log.debug( - "verify_cookie_hmac_session_id(): HMAC Session ID not found in cache. Session expired or was explicitly removed: %s" + log.debug("verify_cookie_hmac_session_id(): No session id (auth token) cookie found.") + elif hmac_session_id not in cached_session_ids: + log.warning( + "verify_cookie_hmac_session_id(): item with key=hmac_session_id `cached_session_ids`. The auth session either expired or explicitly removed." ) + log.debug("cached session ids : %s", cached_session_ids.keys()) + log.debug("cached session data: %s", cached_session_data.keys()) hmac_session_id = None - elif hmac_session_id not in session_data or not _verify_hmac(session_data[hmac_session_id], hmac_session_id): - log.warning("verify_cookie_hmac_session_id(): HMAC Session ID failed verification: %s") + elif not _verify_hmac(cached_session_ids[hmac_session_id], hmac_session_id): + log.warning("verify_cookie_hmac_session_id(): HMAC Session ID failed verification.") hmac_session_id = None return hmac_session_id @@ -172,11 +176,11 @@ def _decrypt(encrypted_payload: bytes) -> dict: return None -def _reset_expiry_cache_auth_session(session_id: str) -> None: +def _reset_expiry_cache_auth_session(hmac_session_id: str) -> None: """Update the auth expiry time.""" try: - cached_sessions[session_id] = cached_sessions[session_id] - session_data[session_id] = session_data[session_id] + cached_session_data[hmac_session_id] = cached_session_data[hmac_session_id] + cached_session_ids[hmac_session_id] = cached_session_ids[hmac_session_id] # _set_cookie_session_id(session_id) except Exception as e: log.error("Failed to update auth expiry: %s", e) @@ -190,10 +194,16 @@ def set_cache_auth_session(val: dict) -> None: """ try: hmac_session_id = _get_cookie_session_id() - if hmac_session_id is None: + log.debug("set_cache_auth_session() - hmac session id: %s", hmac_session_id) + + if hmac_session_id is None or hmac_session_id not in cached_session_ids: + log.debug( + "set_cache_auth_session() - Valid session id (auth token) not found. session_data: %s", + cached_session_ids.keys(), + ) hmac_session_id = generate_hmac_session_id() - _set_cookie_session_id(hmac_session_id) - cached_sessions[hmac_session_id] = _encrypt(val) + _set_cookie_session_id(hmac_session_id) + cached_session_data[hmac_session_id] = _encrypt(val) _reset_expiry_cache_auth_session(hmac_session_id) except Exception as e: log.error("Error caching auth session: %s", e) @@ -204,8 +214,8 @@ def get_cache_auth_session() -> dict | None: try: decrypted_auth_session_data = None hmac_session_id = _get_cookie_session_id() - if hmac_session_id in cached_sessions: - encrypted_auth_session_data = cached_sessions[hmac_session_id] + if hmac_session_id in cached_session_data: + encrypted_auth_session_data = cached_session_data[hmac_session_id] decrypted_auth_session_data = _decrypt(encrypted_auth_session_data) return decrypted_auth_session_data except Exception as e: @@ -217,10 +227,12 @@ def remove_cache_auth_session() -> None: """Remove the cached session state for the current session. The current session is identified by a session_id in a particular browsersession cookie.""" try: hmac_session_id = _get_cookie_session_id() - if hmac_session_id in cached_sessions: - del cached_sessions[hmac_session_id] - if hmac_session_id in session_data: - del session_data[hmac_session_id] + if hmac_session_id in cached_session_data: + del cached_session_data[hmac_session_id] + log.debug("Removed from cached_session: %s", hmac_session_id) + if hmac_session_id in cached_session_ids: + del cached_session_ids[hmac_session_id] + log.debug("Removed from session_data: %s", hmac_session_id) except Exception as e: log.error("Failed to remove auth session from cache: %s", e) diff --git a/tests/docq/support/auth_utils_test.py b/tests/docq/support/auth_utils_test.py index 78b6d91c..001bba7a 100644 --- a/tests/docq/support/auth_utils_test.py +++ b/tests/docq/support/auth_utils_test.py @@ -16,11 +16,11 @@ _set_cookie, _set_cookie_session_id, _verify_hmac, - cached_sessions, + cached_session_data, generate_hmac_session_id, get_cache_auth_session, reset_cache_and_cookie_auth_session, - session_data, + cached_session_ids, set_cache_auth_session, ) @@ -81,7 +81,7 @@ def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: def test_get_cookie_session_id(self: Self, mock_get_cookies: Mock) -> None: """Test get session id.""" session_id = generate_hmac_session_id() - cached_sessions[session_id] = _encrypt(("9999", "user", 1)) + cached_session_data[session_id] = _encrypt(("9999", "user", 1)) mock_get_cookies.return_value = {SESSION_COOKIE_NAME: session_id} result = _get_cookie_session_id() assert result == session_id @@ -100,7 +100,7 @@ def test_cache_auth(self: Self, mock_get_cookie_session_id: Mock) -> None: session_id = generate_hmac_session_id() mock_get_cookie_session_id.return_value = session_id set_cache_auth_session(payload) - assert session_id in cached_sessions + assert session_id in cached_session_data @patch("docq.support.auth_utils._get_cookie_session_id") def test_auth_result( @@ -120,9 +120,9 @@ def test_auth_result( def test_session_logout(self: Self, mock_get_cookie_session_id: Mock) -> None: """Test session logout.""" session_id = generate_hmac_session_id() - cached_sessions[session_id] = _encrypt(("9999", "user", 1)) - session_data[session_id] = session_id + cached_session_data[session_id] = _encrypt(("9999", "user", 1)) + cached_session_ids[session_id] = session_id mock_get_cookie_session_id.return_value = session_id reset_cache_and_cookie_auth_session() - assert session_id not in cached_sessions, "Cached session should be deleted on logout" - assert session_id not in session_data, "Session data should be deleted on logout" + assert session_id not in cached_session_data, "Cached session should be deleted on logout" + assert session_id not in cached_session_ids, "Session data should be deleted on logout" From 7dd92b766800f63d0f260334f6b01bc30eab6946 Mon Sep 17 00:00:00 2001 From: Janaka Abeywardhana Date: Mon, 2 Oct 2023 01:00:47 +0100 Subject: [PATCH 12/13] refactor: improve var names chore: adjust logging not to leak hmac session id into logs. --- source/docq/support/auth_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py index 18ccd57b..10e5e2e3 100644 --- a/source/docq/support/auth_utils.py +++ b/source/docq/support/auth_utils.py @@ -15,9 +15,9 @@ from ..config import ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY, SESSION_COOKIE_NAME -EXPIRY_HOURS = 4 -TTL = 60 * 60 * EXPIRY_HOURS -CACHE_CONFIG = (1024 * 1, TTL) +TTL_HOURS = 4 +TTL_SEC = 60 * 60 * TTL_HOURS +CACHE_CONFIG = (1024 * 1, TTL_SEC) AUTH_KEY = Fernet.generate_key() AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY) @@ -44,7 +44,7 @@ def init_session_cache() -> None: def _set_cookie(cookie: str) -> None: """Set client cookie for authentication.""" try: - expiry = datetime.now() + timedelta(hours=EXPIRY_HOURS) + expiry = datetime.now() + timedelta(hours=TTL_HOURS) html( f"""