diff --git a/.gitignore b/.gitignore index e24be176..fa6f90f1 100644 --- a/.gitignore +++ b/.gitignore @@ -176,7 +176,7 @@ cython_debug/ # Exported from Poetry, used in docker build only requirements.txt # Used for file storage in local development only -.persisted/ +.persisted*/ # Used for running Streamlit by storing configs and secrets locally .streamlit/ # Used by GitHub Pages local build before uploading to GitHub diff --git a/misc/docker.env.template b/misc/docker.env.template index 30432e31..efddb56b 100644 --- a/misc/docker.env.template +++ b/misc/docker.env.template @@ -1,4 +1,5 @@ STREAMLIT_SERVER_ADDRESS=0.0.0.0 STREAMLIT_SERVER_PORT=8501 #default DOCQ_DATA=./.persisted/ -OPENAI_API_KEY # ideally set value on shell, don't insert a value here becuase it's a secret. +DOCQ_OPENAI_API_KEY # ideally set value on shell, don't insert a value here because it's a secret. +DOCQ_COOKIE_HMAC_SECRET_KEY=cookie_password \ No newline at end of file diff --git a/misc/secrets.toml.template b/misc/secrets.toml.template index 194aa400..9b6b7136 100644 --- a/misc/secrets.toml.template +++ b/misc/secrets.toml.template @@ -1,2 +1,3 @@ DOCQ_DATA = "./.persisted/" -OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" \ No newline at end of file +DOCQ_OPENAI_API_KEY = "YOUR-OPENAI-API-KEY" +DOCQ_COOKIE_HMAC_SECRET_KEY = "32_char_secret_used_to_encrypt" \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 56ef0b9e..b0481f2c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
[[package]] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -113,7 +112,6 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -128,7 +126,6 @@ frozenlist = ">=1.1.0" name = "altair" version = "4.2.2" description = "Altair: A declarative statistical visualization library for Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -151,7 +148,6 @@ dev = ["black", "docutils", "flake8", "ipython", "m2r", "mistune (<2.0.0)", "pyt name = "argon2-cffi" version = "21.3.0" description = "The secure Argon2 password hashing algorithm." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -171,7 +167,6 @@ tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"] name = "argon2-cffi-bindings" version = "21.2.0" description = "Low-level CFFI bindings for Argon2" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -209,7 +204,6 @@ tests = ["pytest"] name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -221,7 +215,6 @@ files = [ name = "atomicwrites" version = "1.4.1" description = "Atomic file writes." 
-category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -232,7 +225,6 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -251,7 +243,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "azure-core" version = "1.27.1" description = "Microsoft Azure Core Library for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -271,7 +262,6 @@ aio = ["aiohttp (>=3.0)"] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -290,7 +280,6 @@ lxml = ["lxml"] name = "black" version = "22.12.0" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -325,7 +314,6 @@ uvloop = ["uvloop (>=0.15.2)"] name = "blinker" version = "1.6.2" description = "Fast, simple object-to-object and broadcast signaling" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -337,7 +325,6 @@ files = [ name = "bracex" version = "2.3.post1" description = "Bash style brace expander." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -349,7 +336,6 @@ files = [ name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -361,7 +347,6 @@ files = [ name = "cairocffi" version = "1.6.0" description = "cffi-based cairo bindings for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -381,7 +366,6 @@ xcb = ["xcffib (>=1.4.0)"] name = "cairosvg" version = "2.7.0" description = "A Simple SVG Converter based on Cairo" -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -404,7 +388,6 @@ test = ["flake8", "isort", "pytest"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -416,7 +399,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = "*" files = [ @@ -493,7 +475,6 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." -category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -505,7 +486,6 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -590,7 +570,6 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -605,7 +584,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -617,7 +595,6 @@ files = [ name = "coverage" version = "7.2.7" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -689,11 +666,55 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "41.0.4" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, + {file = "cryptography-41.0.4-cp37-abi3-win32.whl", 
hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, + {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, + {file = 
"cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, + {file = "cryptography-41.0.4.tar.gz", hash = "sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +nox = ["nox"] +pep8test = ["black", "check-sdist", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "cssselect2" version = "0.7.0" description = "CSS selectors for Python ElementTree" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -713,7 +734,6 @@ test = ["flake8", "isort", "pytest"] name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -733,7 +753,6 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -745,7 +764,6 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -757,7 +775,6 @@ files = [ name = "distlib" version = "0.3.6" description = "Distribution utilities" -category = "dev" optional = false python-versions = "*" files = [ @@ -769,7 +786,6 @@ files = [ name = "docx2txt" version = "0.8" description = "A pure python-based utility to extract text and images from docx files." 
-category = "main" optional = false python-versions = "*" files = [ @@ -780,7 +796,6 @@ files = [ name = "entrypoints" version = "0.4" description = "Discover and load entry points from installed packages." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -792,7 +807,6 @@ files = [ name = "filelock" version = "3.12.0" description = "A platform independent file lock." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -808,7 +822,6 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "p name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -879,7 +892,6 @@ files = [ name = "fsspec" version = "2023.9.0" description = "File-system specification" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -915,7 +927,6 @@ tqdm = ["tqdm"] name = "ghp-import" version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." 
-category = "dev" optional = false python-versions = "*" files = [ @@ -933,7 +944,6 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -948,7 +958,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.35" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -963,7 +972,6 @@ gitdb = ">=4.0.1,<5" name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" -category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -1037,7 +1045,6 @@ test = ["objgraph", "psutil"] name = "identify" version = "2.5.24" description = "File identification library for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1052,7 +1059,6 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1064,7 +1070,6 @@ files = [ name = "importlib-metadata" version = "6.6.0" description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1084,7 +1089,6 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1096,7 +1100,6 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,7 +1117,6 @@ i18n = ["Babel (>=2.7)"] name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1126,7 +1128,6 @@ files = [ name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1146,7 +1147,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "langchain" version = "0.0.288" description = "Building applications with LLMs through composability" -category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1185,7 +1185,6 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] name = "langsmith" version = "0.0.37" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1201,7 +1200,6 @@ requests = ">=2,<3" name = "llama-index" version = "0.8.27" description = "Interface between LLMs and your data" -category = "main" optional = false python-versions = "*" files = [ @@ -1230,7 +1228,6 @@ urllib3 = "<2" name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1250,7 +1247,6 @@ testing = ["pytest"] name = "markdown" version = "3.3.7" description = "Python implementation of Markdown." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1265,7 +1261,6 @@ testing = ["coverage", "pyyaml"] name = "markdown-it-py" version = "2.2.0" description = "Python port of markdown-it. Markdown parsing, done right!" 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1290,7 +1285,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.2" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1350,7 +1344,6 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1371,7 +1364,6 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" -category = "main" optional = false python-versions = "*" files = [ @@ -1386,7 +1378,6 @@ marshmallow = ">=2.0.0" name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1398,7 +1389,6 @@ files = [ name = "mergedeep" version = "1.3.4" description = "A deep merge function for 🐍." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1410,7 +1400,6 @@ files = [ name = "mkapi" version = "1.0.14" description = "An Auto API Documentation tool." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1426,7 +1415,6 @@ markdown = "*" name = "mkdocs" version = "1.4.3" description = "Project documentation with Markdown." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1454,7 +1442,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp name = "mkdocs-awesome-pages-plugin" version = "2.9.1" description = "An MkDocs plugin that simplifies configuring page titles and their order" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1471,7 +1458,6 @@ wcmatch = ">=7" name = "mkdocs-gen-files" version = "0.4.0" description = "MkDocs plugin to programmatically generate documentation pages during the build" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1486,7 +1472,6 @@ mkdocs = ">=1.0.3,<2.0.0" name = "mkdocs-material" version = "8.5.11" description = "Documentation that simply works" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1507,7 +1492,6 @@ requests = ">=2.26" name = "mkdocs-material-extensions" version = "1.1.1" description = "Extension pack for Python Markdown and MkDocs Material." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1519,7 +1503,6 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1603,7 +1586,6 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1615,7 +1597,6 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1631,7 +1612,6 @@ icu = ["PyICU (>=1.0.0)"] name = "nest-asyncio" version = "1.5.7" description = "Patch asyncio to allow nested event loops" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1643,7 +1623,6 @@ files = [ name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1669,7 +1648,6 @@ twitter = ["twython"] name = "nodeenv" version = "1.8.0" description = "Node.js virtual environment builder" -category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -1684,7 +1662,6 @@ setuptools = "*" name = "numexpr" version = "2.8.6" description = "Fast numerical expression evaluator for NumPy" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1727,7 +1704,6 @@ numpy = ">=1.13.3" name = "numpy" version = "1.24.3" description = "Fundamental package for array computing in Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1765,7 +1741,6 @@ files = [ name = "openai" version = "0.28.0" description = "Python client library for the OpenAI API" -category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -1780,7 +1755,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -1788,7 +1763,6 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", 
"pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "opendal" version = "0.38.1" description = "OpenDAL Python Binding" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1807,7 +1781,6 @@ test = ["behave"] name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1819,7 +1792,6 @@ files = [ name = "pandas" version = "2.0.2" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1886,7 +1858,6 @@ xml = ["lxml (>=4.6.3)"] name = "parse" version = "1.19.0" description = "parse() is the opposite of format()" -category = "dev" optional = false python-versions = "*" files = [ @@ -1897,7 +1868,6 @@ files = [ name = "parse-type" version = "0.6.0" description = "Simplifies to build parse types based on the parse module" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*" files = [ @@ -1917,7 +1887,6 @@ docs = ["sphinx (>=1.2)"] name = "pastel" version = "0.2.1" description = "Bring colors to your terminal." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1929,7 +1898,6 @@ files = [ name = "pathspec" version = "0.11.1" description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1941,7 +1909,6 @@ files = [ name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2021,7 +1988,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "3.5.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2037,7 +2003,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2053,7 +2018,6 @@ testing = ["pytest", "pytest-benchmark"] name = "poethepoet" version = "0.16.5" description = "A task runner that works well with poetry." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2072,7 +2036,6 @@ poetry-plugin = ["poetry (>=1.0,<2.0)"] name = "pre-commit" version = "2.21.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2091,7 +2054,6 @@ virtualenv = ">=20.10.0" name = "protobuf" version = "3.20.3" description = "Protocol Buffers" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2123,7 +2085,6 @@ files = [ name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2135,7 +2096,6 @@ files = [ name = "pyarrow" version = "12.0.0" description = "Python library for Apache Arrow" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2173,7 +2133,6 @@ numpy = ">=1.16.6" name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2185,7 +2144,6 @@ files = [ name = "pydantic" version = "1.10.12" description = "Data validation and settings management using python type hints" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2238,7 +2196,6 @@ email = ["email-validator (>=1.0.3)"] name = "pydeck" 
version = "0.8.0" description = "Widget for deck.gl maps" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2258,7 +2215,6 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2273,7 +2229,6 @@ plugins = ["importlib-metadata"] name = "pymdown-extensions" version = "10.0.1" description = "Extension pack for Python Markdown." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2289,7 +2244,6 @@ pyyaml = "*" name = "pympler" version = "1.0.1" description = "A development tool to measure, monitor and analyze the memory behavior of Python objects." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2301,7 +2255,6 @@ files = [ name = "pypdf" version = "3.9.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2320,7 +2273,6 @@ image = ["Pillow"] name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2357,7 +2309,6 @@ files = [ name = "pytest" version = "7.1.1" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2382,7 +2333,6 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2. name = "pytest-bdd" version = "6.1.1" description = "BDD for pytest" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -2401,7 +2351,6 @@ typing-extensions = "*" name = "pytest-cov" version = "3.0.0" description = "Pytest plugin for measuring coverage." 
-category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2420,7 +2369,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-html" version = "3.2.0" description = "pytest plugin for generating HTML reports" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2437,7 +2385,6 @@ pytest-metadata = "*" name = "pytest-metadata" version = "3.0.0" description = "pytest plugin for test session metadata" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2455,7 +2402,6 @@ test = ["black (>=22.1.0)", "flake8 (>=4.0.1)", "pre-commit (>=2.17.0)", "tox (> name = "pytest-reverse" version = "1.5.0" description = "Pytest plugin to reverse test order." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2470,7 +2416,6 @@ pytest = "*" name = "pytest-sugar" version = "0.9.7" description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." 
-category = "dev" optional = false python-versions = "*" files = [ @@ -2490,7 +2435,6 @@ dev = ["black", "flake8", "pre-commit"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2505,7 +2449,6 @@ six = ">=1.5" name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -2517,7 +2460,6 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2532,7 +2474,6 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2582,7 +2523,6 @@ files = [ name = "pyyaml-env-tag" version = "0.1" description = "A custom YAML tag for referencing environment variables in YAML files. " -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2597,7 +2537,6 @@ pyyaml = "*" name = "regex" version = "2023.8.8" description = "Alternative regular expression module, to replace re." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2695,7 +2634,6 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2717,7 +2655,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rich" version = "13.4.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -2736,7 +2673,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "ruff" version = "0.0.253" description = "An extremely fast Python linter, written in Rust." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2763,7 +2699,6 @@ files = [ name = "setuptools" version = "67.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2780,7 +2715,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2792,7 +2726,6 @@ files = [ name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2804,7 +2737,6 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." 
-category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2816,7 +2748,6 @@ files = [ name = "sqlalchemy" version = "2.0.20" description = "Database Abstraction Library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2864,7 +2795,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} typing-extensions = ">=4.2.0" [package.extras] @@ -2895,7 +2826,6 @@ sqlcipher = ["sqlcipher3-binary"] name = "st-pages" version = "0.4.1" description = "An experimental version of Streamlit Multi-Page Apps" -category = "main" optional = false python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*" files = [ @@ -2911,7 +2841,6 @@ streamlit = ">=1.10.0" name = "streamlit" version = "1.24.0" description = "A faster way to build and share data apps" -category = "main" optional = false python-versions = ">=3.8, !=3.9.7" files = [ @@ -2952,7 +2881,6 @@ snowflake = ["snowflake-snowpark-python"] name = "tenacity" version = "8.2.2" description = "Retry code until it succeeds" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2967,7 +2895,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "termcolor" version = "2.3.0" description = "ANSI color formatting for output in terminal" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2982,7 +2909,6 @@ tests = ["pytest", "pytest-cov"] name = "tiktoken" version = 
"0.5.1" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3028,7 +2954,6 @@ blobfile = ["blobfile (>=2)"] name = "tinycss2" version = "1.2.1" description = "A tiny CSS parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3047,7 +2972,6 @@ test = ["flake8", "isort", "pytest"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3059,7 +2983,6 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3071,7 +2994,6 @@ files = [ name = "toolz" version = "0.12.0" description = "List processing tools and functional utilities" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3083,7 +3005,6 @@ files = [ name = "tornado" version = "6.3.3" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "main" optional = false python-versions = ">= 3.8" files = [ @@ -3104,7 +3025,6 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3125,7 +3045,6 @@ telegram = ["requests"] name = "typing-extensions" version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3137,7 +3056,6 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
-category = "main" optional = false python-versions = "*" files = [ @@ -3153,7 +3071,6 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -3165,7 +3082,6 @@ files = [ name = "tzlocal" version = "4.3.1" description = "tzinfo object for the local timezone" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3184,7 +3100,6 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte name = "urllib3" version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3201,7 +3116,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "validators" version = "0.20.0" description = "Python Data Validation for Humans™." -category = "main" optional = false python-versions = ">=3.4" files = [ @@ -3218,7 +3132,6 @@ test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] name = "virtualenv" version = "20.23.0" description = "Virtual Python Environment builder" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3239,7 +3152,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess name = "watchdog" version = "3.0.0" description = "Filesystem events monitoring" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3279,7 +3191,6 @@ watchmedo = ["PyYAML (>=3.10)"] name = "wcmatch" version = "8.4.1" description = "Wildcard/glob file name matcher." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3294,7 +3205,6 @@ bracex = ">=2.1.1" name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -category = "dev" optional = false python-versions = "*" files = [ @@ -3306,7 +3216,6 @@ files = [ name = "xdoctest" version = "1.1.1" description = "A rewrite of the builtin doctest module" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3334,7 +3243,6 @@ tests-strict = ["codecov (==2.0.15)", "pytest (==4.6.0)", "pytest (==4.6.0)", "p name = "yarl" version = "1.9.2" description = "Yet another URL library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3422,7 +3330,6 @@ multidict = ">=4.0" name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3437,4 +3344,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e25a66ab0535aa105f6f617627aeb1f7056d08d8792ed318d631b25f3cb883f9" +content-hash = "5d9ac6c0559c579362251b09f48ec9458cb0fb697562679477ed01959c7ba646" diff --git a/pyproject.toml b/pyproject.toml index f3d52a23..034a144c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ argon2-cffi = "^21.3.0" azure-core = "^1.27.1" opendal = "^0.38.1" llama-index = "0.8.27" +cryptography = "^41.0.3" [tool.poetry.group.dev.dependencies] pre-commit = "^2.18.1" diff --git a/source/docq/config.py b/source/docq/config.py index 9bbeba18..1568a48e 100644 --- a/source/docq/config.py +++ b/source/docq/config.py @@ -4,7 +4,9 @@ ENV_VAR_DOCQ_DATA = "DOCQ_DATA" ENV_VAR_DOCQ_DEMO = "DOCQ_DEMO" -ENV_VAR_OPENAI_API_KEY = "OPENAI_API_KEY" +ENV_VAR_OPENAI_API_KEY = "DOCQ_OPENAI_API_KEY" +ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY = "DOCQ_COOKIE_HMAC_SECRET_KEY" +SESSION_COOKIE_NAME = 
"docqai/_docq" class SpaceType(Enum): diff --git a/source/docq/manage_settings.py b/source/docq/manage_settings.py index 7c365dda..0eaf5ade 100644 --- a/source/docq/manage_settings.py +++ b/source/docq/manage_settings.py @@ -59,11 +59,11 @@ def _get_settings(org_id: int, user_id: int = None) -> dict: def _update_settings(settings: dict, org_id: int, user_id: int = None) -> bool: - log.debug("Updating settings for user %d", user_id) with closing( sqlite3.connect(_get_sqlite_file(user_id), detect_types=sqlite3.PARSE_DECLTYPES) ) as connection, closing(connection.cursor()) as cursor: user_id = user_id or USER_ID_AS_SYSTEM + log.debug("Updating settings for user %d", user_id) cursor.executemany( "INSERT OR REPLACE INTO settings (user_id, org_id, key, val) VALUES (?, ?, ?, ?)", [(user_id, org_id, key, json.dumps(val)) for key, val in settings.items()], diff --git a/source/docq/setup.py b/source/docq/setup.py index bb6fae93..dd73378a 100644 --- a/source/docq/setup.py +++ b/source/docq/setup.py @@ -9,7 +9,7 @@ manage_user_groups, manage_users, ) -from .support import store +from .support import auth_utils, store def _config_logging() -> None: @@ -29,4 +29,5 @@ def init() -> None: store._init() manage_organisations._init_default_org_if_necessary() manage_users._init_admin_if_necessary() + auth_utils.init_session_cache() logging.info("Docq initialized") diff --git a/source/docq/support/auth_utils.py b/source/docq/support/auth_utils.py new file mode 100644 index 00000000..e7f94ae2 --- /dev/null +++ b/source/docq/support/auth_utils.py @@ -0,0 +1,246 @@ +"""Cache user sessions.""" +import hashlib +import hmac +import json +import logging as log +import os +from datetime import datetime, timedelta +from secrets import token_hex +from typing import Dict, Optional + +from cachetools import TTLCache +from cryptography.fernet import Fernet +from streamlit.components.v1 import html +from streamlit.web.server.websocket_headers import _get_websocket_headers + +from ..config import 
ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY, SESSION_COOKIE_NAME + +TTL_HOURS = 1 +TTL_SEC = 60 * 60 * TTL_HOURS +CACHE_CONFIG = (1024 * 1, TTL_SEC) +AUTH_KEY = Fernet.generate_key() +AUTH_SESSION_SECRET_KEY: str = os.environ.get(ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY) + +# Cache of session data keyed by hmac hash (hmac of session id) +cached_session_data: TTLCache[str, bytes] = TTLCache(*CACHE_CONFIG) + +# Cache of session id's keyed by hmac hash (hmac of session id) +cached_session_ids: TTLCache[str, str] = TTLCache(*CACHE_CONFIG) + + +# TODO: the code that handles the cookie should move to the web side. session state tracking is in the backend but not a public API as it's just cross cutting. + + +def init_session_cache() -> None: + """Initialize session cache.""" + if AUTH_SESSION_SECRET_KEY is None: + log.fatal("Failed to initialize session cache: DOCQ_COOKIE_HMAC_SECRET_KEY not set") + raise ValueError("DOCQ_COOKIE_HMAC_SECRET_KEY must be set") + if len(AUTH_SESSION_SECRET_KEY) < 32: + log.fatal("Failed to initialize session cache: DOCQ_COOKIE_HMAC_SECRET_KEY must be 32 or more characters") + raise ValueError("DOCQ_COOKIE_HMAC_SECRET_KEY must be 32 or more characters") + + +def _set_cookie(cookie: str) -> None: + """Set client cookie for authentication.""" + try: + expiry = datetime.now() + timedelta(hours=TTL_HOURS) + html( + f""" + + """, + width=0, + height=0, + ) + except Exception as e: + log.error("Failed to set cookie: %s", e) + + +def _clear_cookie(cookie_name: str) -> None: + """Clear client cookie.""" + html( + f""" + + """, + width=0, + height=0, + ) + log.debug("Clear client cookie: %s", cookie_name) + + +def _get_cookies() -> Optional[Dict[str, str]]: + """Return client cookies.""" + try: + headers = _get_websocket_headers() + if headers is None: + return None + cookie_str = str(headers.get("Cookie")) + cookies: Dict[str, str] = {} + for cookie in cookie_str.split(";"): + key, val = cookie.split("=") + cookies[key.strip()] = val.strip() + return cookies + 
except Exception as e: + log.error("Failed to get cookies: %s", e) + return None + + +def _create_hmac(msg: str) -> str: + """Create a HMAC hash.""" + return hmac.new(AUTH_SESSION_SECRET_KEY.encode(), msg.encode(), hashlib.sha256).hexdigest() + + +def _verify_hmac(msg: str, digest: str) -> bool: + """Verify credibility of HMAC hash.""" + return hmac.compare_digest(_create_hmac(msg), digest) + + +def generate_hmac_session_id(length: int = 32) -> str: + """Generate a secure (HMAC) and unique session_id then track in session cache.""" + id_ = token_hex(length // 2) + hmac_ = _create_hmac(id_) + cached_session_ids[hmac_] = id_ + log.debug("Generated new hmac session id: %s", hmac_) + return hmac_ + + +def _set_cookie_session_id(hmac_session_id: str) -> None: + """Set the encrypted session_id in the cookie.""" + _set_cookie(hmac_session_id) + log.debug("_set_cookie_session_id() - hmac session id: %s", hmac_session_id) + + +def _get_cookie_session_id() -> str | None: + """Return the Docq encrypted HMAC session_id from the cookie.""" + try: + hmac_session_id = None + cookies = _get_cookies() + if cookies is not None: + hmac_session_id = cookies.get(SESSION_COOKIE_NAME) + return hmac_session_id + except Exception as e: + log.error("Failed to get session id: %s", e) + return None + + +def verify_cookie_hmac_session_id() -> str | None: + """Verify the encrypted session_id from the cookie. + + Return: + str: The hmac_session_id if verified. + None: If not verified. + """ + hmac_session_id = None + hmac_session_id = _get_cookie_session_id() + + if hmac_session_id is None: + log.debug("verify_cookie_hmac_session_id(): No session id (auth token) cookie found.") + elif hmac_session_id not in cached_session_ids: + log.warning( + "verify_cookie_hmac_session_id(): item with key=hmac_session_id `cached_session_ids`. The auth session either expired or explicitly removed." 
+ ) + log.debug("cached session ids : %s", len(cached_session_ids.keys())) + log.debug("cached session data: %s", len(cached_session_data.keys())) + hmac_session_id = None + elif not _verify_hmac(cached_session_ids[hmac_session_id], hmac_session_id): + log.warning("verify_cookie_hmac_session_id(): HMAC Session ID failed verification.") + hmac_session_id = None + return hmac_session_id + + +def _encrypt(payload: dict) -> bytes: + """Encrypt some data.""" + try: + data = json.dumps(payload).encode() + cipher = Fernet(AUTH_KEY) + return cipher.encrypt(data) + except Exception as e: + log.error("Failed to encrypt auth data: %s", e) + return None + + +def _decrypt(encrypted_payload: bytes) -> dict: + """Decrypt some data.""" + try: + cipher = Fernet(AUTH_KEY) + data = cipher.decrypt(encrypted_payload) + result = json.loads(data) + return result + except Exception as e: + log.error("Failed to decrypt auth data: %s", e) + return None + + +def _reset_expiry_cache_auth_session(hmac_session_id: str) -> None: + """Update the auth expiry time.""" + try: + cached_session_data[hmac_session_id] = cached_session_data[hmac_session_id] + cached_session_ids[hmac_session_id] = cached_session_ids[hmac_session_id] + # _set_cookie_session_id(session_id) + except Exception as e: + log.error("Failed to update auth expiry: %s", e) + + +def set_cache_auth_session(val: dict) -> None: + """Caches the session state configs for auth, persisting across connections. + + Args: + val (dict): The session state for auth. + """ + try: + hmac_session_id = _get_cookie_session_id() + log.debug("set_cache_auth_session() - hmac session id: %s", hmac_session_id) + + if hmac_session_id is None or hmac_session_id not in cached_session_ids: + log.debug( + "set_cache_auth_session() - Valid session id (auth token) not found. 
session_data: %s", + len(cached_session_ids.keys()), + ) + hmac_session_id = generate_hmac_session_id() + _set_cookie_session_id(hmac_session_id) + cached_session_data[hmac_session_id] = _encrypt(val) + _reset_expiry_cache_auth_session(hmac_session_id) + except Exception as e: + log.error("Error caching auth session: %s", e) + + +def get_cache_auth_session() -> dict | None: + """Verify the session auth token and get the cached session state for the current session. The current session is identified by a session_id wrapped in a auth token in a browser session cookie.""" + try: + decrypted_auth_session_data = None + hmac_session_id = _get_cookie_session_id() + if hmac_session_id in cached_session_data: + encrypted_auth_session_data = cached_session_data[hmac_session_id] + decrypted_auth_session_data = _decrypt(encrypted_auth_session_data) + return decrypted_auth_session_data + except Exception as e: + log.error("Failed to get auth session from cache: %s", e) + return None + + +def remove_cache_auth_session() -> None: + """Remove the cached session state for the current session. The current session is identified by a session_id in a particular browsersession cookie.""" + try: + hmac_session_id = _get_cookie_session_id() + if hmac_session_id in cached_session_data: + del cached_session_data[hmac_session_id] + log.debug("Removed from cached_session: %s", hmac_session_id) + if hmac_session_id in cached_session_ids: + del cached_session_ids[hmac_session_id] + log.debug("Removed from session_data: %s", hmac_session_id) + except Exception as e: + log.error("Failed to remove auth session from cache: %s", e) + + +def reset_cache_and_cookie_auth_session() -> None: + """Clear all the data used to remember user session (auth session cache and session cookie). 
This must be called at login and cookie.""" + try: + remove_cache_auth_session() + _clear_cookie(SESSION_COOKIE_NAME) + except Exception as e: + log.error("Failed to clear session data caches (hmac, session data, and session cookie ): %s", e) diff --git a/tests/docq/support/auth_utils_test.py b/tests/docq/support/auth_utils_test.py new file mode 100644 index 00000000..001bba7a --- /dev/null +++ b/tests/docq/support/auth_utils_test.py @@ -0,0 +1,128 @@ +"""Test auth utils.""" +import unittest +from secrets import token_hex +from typing import Self +from unittest.mock import Mock, patch + +from docq.support import auth_utils +from docq.support.auth_utils import ( + SESSION_COOKIE_NAME, + _clear_cookie, + _create_hmac, + _decrypt, + _encrypt, + _get_cookie_session_id, + _get_cookies, + _set_cookie, + _set_cookie_session_id, + _verify_hmac, + cached_session_data, + generate_hmac_session_id, + get_cache_auth_session, + reset_cache_and_cookie_auth_session, + cached_session_ids, + set_cache_auth_session, +) + + +class TestAuthUtils(unittest.TestCase): + """Test auth utils.""" + + def setUp(self: Self) -> None: + """Setup module.""" + auth_utils.AUTH_SESSION_SECRET_KEY = token_hex(32) + + @patch("docq.support.auth_utils.html") + def test_set_cookie(self: Self, mock_html: Mock) -> None: + """Test set cookie.""" + _set_cookie("cookie") + mock_html.assert_called_once() + + @patch("docq.support.auth_utils.html") + def test_clear_cookie(self: Self, mock_html: Mock) -> None: + """Test clear cookie.""" + _clear_cookie(SESSION_COOKIE_NAME) + mock_html.assert_called_once() + + @patch("docq.support.auth_utils._get_websocket_headers") + def test_get_cookies(self: Self, mock_headers: Mock) -> None: + """Test get cookies.""" + mock_headers.return_value = {"Cookie": "key=value"} + result = _get_cookies() + assert result == {"key": "value"} + + def test_create_hmac(self: Self) -> None: + """Test create hmac.""" + msg = "test" + digest = _create_hmac(msg) + assert isinstance(digest, str) 
+ + def test_verify_hmac(self: Self) -> None: + """Test verify hmac.""" + msg = "test" + digest = _create_hmac(msg) + result = _verify_hmac(msg, digest) + assert result + + def test_generate_session_id(self: Self) -> None: + """Test generate session id.""" + id_ = generate_hmac_session_id() + assert isinstance(id_, str) + assert len(id_) == 64 + + @patch("docq.support.auth_utils._set_cookie") + def test_set_session_id(self: Self, mock_set_cookie: Mock) -> None: + """Test set session id.""" + session_id = "test" + _set_cookie_session_id(session_id) + mock_set_cookie.assert_called_once_with(session_id) + + @patch("docq.support.auth_utils._get_cookies") + def test_get_cookie_session_id(self: Self, mock_get_cookies: Mock) -> None: + """Test get session id.""" + session_id = generate_hmac_session_id() + cached_session_data[session_id] = _encrypt(("9999", "user", 1)) + mock_get_cookies.return_value = {SESSION_COOKIE_NAME: session_id} + result = _get_cookie_session_id() + assert result == session_id + + def test_encrypt_decrypt_auth(self: Self) -> None: + """Test encrypt decrypt auth.""" + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + encrypted_auth = _encrypt(payload) + decrypted_auth = _decrypt(encrypted_auth) + assert payload == decrypted_auth + + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_cache_auth(self: Self, mock_get_cookie_session_id: Mock) -> None: + """Test cache auth.""" + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + session_id = generate_hmac_session_id() + mock_get_cookie_session_id.return_value = session_id + set_cache_auth_session(payload) + assert session_id in cached_session_data + + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_auth_result( + self: Self, + mock_get_cookie_session_id: Mock, + ) -> None: + """Test auth result.""" + payload = {"org_id": "9999", "username": "user name", "user_id": 1} + session_id = generate_hmac_session_id() + 
mock_get_cookie_session_id.return_value = session_id + # mock_auto_login_enabled.return_value = True + set_cache_auth_session(payload) + result = get_cache_auth_session() + assert result == {"org_id": "9999", "username": "user name", "user_id": 1} + + @patch("docq.support.auth_utils._get_cookie_session_id") + def test_session_logout(self: Self, mock_get_cookie_session_id: Mock) -> None: + """Test session logout.""" + session_id = generate_hmac_session_id() + cached_session_data[session_id] = _encrypt(("9999", "user", 1)) + cached_session_ids[session_id] = session_id + mock_get_cookie_session_id.return_value = session_id + reset_cache_and_cookie_auth_session() + assert session_id not in cached_session_data, "Cached session should be deleted on logout" + assert session_id not in cached_session_ids, "Session data should be deleted on logout" diff --git a/web/index.py b/web/index.py index 2f12c244..710fd1a1 100644 --- a/web/index.py +++ b/web/index.py @@ -1,11 +1,10 @@ """Page: Home (no auth required).""" import streamlit as st -from docq import setup from st_pages import Page, Section, add_page_title, show_pages -from utils.layout import org_selection_ui, production_layout, public_access +from utils.layout import init_with_pretty_error_ui, org_selection_ui, production_layout, public_access -setup.init() +init_with_pretty_error_ui() production_layout() diff --git a/web/utils/handlers.py b/web/utils/handlers.py index de6a1ee0..4e6d8229 100644 --- a/web/utils/handlers.py +++ b/web/utils/handlers.py @@ -24,6 +24,7 @@ from docq.access_control.main import SpaceAccessor, SpaceAccessType from docq.data_source.list import SpaceDataSources from docq.domain import DocumentListItem, SpaceKey +from docq.support.auth_utils import get_cache_auth_session, reset_cache_and_cookie_auth_session, set_cache_auth_session from .constants import ( MAX_NUMBER_OF_PERSONAL_DOCS, @@ -40,6 +41,7 @@ get_chat_session, get_public_space_group_id, get_selected_org_id, + get_settings_session, 
get_username, reset_session_state, set_auth_session, @@ -59,7 +61,8 @@ def _set_session_state_configs( super_admin: bool = False, selected_org_admin: bool = False, space_group_id: Optional[int] = None, - public_session_id: Optional[str] = None ) -> None: + public_session_id: Optional[str] = None, +) -> None: """Set the session state for the configs. Args: @@ -85,9 +88,18 @@ def _set_session_state_configs( SessionKeyNameForAuth.PUBLIC_SESSION_ID.name: public_session_id, SessionKeyNameForAuth.PUBLIC_SPACE_GROUP_ID.name: space_group_id, SessionKeyNameForAuth.ANONYMOUS.name: anonymous, - } + }, + True, ) else: + # cache_session_state_configs( + # user_id=user_id, + # selected_org_id=selected_org_id, + # name=name, + # username=username, + # super_admin=super_admin, + # selected_org_admin=selected_org_admin, + # ) set_auth_session( { SessionKeyNameForAuth.ID.name: user_id, @@ -97,7 +109,8 @@ def _set_session_state_configs( SessionKeyNameForAuth.SELECTED_ORG_ID.name: selected_org_id, SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name: selected_org_admin, SessionKeyNameForAuth.ANONYMOUS.name: anonymous, - } + }, + True, ) set_settings_session( { @@ -112,15 +125,17 @@ def _set_session_state_configs( def handle_login(username: str, password: str) -> bool: """Handle login.""" reset_session_state() + reset_cache_and_cookie_auth_session() result = manage_users.authenticate(username, password) - current_user_id = result[0] - member_orgs = manage_organisations.list_organisations( - user_id=current_user_id - ) # we can't use handle_list_orgs() here - default_org_id = member_orgs[0][0] - selected_org_admin = current_user_id in [x[0] for x in member_orgs[0][2]] - log.info("Login result: %s", result) + if result: + current_user_id = result[0] + member_orgs = manage_organisations.list_organisations( + user_id=current_user_id + ) # we can't use handle_list_orgs() here + default_org_id = member_orgs[0][0] + selected_org_admin = current_user_id in [x[0] for x in member_orgs[0][2]] + 
log.info("Login result: %s", result) _set_session_state_configs( user_id=current_user_id, selected_org_id=default_org_id, @@ -137,7 +152,10 @@ def handle_login(username: str, password: str) -> bool: def handle_logout() -> None: + """Handle logout.""" reset_session_state() + reset_cache_and_cookie_auth_session() + log.info("Logout") def handle_create_user() -> int: @@ -335,10 +353,7 @@ def _get_chat_spaces(feature: domain.FeatureKey) -> tuple[Optional[SpaceKey], Li if feature.type_ == config.FeatureType.ASK_PUBLIC: personal_space = None - shared_spaces = [ - domain.SpaceKey(config.SpaceType.SHARED, s_[0], select_org_id) - for s_ in list_public_spaces() - ] + shared_spaces = [domain.SpaceKey(config.SpaceType.SHARED, s_[0], select_org_id) for s_ in list_public_spaces()] return personal_space, shared_spaces shared_spaces = None @@ -514,6 +529,7 @@ def get_enabled_features() -> list[domain.FeatureKey]: def handle_update_system_settings() -> None: current_org_id = get_selected_org_id() + manage_settings.update_organisation_settings( { config.SystemSettingsKey.ENABLED_FEATURES.name: [ @@ -522,6 +538,14 @@ def handle_update_system_settings() -> None: }, org_id=current_org_id, ) + set_settings_session( + { + config.SystemSettingsKey.ENABLED_FEATURES.name: [ + f.name for f in st.session_state[f"system_settings_{config.SystemSettingsKey.ENABLED_FEATURES.name}"] + ], + }, + SessionKeyNameForSettings.SYSTEM, + ) def get_max_number_of_documents(type_: config.SpaceType): @@ -617,7 +641,7 @@ def handle_public_session() -> None: space_group_id=space_group_id, public_session_id=session_id, ) - else: # if no query params are provided, set space_group_id and public_session_id to -1 to disable ASK_PUBLIC feature + else: # if no query params are provided, set space_group_id and public_session_id to -1 to disable ASK_PUBLIC feature _set_session_state_configs( user_id=None, selected_org_id=None, diff --git a/web/utils/layout.py b/web/utils/layout.py index eaa4988f..a55a182c 100644 --- 
a/web/utils/layout.py +++ b/web/utils/layout.py @@ -4,10 +4,15 @@ from typing import List, Tuple import streamlit as st +from docq import setup from docq.access_control.main import SpaceAccessType from docq.config import FeatureType, LogType, SpaceType, SystemSettingsKey from docq.domain import DocumentListItem, FeatureKey, SpaceKey -from docq.manage_users import list_users_by_org +from docq.support.auth_utils import ( + get_cache_auth_session, + reset_cache_and_cookie_auth_session, + verify_cookie_hmac_session_id, +) from st_pages import hide_pages from streamlit.components.v1 import html from streamlit.delta_generator import DeltaGenerator @@ -15,6 +20,7 @@ from .constants import ALLOWED_DOC_EXTS, SessionKeyNameForAuth, SessionKeyNameForChat from .formatters import format_archived, format_datetime, format_filesize, format_timestamp from .handlers import ( + _set_session_state_configs, get_enabled_features, get_max_number_of_documents, get_shared_space, @@ -68,7 +74,8 @@ get_public_space_group_id, get_selected_org_id, is_current_user_super_admin, - set_selected_org_id, + reset_session_state, + session_state_exists, ) _chat_ui_script = """ @@ -198,6 +205,7 @@ def __no_admin_menu() -> None: ] ) + def __embed_page_config() -> None: st.markdown( """ @@ -237,7 +245,7 @@ def __login_form() -> None: if handle_login(username, password): st.experimental_rerun() else: - st.error("Invalid username or password.") + st.error("The Username and Password you entered doesn't match what we have.") st.stop() else: st.stop() @@ -266,9 +274,33 @@ def public_access() -> None: def auth_required(show_login_form: bool = True, requiring_admin: bool = False, show_logout_button: bool = True) -> bool: """Decide layout based on current user's access.""" - auth = get_auth_session() + log.debug("auth_required() called") + auth = None __always_hidden_pages() + + session_state_existed = session_state_exists() + log.debug("auth_required(): session_state_existed: %s", session_state_existed) + if 
session_state_existed: + auth = get_auth_session() + elif verify_cookie_hmac_session_id() is not None: + # there's a valid auth session token. Let's get session state from cache. + auth = get_cache_auth_session() + log.debug("auth_required(): Got auth session state from cache: %s", auth) + if auth: + log.debug("auth_required(): Valid auth session found: %s", auth) + if not session_state_existed: + # the user probably refreshed the page resetting Streamlit session state because it's bound to a browser session connection. + _set_session_state_configs( + user_id=auth[SessionKeyNameForAuth.ID.name], + selected_org_id=auth[SessionKeyNameForAuth.SELECTED_ORG_ID.name], + name=auth[SessionKeyNameForAuth.NAME.name], + username=auth[SessionKeyNameForAuth.USERNAME.name], + anonymous=False, + super_admin=auth[SessionKeyNameForAuth.SUPER_ADMIN.name], + selected_org_admin=auth[SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name], + ) + if show_logout_button: __logout_button() @@ -280,12 +312,14 @@ def auth_required(show_login_form: bool = True, requiring_admin: bool = False, s return True else: + log.debug("auth_required(): No valid auth session found. User needs to re-authenticate.") + reset_session_state() + reset_cache_and_cookie_auth_session() if show_login_form: __login_form() return False - def public_session_setup() -> None: """Initialize session state for the public pages.""" handle_public_session() @@ -312,7 +346,7 @@ def public_space_enabled(feature: FeatureKey) -> None: feature_is_ready, spaces = (space_group_id != -1 or session_id != -1), None if feature_is_ready: spaces = list_public_spaces() - if not feature_is_ready or not spaces: # Stop the app if there are no public spaces. + if not feature_is_ready or not spaces: # Stop the app if there are no public spaces. 
st.error("This feature is not ready.") st.info("Please contact your administrator to configure this feature.") st.stop() @@ -877,3 +911,13 @@ def org_selection_ui() -> None: ) if selected: handle_org_selection_change(selected[0]) + + +def init_with_pretty_error_ui() -> None: + """UI to run setup and prevent showing errors to the user.""" + try: + setup.init() + except Exception as e: + st.error("Something went wrong starting Docq.") + log.fatal("Error: setup.init() failed with %s", e) + st.stop() diff --git a/web/utils/sessions.py b/web/utils/sessions.py index bca975d7..aee05b7d 100644 --- a/web/utils/sessions.py +++ b/web/utils/sessions.py @@ -1,9 +1,11 @@ """Session utilities.""" +import logging from typing import Any import streamlit as st from docq import config, manage_users +from docq.support.auth_utils import set_cache_auth_session from .constants import ( SESSION_KEY_NAME_DOCQ, @@ -25,9 +27,15 @@ def _init_session_state() -> None: st.session_state[SESSION_KEY_NAME_DOCQ][SessionKeySubName.CHAT.name][n.name] = {} +def session_state_exists() -> bool: + """Check if any session state exists.""" + return SESSION_KEY_NAME_DOCQ in st.session_state + + def reset_session_state() -> None: """Reset the session state. This must be called for user login and logout.""" st.session_state[SESSION_KEY_NAME_DOCQ] = {} + logging.debug("called reset_session_state()") def _get_session_value(name: SessionKeySubName, key_: str = None, subkey_: str = None) -> Any | None: @@ -72,6 +80,14 @@ def set_chat_session(val: Any | None, type_: config.FeatureType = None, key_: Se ) +def set_auth_session(val: dict = None, cache: bool = False) -> None: + """Set the auth session value.""" + _set_session_value(val, SessionKeySubName.AUTH) + if cache: + # this persists the auth session across browser session in Streamlit i.e. when the user hits refresh. 
+ set_cache_auth_session(val) + + def get_auth_session() -> dict: """Get the auth session value.""" return _get_session_value(SessionKeySubName.AUTH) @@ -96,11 +112,6 @@ def set_if_current_user_is_selected_org_admin(selected_org_id: int) -> None: _set_session_value(is_org_admin, SessionKeySubName.AUTH, SessionKeyNameForAuth.SELECTED_ORG_ADMIN.name) -def set_auth_session(val: dict = None) -> None: - """Set the auth session value.""" - _set_session_value(val, SessionKeySubName.AUTH) - - def get_authenticated_user_id() -> int | None: """Get the authenticated user id.""" return _get_session_value(SessionKeySubName.AUTH, SessionKeyNameForAuth.ID.name)