diff --git a/package-lock.json b/package-lock.json index ae6442636e3..58f9657d3e1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,6 +25,7 @@ "nanoid": "^4.0.2", "openid-client": "^5.4.2", "parquetjs": "^0.11.2", + "pdfjs-dist": "^4.0.269", "postcss": "^8.4.31", "serpapi": "^1.1.1", "tailwind-scrollbar": "^3.0.0", @@ -676,6 +677,26 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", + "integrity": "sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==", + "optional": true, + "dependencies": { + "detect-libc": "^2.0.0", + "https-proxy-agent": "^5.0.0", + "make-dir": "^3.1.0", + "node-fetch": "^2.6.7", + "nopt": "^5.0.0", + "npmlog": "^5.0.1", + "rimraf": "^3.0.2", + "semver": "^7.3.5", + "tar": "^6.1.11" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + } + }, "node_modules/@mongodb-js/saslprep": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.0.tgz", @@ -1489,6 +1510,12 @@ "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.6.tgz", "integrity": "sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==" }, + "node_modules/abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "optional": true + }, "node_modules/abort-controller": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", @@ -1574,7 +1601,7 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, + "devOptional": true, "engines": { "node": ">=8" } @@ -1611,6 +1638,25 @@ "node": ">= 8" } }, + "node_modules/aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "optional": true + }, + "node_modules/are-we-there-yet": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz", + "integrity": "sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==", + "optional": true, + "dependencies": { + "delegates": "^1.0.0", + "readable-stream": "^3.6.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/arg": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", @@ -1937,6 +1983,56 @@ } ] }, + "node_modules/canvas": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", + "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", + "hasInstallScript": true, + "optional": true, + "dependencies": { + "@mapbox/node-pre-gyp": "^1.0.0", + "nan": "^2.17.0", + "simple-get": "^3.0.3" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/canvas/node_modules/decompress-response": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", + "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", + "optional": true, + "dependencies": { + "mimic-response": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/canvas/node_modules/mimic-response": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", + "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", + "optional": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/canvas/node_modules/simple-get": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", + "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", + "optional": true, + "dependencies": { + "decompress-response": "^4.2.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/chai": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/chai/-/chai-4.3.7.tgz", @@ -2090,6 +2186,15 @@ "simple-swizzle": "^0.2.2" } }, + "node_modules/color-support": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", + "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==", + "optional": true, + "bin": { + "color-support": "bin.js" + } + }, "node_modules/combined-stream": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", @@ -2139,6 +2244,12 @@ "node": ">=10.18.0 <11 || >=12.14.0 <13 || >=14" } }, + "node_modules/console-control-strings": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", + "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==", + "optional": true + }, "node_modules/cookie": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", @@ -2350,6 +2461,12 @@ "node": ">=0.4.0" } }, + "node_modules/delegates": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", + "integrity": "sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==", + "optional": true + }, "node_modules/dequal": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", @@ -2459,6 +2576,12 @@ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.359.tgz", "integrity": "sha512-OoVcngKCIuNXtZnsYoqlCvr0Cf3NIPzDIgwUfI9bdTFjXCrr79lI0kwQstLPZ7WhCezLlGksZk/BFAzoXC7GDw==" }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "optional": true + }, "node_modules/end-of-stream": { "version": "1.4.4", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", @@ -2973,6 +3096,30 @@ "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==" }, + "node_modules/fs-minipass": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", + "integrity": "sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==", + "optional": true, + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "optional": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -2996,6 +3143,26 @@ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" }, + "node_modules/gauge": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", + "integrity": "sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==", + "optional": true, + "dependencies": { + "aproba": "^1.0.3 || ^2.0.0", + "color-support": "^1.1.2", + "console-control-strings": "^1.0.0", + "has-unicode": "^2.0.1", + "object-assign": "^4.1.1", + "signal-exit": "^3.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "wide-align": "^1.1.2" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/get-func-name": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz", @@ -3026,7 +3193,7 @@ "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "dev": true, + "devOptional": true, "dependencies": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -3163,6 +3330,12 @@ "node": ">=8" } }, + "node_modules/has-unicode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", + "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==", + "optional": true + }, "node_modules/hash-wasm": { "version": "4.9.0", "resolved": "https://registry.npmjs.org/hash-wasm/-/hash-wasm-4.9.0.tgz", @@ -3393,6 +3566,15 @@ "node": ">=0.10.0" } }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "optional": true, + "engines": { + "node": ">=8" + } + }, "node_modules/is-glob": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", @@ -3764,6 +3946,30 @@ "node": ">=12" } }, + "node_modules/make-dir": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", + "integrity": "sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==", + "optional": true, + "dependencies": { + "semver": "^6.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "optional": true, + "bin": { + "semver": "bin/semver.js" + } + }, "node_modules/make-error": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", @@ -3922,6 +4128,40 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/minipass": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", + "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==", + "optional": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/minizlib": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-2.1.2.tgz", + "integrity": "sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==", + "optional": true, + "dependencies": { + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "optional": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/mkdirp": { "version": "0.5.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", @@ -4033,6 +4273,12 @@ "thenify-all": "^1.0.0" } }, + "node_modules/nan": { + "version": "2.18.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.18.0.tgz", + "integrity": "sha512-W7tfG7vMOGtD30sHoZSSc/JVYiyDPEyQVso/Zz+/uQd0B0L46gtC+pHha5FFMRpil6fm/AoEcRWyOVi4+E/f8w==", + "optional": true + }, "node_modules/nanoid": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-4.0.2.tgz", @@ -4165,6 +4411,21 @@ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz", "integrity": "sha512-5GFldHPXVG/YZmFzJvKK2zDSzPKhEp0+ZR5SVaoSag9fsL5YgHbUHDfnG5494ISANDcK4KwPXAx2xqVEydmd7w==" }, + "node_modules/nopt": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-5.0.0.tgz", + "integrity": "sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==", + "optional": true, + "dependencies": { + "abbrev": "1" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -4193,6 +4454,18 @@ "node": ">=8" } }, + "node_modules/npmlog": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-5.0.1.tgz", + "integrity": "sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==", + "optional": true, + "dependencies": { + "are-we-there-yet": "^2.0.0", + "console-control-strings": "^1.1.0", + "gauge": "^3.0.0", + "set-blocking": "^2.0.0" + } + }, "node_modules/nwsapi": { "version": "2.2.4", "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.4.tgz", @@ -4482,6 +4755,15 @@ "node": ">=8" } }, + "node_modules/path2d-polyfill": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path2d-polyfill/-/path2d-polyfill-2.0.1.tgz", + "integrity": "sha512-ad/3bsalbbWhmBo0D6FZ4RNMwsLsPpL6gnvhuSaU5Vm7b06Kr5ubSltQQ0T7YKsiJQO+g22zJ4dJKNTXIyOXtA==", + "optional": true, + "engines": { + "node": ">=8" + } + }, "node_modules/pathe": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/pathe/-/pathe-1.1.0.tgz", @@ -4497,6 +4779,18 @@ "node": "*" } }, + "node_modules/pdfjs-dist": { + "version": "4.0.269", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-4.0.269.tgz", + "integrity": "sha512-jjWO56tcOjnmPqDf8PmXDeZ781AGvpHMYI3HhNtaFKTRXXPaD1ArSrhVe38/XsrIQJ0onISCND/vuXaWJkiDWw==", + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "canvas": "^2.11.2", + "path2d-polyfill": "^2.0.1" + } + }, "node_modules/periscopic": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/periscopic/-/periscopic-3.1.0.tgz", @@ -5149,7 +5443,7 @@ "version": "3.0.2", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", - "dev": true, + "devOptional": true, "dependencies": { "glob": "^7.1.3" }, @@ -5296,6 +5590,12 @@ "undici": "^5.12.0" } }, + "node_modules/set-blocking": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", + "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==", + "optional": true + }, "node_modules/set-cookie-parser": { "version": "2.6.0", "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.6.0.tgz", @@ -5355,7 +5655,7 @@ "version": "3.0.7", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", - "dev": true + "devOptional": true }, "node_modules/simple-concat": { "version": "1.0.1", @@ -5527,11 +5827,25 @@ "safe-buffer": "~5.2.0" } }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "optional": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, + "devOptional": true, "dependencies": { "ansi-regex": "^5.0.1" }, @@ -5906,6 +6220,23 @@ "postcss": "^8.0.9" } }, + "node_modules/tar": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.0.tgz", + "integrity": "sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==", + "optional": true, + "dependencies": { + "chownr": "^2.0.0", + "fs-minipass": "^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/tar-fs": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz", @@ -5926,6 +6257,27 @@ "streamx": "^2.15.0" } }, + "node_modules/tar/node_modules/chownr": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz", + "integrity": "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==", + "optional": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/tar/node_modules/mkdirp": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", + "integrity": "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==", + "optional": true, + "bin": { + "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -6604,6 +6956,15 @@ "node": ">=8" } }, + "node_modules/wide-align": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.5.tgz", + "integrity": "sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==", + "optional": true, + "dependencies": { + "string-width": "^1.0.2 || 2 || 3 || 4" + } + }, "node_modules/word-wrap": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", diff --git a/package.json b/package.json index 9d2614b9aed..6fd1b0445cc 100644 --- a/package.json +++ b/package.json @@ -61,6 +61,7 @@ "nanoid": "^4.0.2", "openid-client": "^5.4.2", "parquetjs": "^0.11.2", + "pdfjs-dist": "^4.0.269", "postcss": "^8.4.31", "serpapi": "^1.1.1", "tailwind-scrollbar": "^3.0.0", diff --git a/src/lib/buildPrompt.ts b/src/lib/buildPrompt.ts index 15e3a450e64..b3c624647d5 100644 --- a/src/lib/buildPrompt.ts +++ b/src/lib/buildPrompt.ts @@ -2,7 +2,8 @@ import type { BackendModel } from "./server/models"; import type { Message } from "./types/Message"; import { format } from "date-fns"; import type { WebSearch } from "./types/WebSearch"; -import { downloadFile } from "./server/files/downloadFile"; +import type { PdfSearch } from "./types/PdfSearch"; +import { downloadImgFile } from "./server/files/downloadFile"; import type { Conversation } from "./types/Conversation"; interface buildPromptOptions { @@ -11,6 +12,7 @@ interface buildPromptOptions { model: BackendModel; locals?: App.Locals; webSearch?: WebSearch; + pdfSearch?: PdfSearch; preprompt?: string; files?: File[]; } @@ -19,6 +21,7 @@ export async function buildPrompt({ messages, model, webSearch, + pdfSearch, preprompt, id, }: buildPromptOptions): Promise { @@ -47,6 +50,31 @@ export async function buildPrompt({ `, }, ]; + }else if (pdfSearch && pdfSearch.context) { + const lastMsg = messages.slice(-1)[0]; + const messagesWithoutLastUsrMsg = messages.slice(0, -1); + const previousUserMessages = messages.filter((el) => el.from === "user").slice(0, -1); + + const previousQuestions = + previousUserMessages.length > 0 + ? `Previous questions: \n${previousUserMessages + .map(({ content }) => `- ${content}`) + .join("\n")}` + : ""; + + messages = [ + ...messagesWithoutLastUsrMsg, + { + from: "user", + content: `Below are the information I extracted from a PDF file that might be useful: + ===================== + ${pdfSearch.context} + ===================== + ${previousQuestions} + Answer the question: ${lastMsg.content} + `, + }, + ]; } // section to handle potential files input @@ -60,7 +88,7 @@ export async function buildPrompt({ const markdowns = await Promise.all( el.files.map(async (hash) => { try { - const { content: image, mime } = await downloadFile(hash, id); + const { content: image, mime } = await downloadImgFile(hash, id); const b64 = image.toString("base64"); return `![](data:${mime};base64,${b64})})`; } catch (e) { diff --git a/src/lib/components/OpenPdfSearchResults.svelte b/src/lib/components/OpenPdfSearchResults.svelte new file mode 100644 index 00000000000..71a372b0813 --- /dev/null +++ b/src/lib/components/OpenPdfSearchResults.svelte @@ -0,0 +1,114 @@ + + +
+ + {#if error} + + {:else if loading} + + {:else} + + {/if} + PDF search + +
+ +
+
+ +
+ {#if pdfSearchMessages.length === 0} +
+ +
+ {:else} +
    + {#each pdfSearchMessages as message} + {#if message.messageType === "update"} +
  1. +
    +
    +

    + {message.message} +

    +
    + {#if message.args} +

    + {message.args} +

    + {/if} +
  2. + {:else if message.messageType === "error"} +
  3. +
    + +

    + {message.message} +

    +
    + {#if message.args} +

    + {message.args} +

    + {/if} +
  4. + {/if} + {/each} +
+ {/if} +
+
+ + diff --git a/src/lib/components/UploadBtn.svelte b/src/lib/components/UploadBtn.svelte index cb869443e9b..18b97bab28d 100644 --- a/src/lib/components/UploadBtn.svelte +++ b/src/lib/components/UploadBtn.svelte @@ -1,23 +1,52 @@ diff --git a/src/lib/components/chat/ChatMessage.svelte b/src/lib/components/chat/ChatMessage.svelte index 685587ff9dd..921734175fa 100644 --- a/src/lib/components/chat/ChatMessage.svelte +++ b/src/lib/components/chat/ChatMessage.svelte @@ -17,7 +17,8 @@ import type { Model } from "$lib/types/Model"; import OpenWebSearchResults from "../OpenWebSearchResults.svelte"; - import type { WebSearchUpdate } from "$lib/types/MessageUpdate"; + import OpenPdfSearchResults from "../OpenPdfSearchResults.svelte"; + import type { RAGUpdate, WebSearchUpdate, PdfSearchUpdate } from "$lib/types/MessageUpdate"; function sanitizeMd(md: string) { let ret = md @@ -49,7 +50,7 @@ export let readOnly = false; export let isTapped = false; - export let webSearchMessages: WebSearchUpdate[]; + export let RAGMessages: RAGUpdate[]; const dispatch = createEventDispatcher<{ retry: { content: string; id: Message["id"] }; @@ -108,9 +109,15 @@ let searchUpdates: WebSearchUpdate[] = []; - $: searchUpdates = ((webSearchMessages.length > 0 - ? webSearchMessages + $: searchUpdates = ((RAGMessages.filter(({type}) => type === "webSearch").length > 0 + ? RAGMessages.filter(({type}) => type === "webSearch") : message.updates?.filter(({ type }) => type === "webSearch")) ?? []) as WebSearchUpdate[]; + + let pdfUpdates: PdfSearchUpdate[] = []; + + $: pdfUpdates = ((RAGMessages.filter(({type}) => type === "pdfSearch").length > 0 + ? RAGMessages.filter(({type}) => type === "pdfSearch") + : message.updates?.filter(({ type }) => type === "pdfSearch")) ?? []) as PdfSearchUpdate[]; $: downloadLink = message.from === "user" ? `${$page.url.pathname}/message/${message.id}/prompt` : undefined; @@ -153,7 +160,14 @@ loading={!(searchUpdates[searchUpdates.length - 1]?.messageType === "sources")} /> {/if} - {#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))} + {#if pdfUpdates && pdfUpdates.length > 0} + + {/if} + {#if !message.content && (webSearchIsDone || (RAGMessages && RAGMessages.length === 0))} {/if} diff --git a/src/lib/components/chat/ChatMessages.svelte b/src/lib/components/chat/ChatMessages.svelte index 7feda19e427..f5e4e7d0e43 100644 --- a/src/lib/components/chat/ChatMessages.svelte +++ b/src/lib/components/chat/ChatMessages.svelte @@ -8,7 +8,7 @@ import type { LayoutData } from "../../../routes/$types"; import ChatIntroduction from "./ChatIntroduction.svelte"; import ChatMessage from "./ChatMessage.svelte"; - import type { WebSearchUpdate } from "$lib/types/MessageUpdate"; + import type { RAGUpdate } from "$lib/types/MessageUpdate"; import { browser } from "$app/environment"; import SystemPromptModal from "../SystemPromptModal.svelte"; import { page } from "$app/stores"; @@ -25,7 +25,7 @@ let chatContainer: HTMLElement; - export let webSearchMessages: WebSearchUpdate[] = []; + export let RAGMessages: RAGUpdate[] = []; async function scrollToBottom() { await tick(); @@ -40,7 +40,7 @@
@@ -54,7 +54,7 @@ {isAuthor} {readOnly} model={currentModel} - webSearchMessages={i === messages.length - 1 ? webSearchMessages : []} + RAGMessages={i === messages.length - 1 ? RAGMessages : []} on:retry on:vote /> @@ -65,7 +65,7 @@ {/if}
diff --git a/src/lib/components/chat/ChatWindow.svelte b/src/lib/components/chat/ChatWindow.svelte index 5a80b6e4c96..abfd8f9cfd2 100644 --- a/src/lib/components/chat/ChatWindow.svelte +++ b/src/lib/components/chat/ChatWindow.svelte @@ -15,7 +15,7 @@ import type { Model } from "$lib/types/Model"; import WebSearchToggle from "../WebSearchToggle.svelte"; import LoginModal from "../LoginModal.svelte"; - import type { WebSearchUpdate } from "$lib/types/MessageUpdate"; + import type { RAGUpdate } from "$lib/types/MessageUpdate"; import { page } from "$app/stores"; import DisclaimerModal from "../DisclaimerModal.svelte"; import FileDropzone from "./FileDropzone.svelte"; @@ -30,9 +30,10 @@ export let shared = false; export let currentModel: Model; export let models: Model[]; - export let webSearchMessages: WebSearchUpdate[] = []; + export let RAGMessages: RAGUpdate[] = []; export let preprompt: string | undefined = undefined; export let files: File[] = []; + export let uploadingPdf: boolean = false; $: isReadOnly = !models.some((model) => model.id === currentModel.id); @@ -94,7 +95,7 @@ {messages} readOnly={isReadOnly} isAuthor={!shared} - {webSearchMessages} + {RAGMessages} {preprompt} on:message={(ev) => { if ($page.data.loginRequired) { @@ -153,9 +154,8 @@ content: messages[messages.length - 1].content, })} /> - {:else if currentModel.multimodal} - {/if} +
("settings"); const users = db.collection("users"); const sessions = db.collection("sessions"); const messageEvents = db.collection("messageEvents"); -const bucket = new GridFSBucket(db, { bucketName: "files" }); + +const bucketName = "files"; +const bucket = new GridFSBucket(db, { bucketName }); +const files = db.collection(`${bucketName}.files`); export { client, db }; export const collections = { @@ -41,6 +44,7 @@ export const collections = { sessions, messageEvents, bucket, + files, }; client.on("open", () => { diff --git a/src/lib/server/websearch/sentenceSimilarity.ts b/src/lib/server/embeddings.ts similarity index 76% rename from src/lib/server/websearch/sentenceSimilarity.ts rename to src/lib/server/embeddings.ts index a877f8e0cd6..921c00d7569 100644 --- a/src/lib/server/websearch/sentenceSimilarity.ts +++ b/src/lib/server/embeddings.ts @@ -21,23 +21,22 @@ class PipelineSingleton { // see https://huggingface.co/thenlper/gte-small/blob/d8e2604cadbeeda029847d19759d219e0ce2e6d8/README.md?code=true#L2625 export const MAX_SEQ_LEN = 512 as const; -export async function findSimilarSentences( - query: string, - sentences: string[], - { topK = 5 }: { topK: number } -) { - const input = [query, ...sentences]; - +export async function createEmbeddings(input: string[]) { const extractor = await PipelineSingleton.getInstance(); - const output: Tensor = await extractor(input, { pooling: "mean", normalize: true }); - - const queryTensor: Tensor = output[0]; - const sentencesTensor: Tensor = output.slice([1, input.length - 1]); + const embeddings: Tensor = await extractor(input, { pooling: "mean", normalize: true }); + return embeddings; +} - const distancesFromQuery: { distance: number; index: number }[] = [...sentencesTensor].map( +// docstring about the first sentence being the query sentence +export function findSimilarSentences( + embeddings: Tensor, + queryEmbedding: Tensor, + { topK = 5 }: { topK: number } +) { + const distancesFromQuery: { distance: number; index: number }[] = [...embeddings].map( (sentenceTensor: Tensor, index: number) => { return { - distance: innerProduct(queryTensor, sentenceTensor), + distance: innerProduct(queryEmbedding, sentenceTensor), index: index, }; } @@ -49,4 +48,4 @@ export async function findSimilarSentences( // Return the indexes of the closest topK sentences return distancesFromQuery.slice(0, topK).map((item) => item.index); -} +} \ No newline at end of file diff --git a/src/lib/server/endpoints/aws/endpointAws.ts b/src/lib/server/endpoints/aws/endpointAws.ts index 0cd899be19a..0051ceea3ec 100644 --- a/src/lib/server/endpoints/aws/endpointAws.ts +++ b/src/lib/server/endpoints/aws/endpointAws.ts @@ -40,6 +40,7 @@ export async function endpointAws( const prompt = await buildPrompt({ messages: conversation.messages, webSearch: conversation.messages[conversation.messages.length - 1].webSearch, + pdfSearch: conversation.messages[conversation.messages.length - 1].pdfSearch, preprompt: conversation.preprompt, model, }); diff --git a/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts b/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts index 33a3c93460e..9d6659eb0f4 100644 --- a/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts +++ b/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts @@ -23,6 +23,7 @@ export function endpointLlamacpp( const prompt = await buildPrompt({ messages: conversation.messages, webSearch: conversation.messages[conversation.messages.length - 1].webSearch, + pdfSearch: conversation.messages[conversation.messages.length - 1].pdfSearch, preprompt: conversation.preprompt, model, }); diff --git a/src/lib/server/endpoints/ollama/endpointOllama.ts b/src/lib/server/endpoints/ollama/endpointOllama.ts index fab06a8dd17..f33600d1b7a 100644 --- a/src/lib/server/endpoints/ollama/endpointOllama.ts +++ b/src/lib/server/endpoints/ollama/endpointOllama.ts @@ -18,6 +18,7 @@ export function endpointOllama(input: z.input): const prompt = await buildPrompt({ messages: conversation.messages, webSearch: conversation.messages[conversation.messages.length - 1].webSearch, + pdfSearch: conversation.messages[conversation.messages.length - 1].pdfSearch, preprompt: conversation.preprompt, model, id: conversation._id, diff --git a/src/lib/server/files/downloadFile.ts b/src/lib/server/files/downloadFile.ts index 4d2bddb1c30..5ec8d704d5f 100644 --- a/src/lib/server/files/downloadFile.ts +++ b/src/lib/server/files/downloadFile.ts @@ -3,7 +3,7 @@ import { collections } from "../database"; import type { Conversation } from "$lib/types/Conversation"; import type { SharedConversation } from "$lib/types/SharedConversation"; -export async function downloadFile( +export async function downloadImgFile( sha256: string, convId: Conversation["_id"] | SharedConversation["_id"] ) { @@ -34,3 +34,36 @@ export async function downloadFile( return { content, mime }; } + +export async function downloadPdfEmbeddings( + convId: Conversation["_id"] | SharedConversation["_id"] +) { + const fileId = collections.bucket.find({ filename: `${convId.toString()}-pdf` }); + let textChunks: string[] = []; + let dims: number[] = [] + + const content = await fileId.next().then(async (file) => { + if (!file) { + throw error(404, "File not found"); + } + if (file.metadata?.conversation !== convId.toString()) { + throw error(403, "You don't have access to this file."); + } + + textChunks = file.metadata?.textChunks; + dims = file.metadata?.dims; + + const fileStream = collections.bucket.openDownloadStream(file._id); + + const fileBuffer = await new Promise((resolve, reject) => { + const chunks: Uint8Array[] = []; + fileStream.on("data", (chunk) => chunks.push(chunk)); + fileStream.on("error", reject); + fileStream.on("end", () => resolve(Buffer.concat(chunks))); + }); + + return fileBuffer; + }); + + return { content, textChunks, dims }; +} diff --git a/src/lib/server/files/uploadFile.ts b/src/lib/server/files/uploadFile.ts index 1c4a59b6f44..96859cd6424 100644 --- a/src/lib/server/files/uploadFile.ts +++ b/src/lib/server/files/uploadFile.ts @@ -1,8 +1,9 @@ import type { Conversation } from "$lib/types/Conversation"; +import type { Tensor } from "@xenova/transformers"; import { sha256 } from "$lib/utils/sha256"; import { collections } from "../database"; -export async function uploadFile(file: Blob, conv: Conversation): Promise { +export async function uploadImgFile(file: Blob, conv: Conversation): Promise { const sha = await sha256(await file.text()); const upload = collections.bucket.openUploadStream(`${conv._id}-${sha}`, { @@ -19,3 +20,30 @@ export async function uploadFile(file: Blob, conv: Conversation): Promise reject(new Error("Upload timed out")), 10000); }); } + +export async function uploadPdfEmbeddings(embeddings: Tensor, textChunks: string[], conv: Conversation): Promise { + const filename = `${conv._id}-pdf`; + + // Step 1: Check if the file exists + const existingFile = await collections.files.findOne({ filename }); + + // Step 2: Delete the existing file if it exists + if (existingFile) { + await collections.bucket.delete(existingFile._id); + } + + // Step 3: Upload the new file + const upload = collections.bucket.openUploadStream(filename, { + metadata: { conversation: conv._id.toString(), textChunks, dims: embeddings.dims }, + }); + + upload.write((await embeddings.data.buffer) as unknown as Buffer); + upload.end(); + + // only return the filename when upload throws a finish event or a 10s time out occurs + return new Promise((resolve, reject) => { + upload.once("finish", () => resolve()); + upload.once("error", reject); + setTimeout(() => reject(new Error("Upload timed out")), 10000); + }); +} diff --git a/src/lib/server/pdfSearch.ts b/src/lib/server/pdfSearch.ts new file mode 100644 index 00000000000..6221f60c8db --- /dev/null +++ b/src/lib/server/pdfSearch.ts @@ -0,0 +1,51 @@ +import type { PdfSearch } from "$lib/types/PdfSearch"; +import { + createEmbeddings, + findSimilarSentences, +} from "$lib/server/embeddings"; +import type { Conversation } from "$lib/types/Conversation"; +import type { MessageUpdate } from "$lib/types/MessageUpdate"; +import { downloadPdfEmbeddings } from "./files/downloadFile"; +import { Tensor } from "@xenova/transformers"; + +// todo: embed the prompt, download the embeddings, serialize them, and find the closest sentences, and get their texts, lets go +export async function runPdfSearch( + conv: Conversation, + prompt: string, + updatePad: (upd: MessageUpdate) => void +) { + const pdfSearch: PdfSearch = { + context: "", + createdAt: new Date(), + updatedAt: new Date(), + }; + + function appendUpdate(message: string, args?: string[], type?: "error" | "update" | "done") { + updatePad({ type: "pdfSearch", messageType: type ?? "update", message: message, args: args }); + } + + try { + appendUpdate("Extracting relevant information from PDF file"); + const { content, textChunks, dims } = await downloadPdfEmbeddings(conv._id); + // reconstruct pdfEmbeddings + const buffer = Buffer.from(content); + const data = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.length / Float32Array.BYTES_PER_ELEMENT); + const pdfEmbeddings = new Tensor('float32', data, dims); + const promptEmbeddings = await createEmbeddings([prompt]); + + const indices = findSimilarSentences(pdfEmbeddings, promptEmbeddings, {topK: 5}); + pdfSearch.context = indices.map((idx) => textChunks[idx]).join(" "); + + appendUpdate("Done", [], "done"); + } catch (pdfError) { + if (pdfError instanceof Error) { + appendUpdate( + "An error occurred with the pdf search", + [JSON.stringify(pdfError.message)], + "error" + ); + } + } + + return pdfSearch; +} diff --git a/src/lib/server/websearch/runWebSearch.ts b/src/lib/server/websearch/runWebSearch.ts index 0869ea8b494..b77ca84e838 100644 --- a/src/lib/server/websearch/runWebSearch.ts +++ b/src/lib/server/websearch/runWebSearch.ts @@ -7,10 +7,12 @@ import { chunk } from "$lib/utils/chunk"; import { MAX_SEQ_LEN as CHUNK_CAR_LEN, findSimilarSentences, -} from "$lib/server/websearch/sentenceSimilarity"; + createEmbeddings, +} from "$lib/server/embeddings"; import type { Conversation } from "$lib/types/Conversation"; import type { MessageUpdate } from "$lib/types/MessageUpdate"; import { getWebSearchProvider } from "./searchWeb"; +import type { Tensor } from "@xenova/transformers"; const MAX_N_PAGES_SCRAPE = 10 as const; const MAX_N_PAGES_EMBED = 5 as const; @@ -87,7 +89,10 @@ export async function runWebSearch( appendUpdate("Extracting relevant information"); const topKClosestParagraphs = 8; const texts = paragraphChunks.map(({ text }) => text); - const indices = await findSimilarSentences(prompt, texts, { + const embeddings = await createEmbeddings([prompt, ...texts]); + const queryTensor: Tensor = embeddings[0]; + const sentencesTensor: Tensor = embeddings.slice([1, texts.length]); + const indices = findSimilarSentences(sentencesTensor, queryTensor, { topK: topKClosestParagraphs, }); webSearch.context = indices.map((idx) => texts[idx]).join(""); diff --git a/src/lib/stores/pendingMessage.ts b/src/lib/stores/pendingMessage.ts index 2a7387f393f..410155ad659 100644 --- a/src/lib/stores/pendingMessage.ts +++ b/src/lib/stores/pendingMessage.ts @@ -2,8 +2,9 @@ import { writable } from "svelte/store"; export const pendingMessage = writable< | { - content: string; + content?: string; files: File[]; + pdfFile?: File; } | undefined >(); diff --git a/src/lib/types/Message.ts b/src/lib/types/Message.ts index e485dfc444f..c5caf8192cc 100644 --- a/src/lib/types/Message.ts +++ b/src/lib/types/Message.ts @@ -1,4 +1,5 @@ import type { MessageUpdate } from "./MessageUpdate"; +import type { PdfSearch } from "./PdfSearch"; import type { Timestamps } from "./Timestamps"; import type { WebSearch } from "./WebSearch"; @@ -9,6 +10,7 @@ export type Message = Partial & { updates?: MessageUpdate[]; webSearchId?: WebSearch["_id"]; // legacy version webSearch?: WebSearch; + pdfSearch?: PdfSearch; score?: -1 | 0 | 1; files?: string[]; // can contain either the hash of the file or the b64 encoded image data on the client side when uploading }; diff --git a/src/lib/types/MessageUpdate.ts b/src/lib/types/MessageUpdate.ts index 9bfb25667b9..684bb4cd24c 100644 --- a/src/lib/types/MessageUpdate.ts +++ b/src/lib/types/MessageUpdate.ts @@ -25,6 +25,15 @@ export type WebSearchUpdate = { sources?: WebSearchSource[]; }; +export type PdfSearchUpdate = { + type: "pdfSearch"; + messageType: "update" | "error" | "done"; + message: string; + args?: string[]; +}; + +export type RAGUpdate = WebSearchUpdate | PdfSearchUpdate; + export type StatusUpdate = { type: "status"; status: "started" | "pending" | "finished" | "error" | "title"; @@ -42,5 +51,6 @@ export type MessageUpdate = | TextStreamUpdate | AgentUpdate | WebSearchUpdate + | PdfSearchUpdate | StatusUpdate | ErrorUpdate; diff --git a/src/lib/types/PdfChat.ts b/src/lib/types/PdfChat.ts new file mode 100644 index 00000000000..f57e6f0390f --- /dev/null +++ b/src/lib/types/PdfChat.ts @@ -0,0 +1,9 @@ +import type { Tensor } from "@xenova/transformers"; +import type { Conversation } from "./Conversation"; +import type { Timestamps } from "./Timestamps"; + +export interface Pdf extends Timestamps { + conversationId: Conversation["_id"]; + textChunks: string[]; + embeddings: Tensor; +} diff --git a/src/lib/types/PdfSearch.ts b/src/lib/types/PdfSearch.ts new file mode 100644 index 00000000000..9da8433d7e0 --- /dev/null +++ b/src/lib/types/PdfSearch.ts @@ -0,0 +1,7 @@ +import type { ObjectId } from "mongodb"; +import type { Timestamps } from "./Timestamps"; + +export interface PdfSearch extends Timestamps { + _id?: ObjectId; + context: string; +} diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index 97c0d56da46..5ba5eebb87b 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -11,7 +11,7 @@ let loading = false; let files: File[] = []; - async function createConversation(message: string) { + async function createConversation() { try { loading = true; const res = await fetch(`${base}/conversation`, { @@ -33,12 +33,6 @@ const { conversationId } = await res.json(); - // Ugly hack to use a store as temp storage, feel free to improve ^^ - pendingMessage.set({ - content: message, - files, - }); - // invalidateAll to update list of conversations await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true }); } catch (err) { @@ -48,6 +42,27 @@ loading = false; } } + + async function createConversationWithMsg(message: string) { + // Ugly hack to use a store as temp storage, feel free to improve ^^ + pendingMessage.set({ + content: message, + files, + }); + + await createConversation(); + } + + + + async function createConversationWithPdf(pdfFile: File) { + pendingMessage.set({ + files, + pdfFile, + }); + + await createConversation(); + } @@ -55,7 +70,8 @@ createConversation(ev.detail)} + on:message={(ev) => createConversationWithMsg(ev.detail)} + on:uploadpdf={(ev) => createConversationWithPdf(ev.detail)} {loading} currentModel={findCurrentModel([...data.models, ...data.oldModels], data.settings.activeModel)} models={data.models} diff --git a/src/routes/conversation/[id]/+page.svelte b/src/routes/conversation/[id]/+page.svelte index 363d14d6176..c297694f4fe 100644 --- a/src/routes/conversation/[id]/+page.svelte +++ b/src/routes/conversation/[id]/+page.svelte @@ -13,7 +13,7 @@ import { findCurrentModel } from "$lib/utils/models"; import { webSearchParameters } from "$lib/stores/webSearchParameters"; import type { Message } from "$lib/types/Message"; - import type { MessageUpdate, WebSearchUpdate } from "$lib/types/MessageUpdate"; + import type { MessageUpdate, RAGUpdate } from "$lib/types/MessageUpdate"; import titleUpdate from "$lib/stores/titleUpdate"; import file2base64 from "$lib/utils/file2base64"; export let data; @@ -21,7 +21,7 @@ let messages = data.messages; let lastLoadedMessages = data.messages; - let webSearchMessages: WebSearchUpdate[] = []; + let RAGMessages: RAGUpdate[] = []; // Since we modify the messages array locally, we don't want to reset it if an old version is passed $: if (data.messages !== lastLoadedMessages) { @@ -31,6 +31,7 @@ let loading = false; let pending = false; + let uploadingPdf = false; let files: File[] = []; @@ -193,8 +194,8 @@ lastMessage.content += update.token; messages = [...messages]; } - } else if (update.type === "webSearch") { - webSearchMessages = [...webSearchMessages, update]; + } else if (update.type === "webSearch" || update.type === "pdfSearch") { + RAGMessages = [...RAGMessages, update]; } else if (update.type === "status") { if (update.status === "title" && update.message) { const conv = data.conversations.find(({ id }) => id === $page.params.id); @@ -225,8 +226,8 @@ }); } - // reset the websearchmessages - webSearchMessages = []; + // reset the RAGMessages + RAGMessages = []; await invalidate(UrlDependency.ConversationList); } catch (err) { @@ -272,11 +273,35 @@ } } + async function uploadPdf(file: File) { + uploadingPdf = true; + + const formData = new FormData(); + formData.append('pdf', file); + + const res = await fetch(`${base}/conversation/${$page.params.id}/upload-pdf`, { + method: "POST", + body: formData, + }); + + if (!res.ok) { + error.set("Error while uploading PDF, try again."); + console.error("Error while uploading PDF: " + (await res.text())); + } + + uploadingPdf = false; + } + onMount(async () => { // only used in case of creating new conversations (from the parent POST endpoint) if ($pendingMessage) { files = $pendingMessage.files; - await writeMessage($pendingMessage.content); + if($pendingMessage.content){ + await writeMessage($pendingMessage.content); + } + if($pendingMessage.pdfFile){ + await uploadPdf($pendingMessage.pdfFile); + } $pendingMessage = undefined; } }); @@ -327,11 +352,13 @@ {messages} shared={data.shared} preprompt={data.preprompt} - bind:webSearchMessages + bind:RAGMessages={RAGMessages} bind:files on:message={onMessage} on:retry={onRetry} on:vote={(event) => voteMessage(event.detail.score, event.detail.id)} + on:uploadpdf={(event) => uploadPdf(event.detail)} + {uploadingPdf} on:share={() => shareConversation($page.params.id, data.title)} on:stop={() => (($isAborted = true), (loading = false))} models={data.models} diff --git a/src/routes/conversation/[id]/+server.ts b/src/routes/conversation/[id]/+server.ts index daad2dd8283..d18d3978f02 100644 --- a/src/routes/conversation/[id]/+server.ts +++ b/src/routes/conversation/[id]/+server.ts @@ -9,10 +9,12 @@ import { ObjectId } from "mongodb"; import { z } from "zod"; import type { MessageUpdate } from "$lib/types/MessageUpdate"; import { runWebSearch } from "$lib/server/websearch/runWebSearch"; +import { runPdfSearch } from "$lib/server/pdfSearch"; import type { WebSearch } from "$lib/types/WebSearch"; +import type { PdfSearch } from "$lib/types/PdfSearch"; import { abortedGenerations } from "$lib/server/abortedGenerations"; import { summarize } from "$lib/server/summarize"; -import { uploadFile } from "$lib/server/files/uploadFile"; +import { uploadImgFile } from "$lib/server/files/uploadFile"; import sizeof from "image-size"; export async function POST({ request, locals, params, getClientAddress }) { @@ -135,7 +137,7 @@ export async function POST({ request, locals, params, getClientAddress }) { let hashes: undefined | string[]; if (files) { - hashes = await Promise.all(files.map(async (file) => await uploadFile(file, conv))); + hashes = await Promise.all(files.map(async (file) => await uploadImgFile(file, conv))); } // get the list of messages @@ -240,6 +242,14 @@ export async function POST({ request, locals, params, getClientAddress }) { messages[messages.length - 1].webSearch = webSearchResults; + let pdfSearchResults: PdfSearch | undefined; + const pdfSearch = await collections.files.findOne({ filename: `${convId.toString()}-pdf` }); + if(pdfSearch){ + pdfSearchResults = await runPdfSearch(conv, newPrompt, update); + } + + messages[messages.length - 1].pdfSearch = pdfSearchResults; + conv.messages = messages; try { diff --git a/src/routes/conversation/[id]/message/[messageId]/prompt/+server.ts b/src/routes/conversation/[id]/message/[messageId]/prompt/+server.ts index 5ecac0bbcc1..01cbf56e88f 100644 --- a/src/routes/conversation/[id]/message/[messageId]/prompt/+server.ts +++ b/src/routes/conversation/[id]/message/[messageId]/prompt/+server.ts @@ -39,6 +39,7 @@ export async function GET({ params, locals }) { const prompt = await buildPrompt({ preprompt: conv.preprompt, webSearch: messagesUpTo[messagesUpTo.length - 1].webSearch, + pdfSearch: messagesUpTo[messagesUpTo.length - 1].pdfSearch, messages: messagesUpTo, model: model, }); diff --git a/src/routes/conversation/[id]/output/[sha256]/+server.ts b/src/routes/conversation/[id]/output/[sha256]/+server.ts index 79ae37b7585..0ab95db7ebd 100644 --- a/src/routes/conversation/[id]/output/[sha256]/+server.ts +++ b/src/routes/conversation/[id]/output/[sha256]/+server.ts @@ -4,7 +4,7 @@ import { error } from "@sveltejs/kit"; import { ObjectId } from "mongodb"; import { z } from "zod"; import type { RequestHandler } from "./$types"; -import { downloadFile } from "$lib/server/files/downloadFile"; +import { downloadImgFile } from "$lib/server/files/downloadFile"; export const GET: RequestHandler = async ({ locals, params }) => { const sha256 = z.string().parse(params.sha256); @@ -39,7 +39,7 @@ export const GET: RequestHandler = async ({ locals, params }) => { } } - const { content, mime } = await downloadFile(sha256, params.id); + const { content, mime } = await downloadImgFile(sha256, params.id); return new Response(content, { headers: { diff --git a/src/routes/conversation/[id]/upload-pdf/+server.ts b/src/routes/conversation/[id]/upload-pdf/+server.ts new file mode 100644 index 00000000000..e4532f1567c --- /dev/null +++ b/src/routes/conversation/[id]/upload-pdf/+server.ts @@ -0,0 +1,41 @@ +import { authCondition } from "$lib/server/auth"; +import { collections } from "$lib/server/database"; +import { MAX_SEQ_LEN as CHUNK_CAR_LEN, createEmbeddings } from "$lib/server/embeddings"; +import { uploadPdfEmbeddings } from "$lib/server/files/uploadFile"; +import { chunk } from "$lib/utils/chunk"; +import { error } from "@sveltejs/kit"; +import { ObjectId } from "mongodb"; +import * as pdfjsLib from "pdfjs-dist/legacy/build/pdf"; + +export async function POST({ request, params, locals }) { + const conversationId = new ObjectId(params.id); + const conversation = await collections.conversations.findOne({ + _id: conversationId, + ...authCondition(locals), + }); + + if (!conversation) { + throw error(404, "Conversation not found"); + } + + const formData = await request.formData(); + const file = formData.get('pdf'); // 'pdf' is the name used in FormData on the frontend + const data = new Uint8Array(await file.arrayBuffer()) + const loadingTask = pdfjsLib.getDocument({ data }); + const pdf = await loadingTask.promise; + + const N_MAX_PAGES = 20; + let text = ''; + for (let i = 1; i <= Math.min(pdf.numPages, N_MAX_PAGES); i++) { + const page = await pdf.getPage(i); + const content = await page.getTextContent(); + text += content.items.map(item => item.str).join(' '); + } + + const textChunks = chunk(text, CHUNK_CAR_LEN); + const embeddings = await createEmbeddings(textChunks); + + await uploadPdfEmbeddings(embeddings, textChunks, conversation); + + return new Response(); +}