Skip to content

Commit

Permalink
Also index pdf, markdown and plaintext files using khoj emacs client
Browse files Browse the repository at this point in the history
Previously you could only index org-mode files and directories from
khoj.el

Mark the `khoj-org-directories' and `khoj-org-files' variables as
obsolete. `khoj-index-directories' and `khoj-index-files' replace them,
as these names better fit the more general case.

Resolves #597
  • Loading branch information
debanjum committed Jan 3, 2024
1 parent 5abaed9 commit e28adf2
Showing 1 changed file with 41 additions and 14 deletions.
55 changes: 41 additions & 14 deletions src/interface/emacs/khoj.el
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ for example), set this to the full interpreter path."
(member val '("python" "python3" "pythonw" "py")))
:group 'khoj)

(defcustom khoj-org-files (org-agenda-files t t)
(defcustom khoj-org-files nil
"List of org-files to index on khoj server."
:type '(repeat string)
:group 'khoj)
Expand All @@ -251,6 +251,19 @@ for example), set this to the full interpreter path."
:type '(repeat string)
:group 'khoj)

;; Deprecate the org-specific variables in favor of the more general
;; `khoj-index-directories' and `khoj-index-files'.
;; The `set' access type restricts the obsolescence warning to code that
;; sets these variables.
(make-obsolete-variable 'khoj-org-directories 'khoj-index-directories "1.2.0" 'set)
(make-obsolete-variable 'khoj-org-files 'khoj-index-files "1.2.0" 'set)

(defcustom khoj-index-files (org-agenda-files t t)
  "List of files to index on khoj server.
Each entry is the path to an org, markdown, pdf or other plaintext file.
Defaults to the current org agenda files.
When nil, the deprecated `khoj-org-files' is used instead."
  ;; `file' (rather than `string') gives file-name completion in Customize;
  ;; the stored value is still a list of strings.
  :type '(repeat file)
  :group 'khoj)

(defcustom khoj-index-directories nil
  "List of directories with files to index on khoj server.
Each directory is searched recursively for files ending in org, md,
markdown, pdf, txt, rst, xml, htm or html.
When nil, the deprecated `khoj-org-directories' is used instead."
  ;; `directory' (rather than `string') gives directory-name completion in
  ;; Customize; the stored value is still a list of strings.
  :type '(repeat directory)
  :group 'khoj)

(defcustom khoj-auto-setup t
"Automate install, configure and start of khoj server.
Auto invokes setup steps on calling main entrypoint."
Expand Down Expand Up @@ -395,12 +408,16 @@ Auto invokes setup steps on calling main entrypoint."
"Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
`FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
(interactive)
(let ((boundary (format "-------------------------%d" (random (expt 10 10))))
(files-to-index (or file-paths
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
(inhibit-message t)
(message-log-max nil))
(let* ((boundary (format "-------------------------%d" (random (expt 10 10))))
;; Use `khoj-index-directories', `khoj-index-files' when set, else fallback to `khoj-org-directories', `khoj-org-files'
;; This is a temporary change. `khoj-org-directories', `khoj-org-files' are deprecated. They will be removed in a future release
(content-directories (or khoj-index-directories khoj-org-directories))
(content-files (or khoj-index-files khoj-org-files))
(files-to-index (or file-paths
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
(inhibit-message t)
(message-log-max nil))
(let ((url-request-method "POST")
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
Expand Down Expand Up @@ -430,20 +447,30 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
(set-buffer-multibyte nil)
(insert "\n")
(dolist (file-to-index files-to-index)
;; find file content-type. Choose from org, markdown, pdf, plaintext
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
((string-match "\\.pdf$" file-to-index) "application/pdf")
(t "text/plain"))))
(insert (format "--%s\r\n" boundary))
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
(insert "Content-Type: text/org\r\n\r\n")
(insert (format "Content-Type: %s\r\n\r\n" content-type))
(insert (with-temp-buffer
(insert-file-contents-literally file-to-index)
(buffer-string)))
(insert "\r\n"))
(insert "\r\n")))
(dolist (file-to-index previously-indexed-files)
(when (not (member file-to-index files-to-index))
(insert (format "--%s\r\n" boundary))
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
(insert "Content-Type: text/org\r\n\r\n")
(insert "")
(insert "\r\n")))
;; find file content-type. Choose from org, markdown, pdf, plaintext
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
((string-match "\\.pdf$" file-to-index) "application/pdf")
(t "text/plain"))))
(insert (format "--%s\r\n" boundary))
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
(insert "Content-Type: text/org\r\n\r\n")
(insert "")
(insert "\r\n"))))
(insert (format "--%s--\r\n" boundary))
(buffer-string)))

Expand Down

0 comments on commit e28adf2

Please sign in to comment.