From 36a04a325b0daefba223c136358c9f8f89b1651a Mon Sep 17 00:00:00 2001
From: jao
Date: Mon, 26 Sep 2022 04:27:29 +0100
Subject: doc-view: better access to current page's text

---
 lib/doc/jao-doc-view.el | 77 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 22 deletions(-)

(limited to 'lib/doc')

diff --git a/lib/doc/jao-doc-view.el b/lib/doc/jao-doc-view.el
index cce158e..3631e98 100644
--- a/lib/doc/jao-doc-view.el
+++ b/lib/doc/jao-doc-view.el
@@ -47,7 +47,7 @@
         jao-doc-view--goer (or goto-page 'jao-doc-view-goto-page)))
 
 (defun jao-doc-view--imenu-create-index ()
-  (jao-pdf-imenu-index #'jao-doc-view--goer #jao-doc-view--imenu-file))
+  (jao-pdf-imenu-index 'jao-doc-view--goer 'jao-doc-view--imenu-file))
 
 (add-hook 'doc-view-mode-hook #'jao-pdf-set-up-imenu)
 
@@ -76,34 +76,67 @@
 
 (advice-add 'doc-view-goto-page :before #'jao-doc-view--trail-push)
 
+;;; Extract text
+(defun jao-doc-view-page-text (&optional re-render no-select)
+  "Extract the text of the current page to a file and visit it.
+The text is produced by mutool and kept in doc-view's cache
+directory (falling back to /tmp), so later calls reuse it unless
+RE-RENDER is non-nil (interactively, a prefix argument).  When
+NO-SELECT is non-nil, return the text file's name without visiting it."
+  (interactive "P")
+  (let* ((pno (doc-view-current-page))
+         (in buffer-file-name)
+         (cdir (or (doc-view--current-cache-dir) "/tmp"))
+         (out (format "%s/p%s.txt" cdir pno)))
+    (when (and (file-exists-p out) re-render)
+      (delete-file out))
+    (unless (file-exists-p out)
+      ;; Quote file names: they may contain spaces or shell metacharacters.
+      (shell-command-to-string
+       (format "mutool convert -o %s %s %s"
+               (shell-quote-argument out) (shell-quote-argument in) pno)))
+    (if no-select
+        out
+      (find-file out)
+      (view-mode))))
+
+(define-key doc-view-mode-map "t" #'jao-doc-view-page-text)
+
 ;;; Find URLs
+(defun jao-doc-view--full-txt ()
+  "Return the name of the file holding the whole document's text."
+  (expand-file-name "doc.txt" (doc-view--current-cache-dir)))
+
+(defun jao-doc-view--collect-urls (file)
+  "Return the list of http(s) URLs found in FILE."
+  (with-current-buffer (find-file-noselect file)
+    (goto-char (point-min))
+    (let ((urls nil))
+      (while (re-search-forward "https?://" nil t)
+        (push (thing-at-point-url-at-point) urls))
+      urls)))
+
 (defun jao-doc-view--page-urls (&optional all)
-  (if doc-view--current-converter-processes
-      (message "DocView: please wait till conversion finished.")
-    (let ((txt (expand-file-name "doc.txt" (doc-view--current-cache-dir)))
-          (page (doc-view-current-page))
-          (pd-rx "^ ")
-          (urls))
-      (if (file-readable-p txt)
-          (with-current-buffer (find-file-noselect txt)
-            (goto-char (point-min))
-            (unless all (re-search-forward pd-rx nil t (1- page)))
-            (let ((end (save-excursion
-                         (if (and (not all) (re-search-forward pd-rx nil t))
-                             (point)
-                           (point-max)))))
-              (while (re-search-forward "https?://" end t)
-                (push (thing-at-point-url-at-point) urls))
-              urls))
-        (doc-view-doc->txt txt (lambda () (jao-doc-view--page-urls all)))
-        'wait))))
+  "Return the URLs in the current page, or in the whole document if ALL.
+When ALL is non-nil and the full text has not been extracted yet,
+start the extraction and return the symbol `wait'."
+  (cond ((and all (not (file-exists-p (jao-doc-view--full-txt))))
+         (message "Full text not extracted yet: doing so!")
+         ;; The old `txt' binding is gone with the removed `let'; ask the
+         ;; helper for the file name instead of using an unbound variable.
+         (doc-view-doc->txt (jao-doc-view--full-txt)
+                            (lambda () (message "Text extracted")))
+         'wait)
+        (all (jao-doc-view--collect-urls (jao-doc-view--full-txt)))
+        (t (jao-doc-view--collect-urls (jao-doc-view-page-text nil t)))))
 
 (defun jao-doc-view-visit-url (all)
   "Visit URL displayed in this page."
-  (interactive "P" doc-view-mode)
+  (interactive "P")
   (let ((urls (jao-doc-view--page-urls all)))
     (cond ((eq 'wait urls) (message "Extracting text, please wait and retry."))
-          ((zerop (length urls)) (message "No URLs in this page"))
+          ((zerop (length urls))
+           (message "No URLs in this %s" (if all "document" "page")))
           (t (when-let (url (completing-read "URL: " urls nil nil
                                              (when (null (cdr urls)) (car urls))))
                (browse-url url))))))
-- 
cgit v1.2.3