summaryrefslogtreecommitdiffhomepage
path: root/lib/doc/jao-words.el
blob: 32baab85bc226492aa4ee54da121787cf0486475 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
;;; jao-words.el --- utilities for word stats        -*- lexical-binding: t; -*-

;; Copyright (C) 2026  Jose Antonio Ortega Ruiz

;; Author: Jose Antonio Ortega Ruiz <mail@jao.io>
;; Keywords: text

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

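;; Simple word counting and classification: `jao-words-show-stats'
;; tabulates, in an org table, how often each word of the current
;; buffer occurs and on which lines it is repeated in close succession.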

;;; Code:

(defun jao-words--count-words (raw-word-list)
  "Count how many times each element of RAW-WORD-LIST occurs.
RAW-WORD-LIST is a list of words, possibly with repetitions;
elements are compared with `equal'.
Return an alist of (WORD . COUNT) pairs, one per distinct word,
sorted by WORD using `string<'."
  ;; A hash table keeps the tally linear in the number of words and
  ;; needs nothing from cl-lib, which this file does not require.
  (let ((counts (make-hash-table :test #'equal))
        (result nil))
    (dolist (word raw-word-list)
      (puthash word (1+ (gethash word counts 0)) counts))
    (maphash (lambda (word count)
               (push (cons word count) result))
             counts)
    (sort result (lambda (a b) (string< (car a) (car b))))))

(defun jao-words--word-lines (buffer word)
  "Return the lines of BUFFER where WORD is repeated in close succession.
Search BUFFER from its beginning for whole-word, case-sensitive
occurrences of WORD.  Whenever two consecutive occurrences are at
most 10 lines apart, record the line numbers of both.  Searching
stops once 12 or more line numbers have been recorded.
Return the recorded line numbers, in order of appearance, as a
single space-separated string, which is empty when there are none."
  (let ((lines nil)
        (last-line nil)
        (regexp (format "\\b%s\\b" (regexp-quote word))))
    (with-current-buffer buffer
      ;; `case-fold-search' can have a buffer-local value in BUFFER, so
      ;; it must be bound after switching buffers: a binding made in
      ;; the caller's buffer would not be visible here.
      (let ((case-fold-search nil))
        (save-excursion
          (goto-char (point-min))
          (while (and (< (length lines) 12)
                      (re-search-forward regexp nil t))
            (let ((line (line-number-at-pos)))
              ;; Only pairs of nearby occurrences are of interest; the
              ;; first occurrence has no predecessor to pair with.
              (when (and last-line (<= (- line last-line) 10))
                (push last-line lines)
                (push line lines))
              (setq last-line line))))))
    (mapconcat #'number-to-string (nreverse lines) " ")))

(defun jao-words-show-stats ()
  "Show word statistics for the current buffer in an org table.
The text of the current buffer is downcased and split into words
on runs of non-word characters.  The buffer \"*word-statistics*\"
is then filled with a table with one row per distinct word,
giving the word, its number of occurrences and the lines reported
for it by `jao-words--word-lines'.  Rows are sorted by number of
occurrences, highest first, and the buffer is displayed with
`pop-to-buffer'."
  (interactive)
  (let* ((buffer (current-buffer))
         (words (split-string
                 (downcase (buffer-substring-no-properties (point-min)
                                                           (point-max)))
                 "\\W+" t))
         (word-list (jao-words--count-words words)))
    (with-current-buffer (get-buffer-create "*word-statistics*")
      (erase-buffer)
      (insert "| word | occurrences | lines |\n|-\n")
      ;; NOTE(review): words are downcased here, but the line lookup in
      ;; `jao-words--word-lines' is case-sensitive, so capitalized
      ;; occurrences are counted but not located -- confirm intent.
      (dolist (elt word-list)
        (let ((word (car elt)))
          (insert (format "| %s | %d | %s |\n"
                          word
                          (cdr elt)
                          (jao-words--word-lines buffer word)))))
      (org-mode)
      (goto-char (point-min))
      (org-table-align)
      ;; With no words there are no data rows to move to or sort.
      (when word-list
        (goto-char (point-min))
        ;; Skip the header and the rule to reach the first data row,
        ;; then sort on the occurrences column, numerically reversed.
        (forward-line 2)
        (org-table-goto-column 2)
        (org-table-sort-lines nil ?N))))
  (pop-to-buffer "*word-statistics*"))


(provide 'jao-words)
;;; jao-words.el ends here